#!/usr/bin/env bash
#
# Runs the apertium-des<format> tool that matches the requested format over
# the input document and writes the result to the output.
#
# Usage: <script> [-f format] [in [out]]
#   -f format   format name (txt when omitted)
#   in          input file  (defaults to /dev/stdin)
#   out         output file (defaults to /dev/stdout)
#
# Environment read: TMPDIR (defaults to /tmp), FORMAT (used as the format when
# -f is not given).
#
# All diagnostics are written to stderr because stdout is the default data
# destination.

APERTIUM_PATH="/usr/bin"
PATH="${APERTIUM_PATH}:${PATH}"
INPUT_FILE="/dev/stdin"
OUTPUT_FILE="/dev/stdout"

TMPDIR=${TMPDIR:-/tmp}

#######################################
# Prints the usage text and terminates the script with status 1.
# Outputs: usage text on stdout (callers redirect it to stderr).
#######################################
message() {
  echo "USAGE: $(basename "$0") [-f format] [in [out]]"
  echo " -f format one of: txt (default), html, rtf, odt, docx, wxml, xlsx, pptx"
  echo " in input file (stdin by default)"
  echo " out output file (stdout by default)"
  exit 1
}

#######################################
# Selects the first installed UTF-8 locale and exports it as LC_CTYPE.
# Exits with status 1 when `locale -a` lists none.
# Globals: LC_CTYPE (written, exported)
#######################################
locale_utf8() {
  # Accept the "utf8", "utf.8" and "UTF-8" spellings of the codeset name.
  LC_CTYPE=$(locale -a | grep -i 'utf[.-]*8' | head -1)
  export LC_CTYPE
  if [[ -z "$LC_CTYPE" ]]; then
    echo "Error: Install an UTF-8 locale in your system" >&2
    exit 1
  fi
}

#######################################
# Exits with status 1 unless both `zip` and `unzip` are available.
#######################################
test_zip() {
  if ! command -v zip &>/dev/null; then
    echo "Error: Install 'zip' command in your system" >&2
    exit 1
  fi
  if ! command -v unzip &>/dev/null; then
    echo "Error: Install 'unzip' command in your system" >&2
    exit 1
  fi
}

#######################################
# Exits with status 1 unless `gawk` is available.
#######################################
test_gawk() {
  if ! command -v gawk &>/dev/null; then
    echo "Error: Install 'gawk' in your system" >&2
    exit 1
  fi
}

#######################################
# Creates a scratch directory under TMPDIR and stores its path in
# INPUT_TMPDIR. Exits with status 1 when the directory cannot be created.
# Globals: TMPDIR (read), INPUT_TMPDIR (written)
#######################################
make_input_tmpdir() {
  INPUT_TMPDIR=$(mktemp -d "$TMPDIR"/apertium.XXXXXXXX) || {
    echo "Error: cannot create a temporary directory in '$TMPDIR'" >&2
    exit 1
  }
}

#######################################
# Reads file paths from stdin, one per line, and for every path prints one
# output line made of " <line>" for each line of that file.
# A path that cannot be read yields an empty output line.
# Outputs: the flattened files on stdout.
#######################################
concat_files() {
  # getline returns -1 on error; testing "> 0" keeps an unreadable path
  # (for example a directory) from looping forever.
  awk '{
    path = $0
    while ((getline line < path) > 0) printf(" %s", line)
    close(path)
    printf("\n")
  }'
}

#######################################
# Runs `apertium -f xlsx` over every file under INPUT_TMPDIR whose name ends
# in "xlsx", replacing each file with the command's output.
# Globals: INPUT_TMPDIR, TMPDIR, DIRECTORY, OPCIONU, PREFIJO (all read)
#######################################
translate_embedded_xlsx() {
  local workbook scratch
  # NOTE(review): DIRECTORY, OPCIONU and PREFIJO are not assigned anywhere in
  # this script; presumably they are expected from the environment - confirm.
  while IFS= read -r -d '' workbook; do
    scratch=$(mktemp "$TMPDIR"/apertium.XXXXXXXX) || continue
    apertium -f xlsx -d "$DIRECTORY" "$OPCIONU" "$PREFIJO" <"$workbook" >"$scratch"
    cp "$scratch" "$workbook"
    rm "$scratch"
  done < <(find "$INPUT_TMPDIR" -type f -name '*xlsx' -print0)
}

#######################################
# Pipes FICHERO through apertium-prelatex, apertium-utils-fixlatex and
# apertium-deslatex into SALIDA. When FICHERO is empty, stdin is first
# spooled to a temporary file that is removed afterwards.
# Globals: FICHERO (read/written), SALIDA (read), TMPDIR (read)
#######################################
unformat_latex() {
  # Local flag so that a stray environment variable can never cause the
  # caller's input file to be deleted.
  local remove_input="false"

  test_gawk

  if [[ -z "$FICHERO" ]]; then
    FICHERO=$(mktemp "$TMPDIR"/apertium.XXXXXXXX)
    cat >"$FICHERO"
    remove_input="true"
  fi

  apertium-prelatex "$FICHERO" \
    | apertium-utils-fixlatex \
    | apertium-deslatex >"$SALIDA"

  if [[ "$remove_input" == "true" ]]; then
    rm -f -- "$FICHERO"
  fi
}

#######################################
# Unzips FICHERO, flattens every extracted path matching "content\.xml" and
# pipes the result through apertium-desodt into SALIDA.
# Globals: FICHERO, SALIDA (read), INPUT_TMPDIR (written)
#######################################
unformat_odt() {
  # Run the checks first: they may exit, and must not leave a scratch
  # directory behind.
  locale_utf8
  test_zip
  make_input_tmpdir

  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  find "$INPUT_TMPDIR" \
    | grep 'content\.xml' \
    | concat_files \
    | apertium-desodt >"$SALIDA"

  rm -Rf -- "$INPUT_TMPDIR"
}

#######################################
# Unzips FICHERO, processes embedded xlsx files, then flattens every extracted
# path containing "xml" (except settings, theme, styles, font, rels and
# docProps paths) and pipes the result through apertium-deswxml into SALIDA.
# Globals: FICHERO, SALIDA (read), INPUT_TMPDIR (written)
#######################################
unformat_docx() {
  locale_utf8
  test_zip
  make_input_tmpdir

  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  translate_embedded_xlsx

  find "$INPUT_TMPDIR" \
    | grep "xml" \
    | grep -E -v -i '(settings|theme|styles|font|rels|docProps)' \
    | concat_files \
    | apertium-deswxml >"$SALIDA"

  rm -Rf -- "$INPUT_TMPDIR"
}

#######################################
# Unzips FICHERO, processes embedded xlsx files, then flattens every
# slides/slide*.xml file of the extracted tree and pipes the result through
# apertium-despptx into SALIDA.
# Globals: FICHERO, SALIDA (read), INPUT_TMPDIR (written)
#######################################
unformat_pptx() {
  locale_utf8
  test_zip
  make_input_tmpdir

  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  translate_embedded_xlsx

  # Search the extraction directory, not the caller's working directory.
  find "$INPUT_TMPDIR" -path '*/slides/slide*.xml' \
    | concat_files \
    | apertium-despptx >"$SALIDA"

  rm -Rf -- "$INPUT_TMPDIR"
}

#######################################
# Unzips FICHERO, flattens every extracted path matching "sharedStrings.xml"
# and pipes the result through apertium-desxlsx into SALIDA.
# Globals: FICHERO, SALIDA (read), INPUT_TMPDIR (written)
#######################################
unformat_xlsx() {
  locale_utf8
  test_zip
  make_input_tmpdir

  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  find "$INPUT_TMPDIR" \
    | grep "sharedStrings.xml" \
    | concat_files \
    | apertium-desxlsx >"$SALIDA"

  rm -Rf -- "$INPUT_TMPDIR"
}

#######################################
# Parses the command line and dispatches to the handler for the format.
# Arguments: the script's own arguments.
# Globals: FORMAT, FORMATADOR, FICHERO, SALIDA, INPUT_FILE, OUTPUT_FILE
#######################################
main() {
  local opt

  # The leading ':' selects silent error reporting, so OPTARG carries the
  # offending option letter and the ':' arm is reachable.
  while getopts ":f:" opt; do
    case "$opt" in
      f)
        FORMAT=$OPTARG
        ;;
      \?)
        echo "ERROR: Unknown option $OPTARG" >&2
        message >&2
        ;;
      :)
        echo "ERROR: $OPTARG requires an argument" >&2
        message >&2
        ;;
    esac
  done
  shift "$((OPTIND - 1))"

  case "$#" in
    2)
      OUTPUT_FILE=$2
      INPUT_FILE=$1
      ;;
    1)
      INPUT_FILE=$1
      ;;
    0) ;;
    *)
      message >&2
      ;;
  esac

  # Only an explicitly named input is checked for existence.
  if (($# >= 1)) && [[ ! -e "$INPUT_FILE" ]]; then
    echo "Error: file '$INPUT_FILE' not found." >&2
    message >&2
  fi

  if [[ -z "$FORMAT" ]]; then
    FORMAT="txt"
  fi

  FORMATADOR=$FORMAT
  FICHERO=$INPUT_FILE
  SALIDA=$OUTPUT_FILE

  case "$FORMATADOR" in
    rtf)
      # First installed locale that is neither UTF-8, C nor POSIX.
      MILOCALE=$(locale -a | grep -E -i -v -m1 'utf|^C|^POSIX$')
      if [[ -z "$MILOCALE" ]]; then
        echo "Error: Install a ISO-8859-1 compatible locale in your system" >&2
        exit 1
      fi
      export LC_CTYPE=$MILOCALE
      ;;
    html-noent)
      FORMATADOR="html"
      ;;
    latex)
      unformat_latex
      exit 0
      ;;
    odt)
      unformat_odt
      exit 0
      ;;
    docx)
      unformat_docx
      exit 0
      ;;
    xlsx)
      unformat_xlsx
      exit 0
      ;;
    pptx)
      unformat_pptx
      exit 0
      ;;
    wxml)
      locale_utf8
      ;;
    *) ;;
  esac

  apertium-des"$FORMATADOR" "$FICHERO" >"$SALIDA"
}

main "$@"