#!/bin/bash
#
# script to convert (almost) any plain ascii - text to a TeX format
# converts most basic characters and does:
#   -  apply a header
#   -  apply commands at the end
#   -  convert special characters
#
# Usage:   totex [-tgeuopxh] <file-to-convert>
#
#          where:  -t   means to start TeX as well
#                   g   means: document in German
#                   e   means: document in English
#                   u   means: document in US-English
#                   o   means: OriginalTeX (all functions of german.tex and
#                                           umlaut.tex are switched off)
#                   p   means: enable/disable page numbers
#                   x   means: plain TeX (german.tex and umlaut.tex are
#                                         not required)
#                   h   means: display help
#
#          Output is to a new file 'file-to-convert.tex'
#
#
# Format of the input file:
#
# totex is designed to accept a wide variety of input files. However, the
# input must be a plain text file without any special formatting characters.
# The input file itself offers only a very limited set of ways to influence
# the final format:
#    - a dot `.' as the first character on a line (optionally
#      preceded by blanks) forces a new alinea (paragraph break) while
#      continuing with the same item{}
#    - enclosing a ~word~ in a pair of tilde characters
#      or in two *asterisks* will set it in italic
#
#####################################################
#
#  Part for User Configuration
#
#####################################################
#
# Page geometry, given in inches
# ------------------------------
PAGE_HEIGHT="8.9"
PAGE_WIDTH="6.5"

#
# Spacing, given in pt unless noted otherwise
# -------------------------------------------
# Basic distance from one line to the next.
LINE_HEIGHT="15"

# Extra vertical space inserted between paragraphs.
PARA_SKIP="7"

# Extra vertical space for a new alinea inside a numbered item.
ALIN_SKIP="2"
MORE_SKIP="15"

# Indentation of the first line of every paragraph.
PARA_INDENT="20"

# Extra space in front of every title.
# (This value is written to the TeX file with the unit "in".)
TITLESKIP="0.2"

#
# Default document language; the command line options can override it.
# Leave exactly one of these lines active:
#LANGUAGE="USenglish"
#LANGUAGE="english"
LANGUAGE="german"

#
# Print page numbers at the bottom of each page?
# (This is only the default; the command line option toggles it.)
PAGENUMBERS="yes"
#PAGENUMBERS="no"

#
# Treat lines that start like "30. November" as numbered items?
# Normally "yes"; set to "no" if such lines are wrongly turned into items.
NUMBERED_ITEMS="yes"

#
# Start a new line whenever the first word of a line is followed by a
# colon [:]?  (It's nice for chatroom listings!)
COLON_NEWLINE="yes"

#
# Font selection.  Remember: every backslash has to be written twice here
# so that a single backslash ends up in the TeX file.
NORMAL_FONT='cmr10 scaled \\magstep 1'
BOLD___FONT='cmb10 scaled \\magstep 1'
TITLE__FONT='cmb10 scaled \\magstep 1'
EMPHA__FONT='cmti10 scaled \\magstep 1'

#####################################################
#  ( End of user configuration )
#####################################################

# Fixed strings.  Every TeX backslash is stored doubled because these values
# are later pasted into sed replacement/insert text, where sed turns each
# pair back into a single backslash.
VERSION='Ver. 1.1'
OPTLETTERS='tgeuopxh'
LINEMODE='\\obeylines\\parskip = 0 pt\\par\\noindent'
TITLFONT='\\cmb'
STAR_FILL='\\hfill\\break\\line{\\leaders\\hbox to 1em{\\hss*\\hss}\\hfill}'
EQU__FILL='\\leaders\\hbox to 1 em{\\hss =\\hss}\\hfill'
PENALTY='\\widowpenalty=1000 \\clubpenalty=1000 \\tolerance=10000'
HOR_LINE='{\\par\\medskip\\hrule\\par\\medskip}'

# Strings derived from the user configuration.
# Title prefix: extra space, a good place for a page break, the title font.
TITLETEX='\\vskip '"${TITLESKIP}"' in\\goodbreak'"${TITLFONT}"

# Height of the box that opens a new alinea: one line plus the extra skip.
ALINEA_SKIP=$(( $LINE_HEIGHT + $ALIN_SKIP ))
NEW_ALINEA='\\hfill\\break\\vbox to '"${ALINEA_SKIP}"' pt{ }\\indent'

# Vertical skips placed in front of centered lines; both end in a blank so
# that the unit ("pt") can be appended directly.
CTRSKIP='\\par\\vskip '"${ALIN_SKIP}"' '
MORSKIP='\\par\\vskip '"${MORE_SKIP}"' '
CENTERED="${CTRSKIP}"'pt\\centerline{'

# Parse the optional leading option word.
#
# Only the first argument is inspected.  If it starts with "-" it has to be
# "-" followed by one or more letters out of $OPTLETTERS and nothing else;
# any other word is rejected with an error message on stderr and a non-zero
# exit status.  When an option word is present the file to process is the
# second argument, otherwise it is the first one.
#
# Sets:
#   SOMETHI   the first argument if it looks like an option, else empty
#   OPTIONS   the validated option word (e.g. "-tg"), else empty
#   ARGUMENT  name of the file to process (may be empty)
#   OPT_TEX, OPT_GER, OPT_ENG, OPT_USE, OPT_ORI, OPT_PAG, OPT_PLN, OPT_HLP
#             non-empty when the letter t/g/e/u/o/p/x/h was given
OPTIONS=""
SOMETHI=""
case "$1" in
   -*) SOMETHI="$1" ;;
esac

if [ -n "$SOMETHI" ]; then
   # Look at the letters after the dash: there must be at least one, and
   # none of them may be outside $OPTLETTERS (so "-tz" is refused as a whole).
   case "${SOMETHI#-}" in
      "" | *[!$OPTLETTERS]*)
         printf 'Illegal option %s\n' "$SOMETHI" >&2
         exit 1
         ;;
   esac
   OPTIONS="$SOMETHI"
   # The first argument is a valid option; the second must be
   # the name of the file to process.
   ARGUMENT="$2"
else
   ARGUMENT="$1"
fi

# One flag variable per option letter; empty means "not given".
OPT_TEX=""
OPT_GER=""
OPT_ENG=""
OPT_USE=""
OPT_ORI=""
OPT_PAG=""
OPT_PLN=""
OPT_HLP=""
case "$OPTIONS" in *t*) OPT_TEX="$OPTIONS" ;; esac
case "$OPTIONS" in *g*) OPT_GER="$OPTIONS" ;; esac
case "$OPTIONS" in *e*) OPT_ENG="$OPTIONS" ;; esac
case "$OPTIONS" in *u*) OPT_USE="$OPTIONS" ;; esac
case "$OPTIONS" in *o*) OPT_ORI="$OPTIONS" ;; esac
case "$OPTIONS" in *p*) OPT_PAG="$OPTIONS" ;; esac
case "$OPTIONS" in *x*) OPT_PLN="$OPTIONS" ;; esac
case "$OPTIONS" in *h*) OPT_HLP="$OPTIONS" ;; esac

# Turn the option flags (OPT_*) into the settings used for the TeX preamble.
#
# Sets:
#   TEXFLAG      "yes" when -t was given, else "no"
#   LANGUAGE     overridden by -g / -e / -u (checked in that order, so the
#                last one that is set wins)
#   QTONCHAR     text of the opening-quote macro
#   QTOFFCHAR    text of the closing-quote macro
#   SELORIGINAL  preamble line for \originalTeX (commented out without -o)
#   IN_UML, IN_GER, IN_SEL
#                preamble lines loading umlaut.tex / german.tex and selecting
#                the language; reduced to a bare "%" for plain TeX (-x)
#
# All of these strings are later pasted into the text of a sed "i\" command.
# sed removes one level of backslashes there, so a TeX control sequence has
# to arrive at sed with TWO backslashes (written as four inside double
# quotes).
CMT=""
# Default quotes: `` and ''.  The backslash in front of each apostrophe stays
# in the value and is dropped by sed when the preamble is written.
QTONCHAR="\`\`"
QTOFFCHAR="\'\'"

if [ -n "$OPT_TEX" ]; then
   TEXFLAG="yes"
else
   TEXFLAG="no"
fi

if [ -n "$OPT_GER" ]; then
   LANGUAGE="german"
   # Quote shorthands "` and "' as used with german.tex.
   QTONCHAR="\"\`"
   QTOFFCHAR="\"\'"
fi

if [ -n "$OPT_ENG" ]; then
   LANGUAGE="english"
fi

if [ -n "$OPT_USE" ]; then
   LANGUAGE="USenglish"
fi

# Doubled backslash, like IN_UML/IN_GER/IN_SEL below.  With a single one sed
# strips it and the plain word "originalTeX" lands in the document instead
# of the macro call.
if [ -n "$OPT_ORI" ]; then
   SELORIGINAL="\\\\originalTeX"
else
   SELORIGINAL="%\\\\originalTeX"
fi

if [ -n "$OPT_PLN" ]; then
   # Plain TeX: back to the default quotes and comment out everything that
   # would need german.tex or umlaut.tex.
   QTONCHAR="\`\`"
   QTOFFCHAR="\'\'"
   CMT="%"
   SELORIGINAL="%"
   IN_UML="%"
   IN_GER="%"
   IN_SEL="%"
else
   IN_UML="$CMT\\\\input umlaut.tex"
   IN_GER="$CMT\\\\input german.tex"
   IN_SEL="$CMT\\\\selectlanguage{$LANGUAGE}"
fi

# -h: print the help screen and stop.
# Asking for help is not an error, so the script leaves with status 0.
if [ -n "$OPT_HLP" ]; then
   cat <<EOF
totex Shell Script $VERSION
Convert ASCII-text to TeX format
Usage: totex [-$OPTLETTERS] <file_to_convert>
where:  -t   means: start TeX as well
         g   means: document in German
         e   means: document in English
         u   means: document in US-English
         o   means: OriginalTeX (all functions of german.tex
                                 are switched off)
         x   means: plain TeX (no special characters in source text)
                    overrides options -geuo
                    german.tex and umlaut.tex are not required
         p   means: enable/disable page numbers
         h   means: display this help screen
EOF
   exit 0
fi

# A file name is mandatory: without one, show the short usage text and stop.
if [ -z "$ARGUMENT" ]; then
   printf '%s\n' \
      "totex Shell Script $VERSION" \
      "Convert ASCII-text to TeX format" \
      "Usage: totex [-$OPTLETTERS] <file_to_convert>" \
      "       totex -h   for help"
   exit 1
fi

# The named input has to exist as a regular file.
if [ ! -f "$ARGUMENT" ]; then
   printf '%s\n' "Input file $ARGUMENT not found"
   exit 1
fi

# Work file and result file, both placed next to the input file:
#   TMPFILE  <input>.tmp
#   OUTFILE  <input>.tex, with a trailing ".txt" of the input name removed
# The suffix is stripped with a parameter expansion instead of echo | sed,
# so names that echo would mistake for one of its options (e.g. "-n") are
# handled correctly.
TMPFILE="${ARGUMENT}.tmp"
OUTFILE="${ARGUMENT%.txt}.tex"

echo "Converting $ARGUMENT to $OUTFILE"

# Page numbers: -p flips whatever default PAGENUMBERS holds.
# The operands of [ are quoted so that an empty or unset PAGENUMBERS is
# simply treated as "not yes" instead of producing a syntax error from [.
if [ -n "$OPT_PAG" ]; then
   if [ "$PAGENUMBERS" = yes ]; then
      PAGENUMBERS="no"
   else
      PAGENUMBERS="yes"
   fi
fi

# PAGETEX is the preamble line for \nopagenumbers; with page numbers wanted
# it is written as a TeX comment.  (Backslash doubled for sed, which writes
# the preamble.)
if [ "$PAGENUMBERS" = yes ]; then
   PAGETEX='%\\nopagenumbers'
   echo "pagenumbers enabled"
else
   PAGETEX='\\nopagenumbers'
fi

# START_NEWLINE: TeX code put in front of a line whose first word ends in a
# colon; empty when COLON_NEWLINE is not "yes".
# The operands of [ are quoted so that an empty setting counts as "no"
# instead of making [ fail with a syntax error.
if [ "$COLON_NEWLINE" = yes ]; then
   START_NEWLINE='\\par\\noindent'
else
   START_NEWLINE=""
fi

# NIS_SED / NIM_SED: sed commands that turn a line starting with a one-digit
# ("7. ") or two-digit ("12. ") number into an \item preceded by a small
# skip.  Both are empty (a no-op for sed) when NUMBERED_ITEMS is not "yes".
if [ "$NUMBERED_ITEMS" = yes ]; then
   NIS_SED='s/^[ ]*[1-9]\. /\\par\\vskip '"$ALIN_SKIP"' pt\\item{&} /'
   NIM_SED='s/^[ ]*[1-9][0-9]\. /\\par\\vskip '"$ALIN_SKIP"' pt\\item{&} /'
else
   NIS_SED=""
   NIM_SED=""
fi

# Stage 1: read the input file, write $TMPFILE.
#
# tr first maps the two bytes with octal value 322 and 251 to the bytes 047
# (apostrophe) and 242.  The sed rules then, in this order:
# - replace all $ by \$
# - preserve curled braces in the text (written as $\{$ and $\}$)
# - deal with quotes: an opening " gets {\qton} appended, a closing "
#   gets {\qtoff} put in front of it (the " itself is removed in stage 3)
# - replace special characters % & ~ # ^ < > « » ¢
#   (a ~word~ becomes {\italic word} before the remaining ~ are escaped)
# - turn lines made only of underscores, of dashes, or of blank-separated
#   - / _ characters into a horizontal rule ($HOR_LINE on a line of its own)
# - escape the remaining underscores
# - turn runs of dots into {\dots} and runs of four or more = into \equfill
# - turn a leading dot into \newalinea{}
# - set a *word* in italic
#
# Both file names are quoted so that names containing blanks work.
tr '\322\251' '\047\242' < "$ARGUMENT" | sed \
   -e "s/\\$/\\\\$/g" \
   -e "s/{/\$\\\\{\$/g" \
   -e "s/}/\$\\\\}\$/g" \
   -e "s/^\"/&{\\\\qton}/" \
   -e "s/[ ][ ]*[(]*\"/&{\\\\qton}/g" \
   -e "s/=\"/&{\\\\qton}/g" \
   -e "s/^(\"/&{\\\\qton}/" \
   -e "s/\"[\\.,!;)]*[ ][ ]*/{\\\\qtoff}&/g" \
   -e "s/\">/{\\\\qtoff}>/g" \
   -e "s/\"[\\.,!;)>]*$/{\\\\qtoff}&/g" \
   -e "s/%/\\\\%/g" \
   -e "s/&/\\\\&/g" \
   -e "s/\(~\)\([A-Z,a-z][A-Z,a-z]*\)\(~\)/{\\\\italic \2}/g" \
   -e "s/~/\\\\~{}/g" \
   -e "s/#/\\\\#/g" \
   -e "s/\\^'/'/g" \
   -e "s/\\^/\\\\^{}/g" \
   -e "s/</$<$/g" \
   -e "s/>/$>$/g" \
   -e "s/«/\\\\flqq{}/g" \
   -e "s/»/\\\\frqq{}/g" \
   -e "s/¢/\\\\copyright{}/g" \
   -e "s/^[ ]*___*[ ]*$/\\
$HOR_LINE/" \
   -e "s/^ *[-_] [-_] [-_][-_ ]*$/\\
$HOR_LINE/" \
   -e "s/_/\\\\_{}/g" \
   -e "s/^[ ]*---*[ ]*$/\\
$HOR_LINE/" \
   -e "s/\\.\\.\\.\\.*[ ]*/{\\\\dots} /g" \
   -e "s/\\.\\.\\.[ ][ ]*/{\\\\dots} /g" \
   -e "s/\\.\\.[ ][ ]*/{\\\\dots} /g" \
   -e "s/====*/\\\\equfill /g" \
   -e "s/^[ ]*\\./\\\\newalinea{}/" \
   -e "s/\(\\*\)\([A-Za-z][a-z]*\)\(\\*\)/{\\\\italic \2}/g" \
 > "$TMPFILE"

#
# Stage 2: read $TMPFILE, write $OUTFILE.
#
# This is all the stuff to be included at the top of the TeX file.  A single
# sed "1 i\" command inserts, in front of the first line: a generator
# comment, the font definitions, page and paragraph dimensions, the page
# number switch, the penalties, the language set-up lines and the
# definitions of the macros \qton, \qtoff, \newalinea and \equfill; the last
# inserted lines are \cm (the normal font) and an empty line.
# Every -e argument is one line of the inserted text and ends in a backslash
# for sed to continue the text on the next line.
#
# Both file names are quoted so that names containing blanks work.
sed \
   -e "1 i\\" \
   -e "% TeX file generated by totex $VERSION\\" \
   -e "\\\\font\\\\cm=$NORMAL_FONT\\" \
   -e "\\\\font\\\\cmb=$TITLE__FONT\\" \
   -e "\\\\font\\\\bold=$BOLD___FONT\\" \
   -e "\\\\font\\\\italic=$EMPHA__FONT\\" \
   -e "\\\\vsize = $PAGE_HEIGHT in\\" \
   -e "\\\\hsize = $PAGE_WIDTH in\\" \
   -e "\\\\baselineskip = $LINE_HEIGHT pt\\" \
   -e "\\\\parskip = $PARA_SKIP pt\\" \
   -e "\\\\parindent = $PARA_INDENT pt\\" \
   -e "$PAGETEX\\" \
   -e "\\\\raggedbottom\\" \
   -e "$PENALTY\\" \
   -e "$IN_UML\\" \
   -e "$IN_GER\\" \
   -e "$IN_SEL\\" \
   -e "$SELORIGINAL\\" \
   -e "\\\\def\\\\qton{$QTONCHAR}\\" \
   -e "\\\\def\\\\qtoff{$QTOFFCHAR}\\" \
   -e "\\\\def\\\\newalinea{$NEW_ALINEA}\\" \
   -e "\\\\def\\\\equfill{$EQU__FILL}\\" \
   -e "\\\\cm\\
" \
 < "$TMPFILE" > "$OUTFILE"


# Stage 3: read $OUTFILE, write $TMPFILE.
#
# Where is the beginning of a new line?
# - Isolate single lines from EMail: header lines (From:, To:, Date:,
#   Subject:, Host:), lines starting with "+ ", with an escaped % or #, or
#   with a (math-mode) > or < are wrapped in a group that uses $LINEMODE;
#   a Newsgroups: line is wrapped in a \par\noindent group.
# - A line whose first word is followed by a colon is wrapped in a group
#   starting with $START_NEWLINE.
# - Lines starting like "1.2 ", "(a) ", "(1)" or "(12)" get a \par in front.
# - The " characters next to the {\qton} / {\qtoff} marks are removed.
# - A line made only of asterisks becomes $STAR_FILL; a leading * becomes a
#   bullet item and a leading "- " a dash item (this is to try to catch
#   alineas indicated with * or -).
# - A line made only of capitals, digits and some punctuation is centered
#   ($CENTERED ... }).
# - The markers [LINK] and [INLINE] are deleted, and lines holding nothing
#   but blanks, commas and tabs are emptied.
# - \vfill, \eject and \end are appended after the last line.
#
# Both file names are quoted so that names containing blanks work.
sed \
   -e "s/^[ ]*From:.*$/{$LINEMODE &\\\\par}/g" \
   -e "s/^[ ]*From .*@.*$/{$LINEMODE &\\\\par}/g" \
   -e "s/^[ ]*To:.*$/{$LINEMODE &\\\\par}/g" \
   -e "s/^[ ]*Date:.*$/{$LINEMODE &\\\\par}/g" \
   -e "s/^[ ]*Subject:.*$/{$LINEMODE &\\\\par}/g" \
   -e "s/^[ ]*Host:.*$/{$LINEMODE &\\\\par}/g" \
   -e "s/^[ ]*Newsgroups:.*$/{\\\\par\\\\noindent &}/g" \
   -e "s/^[ ]*[A-Za-z0-9][-A-Za-z0-9]*:[ ]/{$START_NEWLINE &}/" \
   -e "s/^[ ]*[A-Za-z0-9][A-Z ]*:[ ]/{$START_NEWLINE &}/" \
   -e "s/^[ ]*[0-9]*\\.[0-9][0-9]*[ -]/\\\\par\\\\noindent &/" \
   -e "s/^ *[+] .*/{$LINEMODE &\\\\par}/" \
   -e "s/^\\\\[%#].*/{$LINEMODE &\\\\par}/" \
   -e "s/^[ ]*([a-z]) /\\\\par &/" \
   -e "s/^[ ]*([1-9])/\\\\par &/" \
   -e "s/^[ ]*([1-9][0-9])/\\\\par &/" \
   -e "s/^\\$>\\$.*$/{$LINEMODE &\\\\par}/g" \
   -e "s/^ *\\$<\\$.*$/{$LINEMODE &\\\\par}/g" \
   -e "s/\"{\\\\qton}/{\\\\qton}/g" \
   -e "s/{\\\\qtoff}\"/{\\\\qtoff}/g" \
   -e "s/^[ ]*\\*\\**[ ]*$/$STAR_FILL/" \
   -e "s/^[ ]*\\*/   \\\\item{\\$\\\\bullet\\$} /" \
   -e "s/^[ ]*-[ ][ ]*/   \\\\item{-} /" \
   -e "s/^[ ]*[A-Z0-9,()][A-Z0-9,() ][A-Z0-9,'?!\\.() -]*$/$CENTERED&}/" \
   -e "s/\\\\centerline{[ ]*/\\\\centerline{/g" \
   -e "s/\\[LINK\\]//g" \
   -e "s/\\[INLINE\\]//g" \
   -e "s/^[ ,	]*$//" \
   -e "$ a\\" \
   -e "\\\\vfill\\" \
   -e "\\\\eject\\" \
   -e "\\\\end" \
 < "$OUTFILE" > "$TMPFILE"

#
# Stage 4: read $TMPFILE, write $OUTFILE.
#
# In this section the whole file is taken to the hold buffer
# as *one single line* to look for titles and new paragraphs:
# every line is appended to the hold space (H); on the last line the hold
# space is copied back (g), the rules below run once over the complete text,
# and the result is printed (p; sed runs with -n).  Because H puts a newline
# in front of what it appends, the output starts with an empty line.
#
# Rules:
# - a numbered item ("7." or "12.") that follows an empty line is marked
#   with @@@ behind the dot
# - text starting with a capital letter is marked with @+@ at its end when
#   it follows an empty line, and with @+@ at its start when it is followed
#   by an empty line (stage 5 turns lines marked on both sides into titles)
# - an empty line in front of \newalinea is removed.
#   NOTE(review): the backslash-newline inside the double quotes is eaten
#   by the shell as a line continuation, so this rule removes both newlines;
#   the rule after it deliberately writes newlines -- confirm that the
#   difference is intended.
# - an empty line followed by $CTRSKIP is kept, but the skip is replaced by
#   the larger $MORSKIP
#
# Both file names are quoted so that names containing blanks work.
sed -n \
   -e "1,$ H" \
   -e "$ g" \
   -e "s/\\n\\n[ ]*[1-9][0-9]\\./&@@@/g" \
   -e "s/\\n\\n[ ]*[1-9]\\./&@@@/g" \
   -e "s/\\n\\n[ ]*[A-ZÄÖÜ][A-Za-zÄ-ü0-9 '@+=,-]*/&@+@/g" \
   -e "s/[A-ZÄÖÜ][A-Za-zÄ-ü0-9 '@+=,-]*\\n\\n/@+@&/g" \
   -e "s/\\n\\n\\\\newalinea/\
\\\\newalinea/g" \
   -e "s/\\n\\n$CTRSKIP/\\
\\
$MORSKIP/g" \
   -e "$ p" \
 < "$TMPFILE" > "$OUTFILE"


# Stage 5: read $OUTFILE, write $TMPFILE.
#
# Works line by line again and resolves the marks set in stage 4:
# - a line enclosed in @+@ marks becomes a title ({$TITLETEX ...})
# - lines starting with a bracketed lower-case roman numeral such as (ii),
#   (iv) or (ix) get a \par in front
# - a line starting with an upper-case roman numeral and a dot becomes a
#   title as well
# - "a. " style lines and the numbered items marked with @@@ become \item
# - $NIS_SED and $NIM_SED (empty when numbered items are switched off) turn
#   the remaining "7. " / "12. " line starts into items
# - all leftover @+@ and @@@ marks are deleted
# - blanks inside the item label and after \par are tidied up; @+@ is reused
#   for a moment to find the closing brace of a numeric item label
#
# Both file names are quoted so that names containing blanks work.
sed \
   -e "s/@+@..*@+@$/{$TITLETEX &}/" \
   -e "s/^[ ]*(ii*) */\\\\par &/" \
   -e "s/^[ ]*(i*vi*) */\\\\par &/" \
   -e "s/^[ ]*(i*xi*) */\\\\par &/" \
   -e "s/^[XVI][XVI]*\\. .*$/{$TITLETEX &}/" \
   -e "s/^[ ]*[a-z]\\. */\\\\par\\\\item{&} /" \
   -e "s/^[ ]*[1-9][0-9]\\.@@@/\\\\par\\\\vskip $ALIN_SKIP pt\\\\item{&} /" \
   -e "s/^[ ]*[1-9]\\.@@@/\\\\par\\\\vskip $ALIN_SKIP pt\\\\item{&} /" \
   -e "$NIS_SED" \
   -e "$NIM_SED" \
   -e "s/@+@//g" \
   -e "s/@@@//g" \
   -e "s/\\\\item{[ ]*/\\\\item{/" \
   -e "s/\\\\item{[0-9]*\\.[ ]*}/&@+@/" \
   -e "s/[ ]*}@+@/}/" \
   -e "s/\\\\par [ ]*/\\\\par /g" \
 < "$OUTFILE" > "$TMPFILE"


# The finished text is in $TMPFILE: copy it to the output file, run TeX on
# it when -t was given, and remove the work file.
# File names are quoted (and protected with --) so that names containing
# blanks or starting with a dash are passed on unchanged.
cp -- "$TMPFILE" "$OUTFILE"

if [ "$TEXFLAG" = "yes" ]; then
   tex "$OUTFILE"
fi

rm -- "$TMPFILE"
echo "done"
exit 0
