#!/usr/local/bin/gawk -f
# Convert super(sub)scripts and latin characters written in TeX-like to html
# $Id: charconv.awk,v 1.1 1999/11/14 14:57:25 aonuma Exp aonuma $
# Shuji AONUMA (aonuma@lcc-toulouse.fr)
#
# Variables read by the script:
#   DEBUG  when true, trace output is printed and the normal per-line
#          print is suppressed
#   DASH   when true, "---" / "--" are converted to dash entities
#   ROOT   URL prefix recovered from an existing "Converted by" footer
#   math   1 while inside $...$, -1 outside; persists across input lines
#
# NOTE(review): this copy was rebuilt from a text dump in which HTML tags
# inside string literals had been stripped and HTML entities decoded.
#   * Entity replacements were restored from the entity names kept in the
#     html2latex table comments next to each gsub().
#   * Tags (<SUB>, <SUP>, <I>, <BR>, footer markup) and the footer trigger
#     pattern are reconstructions -- confirm against a pristine copy.
# Deliberate fixes relative to the dumped code:
#   * \S and \o are applied after the Greek letters so that \Sigma, \omega
#     and \omicron are no longer clobbered by the shorter macros.
#   * yuml now matches \"y (the table repeated the yacute pattern \'y).
#   * Undefined entity names were replaced by the defined ones:
#     &c; -> &copy;, &Eth; -> &ETH;, &Epsi; -> &Epsilon;,
#     &emdash; -> &mdash;, &endash; -> &ndash;.
#   * Two unreachable duplicate rules (\copyright, \^{\i}) were dropped.
#   * Math() keeps its scratch variables local.

BEGIN {
    math = -1
    #DASH=1  # enable &ndash; and &mdash;
}

($0 == "") {
#   math=-1
}

# Lines containing a dollar sign go through math-mode handling first.
($0 ~ "\\$") {
    if (DEBUG) print math, "Math> " $0
    Math()
    if (DEBUG) print
}

($0) {
    TeX()
    Latin()
    # ~ not converted in Latin() && not after / is non-braking space.
    gsub("[^\\\\/~]~", "&\\&nbsp;")   # "x~" -> "x~&nbsp;"
    gsub("~&nbsp;", "\\&nbsp;")       # ... then drop the tilde itself
    gsub("\\\\~", "~")                # remaining \~ is a literal tilde
    gsub("@\\\\@", "\\")              # \ escaped in TeX()
}

# Recover ROOT from an existing footer: the text between the first double
# quote and "/scripts".  Yields "" when the line has no such URL.
($0 ~ /Converted by / ) {
    ROOT = substr($0, index($0, "\"") + 1)
    ROOT = substr(ROOT, 1, index(ROOT, "/scripts") - 1)
}

# NOTE(review): the trigger pattern was a stripped tag; </BODY> is assumed
# because the footer is printed before the matching line.  The footer markup
# is likewise assumed -- only the visible text, the format arguments and the
# quoted ".../scripts" URL expected by the rule above survive in the dump.
# NOTE(review): the UTC offset +0900 is hard-coded, as in the dumped code.
(tolower($0) ~ /<\/body>/) {
    if (ROOT)
        printf("<ADDRESS>Converted by <A HREF=\"%s/scripts/charconv.awk\">charconv.awk</A> on %s</ADDRESS>\n", ROOT, strftime("%Y-%m-%dT%T+0900"))
    else
        printf("<ADDRESS>Converted by charconv.awk on %s</ADDRESS>\n", strftime("%Y-%m-%dT%T+0900"))
}

(!DEBUG) { print }

# Math(): rewrite $0, passing every $...$ segment through TagMath().
# Reads and updates the global `math`.  A segment ending in a single
# backslash came from "\$": the backslash is turned back into "$" and the
# math state is toggled once extra, cancelling the toggle for that separator.
# n, M and i are locals (extra parameters, never passed by callers).
function Math(    n, M, i) {
    n = split($0, M, "$")
#   if (M[1]=="") math*=(-1)
#   if ($0 ~ /\\$$/) j=-1 else j=1
    $0 = ""
    for (i = 1; i <= n; i++) {
        if (DEBUG) print i, math, M[i]
        if (math == 1) {
            M[i] = TagMath(M[i])
        } else {
            # \$ --> $
            if (M[i] !~ /\\\\$/ && sub("\\\\$", "$", M[i])) math *= (-1)
        }
        $0 = sprintf("%s%s", $0, M[i])
        # NOTE(review): everything after "if (i" was lost in the dump; a
        # toggle at each "$" separator is the assumed completion.
        if (i < n) math *= (-1)
    }
}

# TagMath(T): return math-mode text T with <, > escaped and TeX
# sub/superscripts turned into <SUB>/<SUP> elements.
# NOTE(review): the head of this function was lost in the dump, including
# whatever inserted the @I@@ / @/I@@ italic placeholders translated at the
# end.  That statement is NOT reconstructed, so no italics are produced
# until it is restored from a pristine copy.
function TagMath(T) {
    # Escape before inserting tags so the tags themselves stay intact.
    gsub(">", "\\&gt;", T)
    gsub("<", "\\&lt;", T)
    # sup, sub
    #_{20}
    while (T ~ /[_][{]/) {
        sub("[_][{]", "<SUB>", T)
        sub("[}]", "</SUB>", T)
    }
    while (T ~ /[\^][{]/) {
        sub("[\\^][{]", "<SUP>", T)
        sub("[}]", "</SUP>", T)
    }
    # _2 : close after the single character first, then open at the marker
    while (T ~ /[_][^_{]/) {
        sub("[_][^_{]", "&</SUB>", T)
        sub("[_]", "<SUB>", T)
    }
    while (T ~ /[\^][^_{]/) {
        sub("[\\^][^_{]", "&</SUP>", T)
        sub("[\\^]", "<SUP>", T)
    }
    gsub("@I@@", "<I>", T)
    gsub("@/I@@", "</I>", T)
    return (T)
    #Other functions of math mode are not supported.
}

# TeX(): line breaks, escaped backslashes, dashes and \slash in $0.
# NOTE(review): <BR> is assumed for the stripped line-break tags.
function TeX() {
    sub("\\\\\\\\\\[.*\\]$", "<BR>\n<BR>")   # trailing \\[...]
    sub("\\\\\\\\$", "<BR>")                 # trailing \\
    gsub("\\\\\\\\", "@\\@")                 # protect other \\ ; undone in the main rule
    if (DASH) {
        gsub("---", "\\&mdash;")   # { "emdash", "---", '-' },
        gsub("--", "\\&ndash;")    # { "endash", "--", '-' },
    }
    gsub("\\\\slash", "/")
}

# Latin(): replace TeX accent/symbol macros in $0 by HTML entities.
# In every replacement "\\&" yields a literal ampersand.
function Latin() {
    # taken from html2latex.c (HTML2LaTeX -- Converting HTML files to LaTeX)
    # by Frans Faase
#   gsub("~", NULL)             # /*160*/ { NULL, "~", '\0' },
#   gsub("!'", NULL)            # /*161*/ { NULL, "!'", '\0' },
#   gsub(NULL, NULL)            # /*162*/ { NULL, NULL, '\0' }, /* "$\\\\cents" */
#   gsub("\\\\pounds", NULL)    # /*163*/ { NULL, "\\\\pounds", '\0' },
#   gsub(NULL, NULL)            # /*164*/ { NULL, NULL, '\0' },
#   gsub(NULL, NULL)            # /*165*/ { NULL, NULL, '\0' }, /* Yen */
#   gsub("{\\\\tt |}", NULL)    # /*166*/ { NULL, "{\\\\tt |}", '\0' },
    # /*167*/ sect (\S) is applied after the Greek letters, see below.
#   gsub("{\\\\tt{}\"{}}", NULL) # /*168*/ { NULL, "{\\\\tt{}\"{}}", '\0' },
    gsub("\\\\copyright ", "\\&copy;")  # /*169*/ { "copy", "\\\\copyright ", 'c' },
#   gsub(NULL, NULL)            # /*170*/ { NULL, NULL, '\0' },
#   gsub("\\\\ll", NULL)        # /*171*/ { NULL, "$\\\\ll", '\0' },
#   gsub("\\\\neg", NULL)       # /*172*/ { NULL, "$\\\\neg", '\0' },
#   gsub("\\\\-", NULL)         # /*173*/ { NULL, "\\\\-", '\0' },
#   gsub("{{\\\\ooalign{\\\\hfil\\\\raise.07ex\\\\hbox{R}\\\\hfil\\\\crcr\\\\mathhexbox20D}}}", NULL) # /*174*/
#   gsub(NULL, NULL)            # /*175*/ { NULL, NULL, '\0' }, /* "\\\\B " */
#   gsub("{}^\\\\circ", NULL)   # /*176*/ { NULL, "${}^\\\\circ", '\0' },
#   gsub("\\\\pm", NULL)        # /*177*/ { NULL, "$\\\\pm", '\0' },
#   gsub("{}^2", NULL)          # /*178*/ { NULL, "${}^2", '\0' },
#   gsub("{}^3", NULL)          # /*179*/ { NULL, "${}^3", '\0' },
#   gsub("\\\\'{}", NULL)       # /*180*/ { NULL, "\\\\'{}", '\0' },
#   gsub("\\\\mu", NULL)        # /*181*/ { NULL, "$\\\\mu", '\0' },
#   gsub("{\\\\P}", NULL)       # /*182*/ { NULL, "{\\\\P}", '\0' },
#   gsub(NULL, NULL)            # /*183*/ { NULL, NULL, '\0' }, /* "\\\\D " */
#   gsub("\\\\c{}", NULL)       # /*184*/ { NULL, "\\\\c{}", '\0' },
#   gsub("{}^1", NULL)          # /*185*/ { NULL, "${}^1", '\0' },
#   gsub(NULL, NULL)            # /*186*/ { NULL, NULL, '\0' }, /* ^\underbar{o} */
#   gsub("\\\\gg", NULL)        # /*187*/ { NULL, "$\\\\gg", '\0' },
#   gsub("\\\\frac14", NULL)    # /*188*/ { NULL, "$\\\\frac14", '\0' },
#   gsub("\\\\frac12", NULL)    # /*189*/ { NULL, "$\\\\frac12", '\0' },
#   gsub("\\\\frac34", NULL)    # /*190*/ { NULL, "$\\\\frac34", '\0' },
#?? gsub("?'", "\\&iquest;")    # /*191*/ { "iquest", "?'", '\0' },
    gsub("\\\\`A", "\\&Agrave;")        # /*192*/ { "Agrave", "\\\\`A", 'A' },
    gsub("\\\\'A", "\\&Aacute;")        # /*193*/ { "Aacute", "\\\\'A", 'A' },
    gsub("\\\\\\^A", "\\&Acirc;")       # /*194*/ { "Acirc", "\\\\^A", 'A' },
    gsub("\\\\~A", "\\&Atilde;")        # /*195*/ { "Atilde", "\\\\~A", 'A' },
    gsub("\\\\\"A", "\\&Auml;")         # /*196*/ { "Auml", "\\\\\"A", 'A' },
    gsub("{\\\\AA}", "\\&Aring;")       # /*197*/ { "Aring", "{\\\\AA}", 'A' },
    gsub("{\\\\AE}", "\\&AElig;")       # /*198*/ { "AElig", "{\\\\AE}", 'A' },
    gsub("\\\\c C", "\\&Ccedil;")       # /*199*/ { "Ccedil", "\\\\c C", 'C' },
    gsub("\\\\c{C}", "\\&Ccedil;")      # /*199*/ { "Ccedil", "\\\\c C", 'C' },
    gsub("\\\\`E", "\\&Egrave;")        # /*200*/ { "Egrave", "\\\\`E", 'E' },
    gsub("\\\\'E", "\\&Eacute;")        # /*201*/ { "Eacute", "\\\\'E", 'E' },
    gsub("\\\\\\^E", "\\&Ecirc;")       # /*202*/ { "Ecirc", "\\\\^E", 'E' },
    gsub("\\\\\"E", "\\&Euml;")         # /*203*/ { "Euml", "\\\\\"E", 'E' },
    gsub("\\\\`I", "\\&Igrave;")        # /*204*/ { "Igrave", "\\\\`I", 'I' },
    gsub("\\\\'I", "\\&Iacute;")        # /*205*/ { "Iacute", "\\\\'I", 'I' },
    gsub("\\\\\\^I", "\\&Icirc;")       # /*206*/ { "Icirc", "\\\\^I", 'I' },
    gsub("\\\\\"I", "\\&Iuml;")         # /*207*/ { "Iuml", "\\\\\"I", 'I' },
#   gsub(NULL, "\\&ETH;")               # /*208*/ { "ETH", NULL, 'D' }, /* -D */
    gsub("\\\\~N", "\\&Ntilde;")        # /*209*/ { "Ntilde", "\\\\~N", 'N' },
    gsub("\\\\`O", "\\&Ograve;")        # /*210*/ { "Ograve", "\\\\`O", 'O' },
    gsub("\\\\'O", "\\&Oacute;")        # /*211*/ { "Oacute", "\\\\'O", 'O' },
    gsub("\\\\\\^O", "\\&Ocirc;")       # /*212*/ { "Ocirc", "\\\\^O", 'O' },
    gsub("\\\\~O", "\\&Otilde;")        # /*213*/ { "Otilde", "\\\\~O", 'O' },
    gsub("\\\\\"O", "\\&Ouml;")         # /*214*/ { "Ouml", "\\\\\"O", 'O' },
#   gsub("\\\\times", NULL)             # /*215*/ { NULL, "$\\\\times", 'x' },
    gsub("\\\\times", "x")              # /*215*/ { NULL, "$\\\\times", 'x' },
    gsub("{\\\\O}", "\\&Oslash;")       # /*216*/ { "Oslash", "{\\\\O}", 'O' },
    gsub("\\\\`U", "\\&Ugrave;")        # /*217*/ { "Ugrave", "\\\\`U", 'U' },
    gsub("\\\\'U", "\\&Uacute;")        # /*218*/ { "Uacute", "\\\\'U", 'U' },
    gsub("\\\\\\^U", "\\&Ucirc;")       # /*219*/ { "Ucirc", "\\\\^U", 'U' },
    gsub("\\\\\"U", "\\&Uuml;")         # /*220*/ { "Uuml", "\\\\\"U", 'U' },
    gsub("\\\\'Y", "\\&Yacute;")        # /*221*/ { "Yacute", "\\\\'Y", 'Y' },
#   gsub(NULL, "\\&THORN;")             # /*222*/ { "THORN", NULL, 'P' }, /* p thorn */
    gsub("{\\\\ss}", "\\&szlig;")       # /*223*/ { "szlig", "{\\\\ss}", 's' },
    gsub("\\\\`a", "\\&agrave;")        # /*224*/ { "agrave", "\\\\`a", 'a' },
    gsub("\\\\'a", "\\&aacute;")        # /*225*/ { "aacute", "\\\\'a", 'a' },
    gsub("\\\\\\^a", "\\&acirc;")       # /*226*/ { "acirc", "\\\\^a", 'a' },
    gsub("\\\\~a", "\\&atilde;")        # /*227*/ { "atilde", "\\\\~a", 'a' },
    gsub("\\\\\"a", "\\&auml;")         # /*228*/ { "auml", "\\\\\"a", 'a' },
    gsub("{\\\\aa}", "\\&aring;")       # /*229*/ { "aring", "{\\\\aa}", 'a' },
    gsub("{\\\\ae}", "\\&aelig;")       # /*230*/ { "aelig", "{\\\\ae}", 'a' },
    gsub("\\\\c c", "\\&ccedil;")       # /*231*/ { "ccedil", "\\\\c c", 'c' },
    gsub("\\\\c{c}", "\\&ccedil;")      # /*231*/ { "ccedil", "\\\\c c", 'c' },
    gsub("\\\\`e", "\\&egrave;")        # /*232*/ { "egrave", "\\\\`e", 'e' },
    gsub("\\\\'e", "\\&eacute;")        # /*233*/ { "eacute", "\\\\'e", 'e' },
    gsub("\\\\\\^e", "\\&ecirc;")       # /*234*/ { "ecirc", "\\\\^e", 'e' },
    gsub("\\\\\"e", "\\&euml;")         # /*235*/ { "euml", "\\\\\"e", 'e' },
    gsub("\\\\`{\\\\i}", "\\&igrave;")  # /*236*/ { "igrave", "\\\\`{\\\\i}", 'i' },
    gsub("\\\\'{\\\\i}", "\\&iacute;")  # /*237*/ { "iacute", "\\\\'{\\\\i}", 'i' },
    gsub("\\\\\\^{\\\\i}", "\\&icirc;") # /*238*/ { "icirc", "\\\\^{\\\\i}", 'i' },
    gsub("\\\\\"{\\\\i}", "\\&iuml;")   # /*239*/ { "iuml", "\\\\\"{\\\\i}", 'i' },
    gsub("\\\\v o", "\\&eth;")          # /*240*/ { "eth", "\\\\v o", 'e' },
    gsub("\\\\v{o}", "\\&eth;")         # /*240*/ { "eth", "\\\\v o", 'e' },
    gsub("\\\\~n", "\\&ntilde;")        # /*241*/ { "ntilde", "\\\\~n", 'n' },
    gsub("\\\\`o", "\\&ograve;")        # /*242*/ { "ograve", "\\\\`o", 'o' },
    gsub("\\\\'o", "\\&oacute;")        # /*243*/ { "oacute", "\\\\'o", 'o' },
    gsub("\\\\\\^o", "\\&ocirc;")       # /*244*/ { "ocirc", "\\\\^o", 'o' },
    gsub("\\\\~o", "\\&otilde;")        # /*245*/ { "otilde", "\\\\~o", 'o' },
    gsub("\\\\\"o", "\\&ouml;")         # /*246*/ { "ouml", "\\\\\"o", 'o' },
#   gsub("\\\\div", NULL)               # /*247*/ { NULL, "$\\\\div", '\0' },
    # /*248*/ oslash (\o) is applied after the Greek letters, see below.
    gsub("\\\\`u", "\\&ugrave;")        # /*249*/ { "ugrave", "\\\\`u", 'u' },
    gsub("\\\\'u", "\\&uacute;")        # /*250*/ { "uacute", "\\\\'u", 'u' },
    gsub("\\\\\\^u", "\\&ucirc;")       # /*251*/ { "ucirc", "\\\\^u", 'u' },
    gsub("\\\\\"u", "\\&uuml;")         # /*252*/ { "uuml", "\\\\\"u", 'u' },
    gsub("\\\\'y", "\\&yacute;")        # /*253*/ { "yacute", "\\\\'y", 'y' },
    # The dumped code spelled this macro "\thron"; both spellings are accepted.
    gsub("\\\\th(or|ro)n", "\\&thorn;") # /*254*/ { "thorn", "p", 'p' }, /* p thorn */
    # The table listed \'y here, which yacute above always consumes first.
    gsub("\\\\\"y", "\\&yuml;")         # /*255*/ { "yuml", "\\\\\"y", 'y' },

    gsub("\\\\aa", "\\&aring;")         # { "aring", "\\\\aa", 'a' },
    gsub("\\\\v O", "\\&ETH;")          # { "Eth", "\\\\v O", 'E' },
    gsub("\\\\v{O}", "\\&ETH;")         # { "Eth", "\\\\v O", 'E' },
    # NOTE(review): kept as dumped; this appends ";" to any "&Thorn" in the
    # input, so an input "&Thorn;" becomes "&Thorn;;" -- confirm intent.
    gsub("\\&Thorn", "\\&Thorn;")       # { "Thorn", "P", 'P' },
    gsub("\\\\\"Y", "\\&Yuml;")         # { "Yuml", "\\\\\"Y", 'Y' },
    gsub("\\\\quad{}", "\\&emsp;")      # { "emsp", "\\\\quad{}", ' ' },
    gsub("\\\\enskip{}", "\\&ensp;")    # { "ensp", "\\\\enskip{}",' ' },
#   gsub("", "\\&shy;")                 # { "shy", "", 0 },
#   gsub("", "\\&pd;")                  # { "pd", "", 0 },
#   gsub("", "\\&reg;")                 # { "reg", "", 0 },
#   gsub("", "\\&trade;")               # { "trade", "", 0 },

    gsub("\\\\alpha", "\\&alpha;")      # { "alpha", "$\\\\alpha", 0 },
    gsub("\\\\beta", "\\&beta;")        # { "beta", "$\\\\beta", 0 },
    gsub("\\\\gamma", "\\&gamma;")      # { "gamma", "$\\\\gamma", 0 },
    gsub("\\\\delta", "\\&delta;")      # { "delta", "$\\\\delta", 0 },
    gsub("\\\\epsilon", "\\&epsi;")     # { "epsi", "$\\\\epsilon",0 },
    gsub("\\\\zeta", "\\&zeta;")        # { "zeta", "$\\\\zeta", 0 },
    gsub("\\\\eta", "\\&eta;")          # { "eta", "$\\\\eta", 0 },
    gsub("\\\\theta", "\\&theta;")      # { "theta", "$\\\\theta", 0 },
    gsub("\\\\vartheta", "\\&thetav;")  # { "thetav", "$\\\\vartheta",0 },
    gsub("\\\\iota", "\\&iota;")        # { "iota", "$\\\\iota", 0 },
    gsub("\\\\kappa", "\\&kappa;")      # { "kappa", "$\\\\kappa", 0 },
    gsub("\\\\lambda", "\\&lambda;")    # { "lambda", "$\\\\lambda",0 },
    gsub("\\\\mu", "\\&mu;")            # { "mu", "$\\\\mu", 0 },
    gsub("\\\\nu", "\\&nu;")            # { "nu", "$\\\\nu", 0 },
    gsub("\\\\xi", "\\&xi;")            # { "xi", "$\\\\xi", 0 },
    gsub("\\\\omicron", "\\&omicron;")  # { "omicron","o", 0 },
    gsub("\\\\pi", "\\&pi;")            # { "pi", "$\\\\pi", 0 },
    gsub("\\\\rho", "\\&rho;")          # { "rho", "$\\\\rho", 0 },
    gsub("\\\\sigma", "\\&sigma;")      # { "sigma", "$\\\\sigma", 0 },
    gsub("\\\\tau", "\\&tau;")          # { "tau", "$\\\\tau", 0 },
    gsub("\\\\upsilon", "\\&upsi;")     # { "upsi", "$\\\\upsilon",0 },
    gsub("\\\\phi", "\\&phi;")          # { "phi", "$\\\\phi", 0 },
    gsub("\\\\chi", "\\&chi;")          # { "chi", "$\\\\chi", 0 },
    gsub("\\\\psi", "\\&psi;")          # { "psi", "$\\\\psi", 0 },
    gsub("\\\\omega", "\\&omega;")      # { "omega", "$\\\\omega", 0 },

    gsub("\\\\Alpha", "\\&Alpha;")      # { "Alpha", "A", 'A' }, #no \Alpha in TeX
    gsub("\\\\Beta", "\\&Beta;")        # { "Beta", "B", 'B' },
    gsub("\\\\Gamma", "\\&Gamma;")      # { "Gamma", "$\\\\Gamma", 0 },
    gsub("\\\\Delta", "\\&Delta;")      # { "Delta", "$\\\\Delta", 0 },
    # Accepts \Epsi (as before) and \Epsilon; emits the defined entity name.
    gsub("\\\\Epsi(lon)?", "\\&Epsilon;") # { "Epsi", "E", 'E' },
    gsub("\\\\Zeta", "\\&Zeta;")        # { "Zeta", "Z", 'Z' },
    gsub("\\\\Eta", "\\&Eta;")          # { "Eta", "H", 'H' },
    gsub("\\\\Theta", "\\&Theta;")      # { "Theta", "$\\\\Theta", 0 },
    gsub("\\\\Iota", "\\&Iota;")        # { "Iota", "I", 'I' },
    gsub("\\\\Kappa", "\\&Kappa;")      # { "Kappa", "K", 'K' },
    gsub("\\\\Lambda", "\\&Lambda;")    # { "Lambda", "$\\\\Lambda",0 },
    gsub("\\\\Mu", "\\&Mu;")            # { "Mu", "M", 'M' },
    gsub("\\\\Nu", "\\&Nu;")            # { "Nu", "N", 'N' },
    gsub("\\\\Xi", "\\&Xi;")            # { "Xi", "$\\\\Xi", 0 },
    gsub("\\\\Pi", "\\&Pi;")            # { "Pi", "$\\\\Pi", 0 },
    gsub("\\\\Rho", "\\&Rho;")          # { "Rho", "R", 'R' },
    gsub("\\\\Sigma", "\\&Sigma;")      # { "Sigma", "$\\\\Sigma", 0 },
    gsub("\\\\Tau", "\\&Tau;")          # { "Tau", "T", 'T' },
    gsub("\\\\Upsilon", "\\&Upsi;")     # { "Upsi", "$\\\\Upsilon",0 },
    gsub("\\\\Phi", "\\&Phi;")          # { "Phi", "$\\\\Phi", 0 },
    gsub("\\\\Chi", "\\&Chi;")          # { "Chi", "X", 'X' },
    gsub("\\\\Psi", "\\&Psi;")          # { "Psi", "$\\\\Psi", 0 },
    gsub("\\\\Omega", "\\&Omega;")      # { "Omega", "$\\\\Omega", 0 },

    # Short macros that are prefixes of Greek names must run after them:
    # \S would otherwise eat the start of \Sigma, \o that of \omega/\omicron.
    gsub("\\\\S", "\\&sect;")           # /*167*/ { "sect", "\\\\S", '\0' },
    gsub("\\\\o", "\\&oslash;")         # /*248*/ { "oslash", "\\\\o", 'o' },

    # Last: escaped ampersand, then any remaining \" becomes a plain quote.
    gsub("\\\\\\&", "\\&amp;")          # { "amp", "\\\\\\&", '\\&' },
    gsub("\\\\\"", "\\&quot;")          # { "quot", "\\\\\"{}", '"' },
}