#!/usr/local/bin/gawk -f
# plain text file to html converter.
# Copyright 1999 Shuji AONUMA (aonuma@lcc-toulouse.fr)
# There is absolutely no warranty for the program.
# You can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 dated June, 1991.
#
# $Id: plain2html.awk,v 1.7 1999/11/24 10:04:16 aonuma Exp aonuma $
#
BEGIN {
    ## How to make an input plain text file.
    ## Spaces and tabs at the beginning and end of a line are ignored.
    ## A command line is a line that begins with a single period followed
    ## by COMMAND (a command for plain2html.awk), e.g. ".SAMPLE foo bar".
    ## PREAMBLE is a special part of the file.
    ## Some commands should be put in PREAMBLE (marked as [P] in this
    ## manual), some behave the same ([PB]) or differently in PREAMBLE
    ## or not ([P]=blah blah blah, [B]=tac tac tac),
    ## and others are ignored in PREAMBLE ([B]).
    ## PREAMBLE runs from the top line of an input file
    ## to the line before the first non-COMMAND line.  In addition,
    ## PREAMBLE commands can be specified from the gawk command line,
    ## for example, gawk -v SAMPLE="foo bar".
    ## BODY is the part after the first non-command line.

    # --- header, footer ---
    # TI title
    #   [P]=generate a <HEAD> part, <TITLE>, <H1>; [B]=<H1> heading only.
    # SC, SSC, SSSC: <H2>, <H3>, <H4> for section, subsection, subsubsection
    # LANG="ja"
    #   NOTE(review): kept as a comment, as it appears in the source; confirm
    #   that no default language was meant to be assigned here.
    # CHARSET is set by SetCharset(LANG)

    # if (xxx) @xxx --> yyy substitutions
    if (!BASE) BASE = "."       # e.g. @BASE/index.html; "@BASE" must be followed by "/"
    if (!ROOT) ROOT = ".."      # @ROOT/

    # FOOT (internal): the footer part is printed during END processing
    if (!ADDRESS) ADDRESS = "default@address"   # mail address (printed in the footer part)
    if (!NAME) NAME = "default name"            # name (easily confused with NAME{}{}, though)
    # If FOOTERFILE is set, it is used instead of <address>...</address>.
    if (!FOOTERFILE) FOOTERFILE = "@ROOT/etc/footer.@LANG.txt"
    # NOFOOTER=1   (do not print the footer)
    # ONLYFOOTER=1 (print only the footer; handled at the end of BEGIN)

    # --- tables ---
    if (!TS) TS = "not@defined%yet"     # field separator of table lines
    # NOTE(review): TH is treated as live code (Table() compares against it);
    # confirm it was not commented out in the original layout.
    if (!TH) TH = "#"                   # marks a TH (table heading) line
    ## If a field in a TH line begins with "=", this column is aligned center.
    ## If a field in a TH line begins with ">", this column is aligned right.
    ## NOTH: no title line (TH) in TABLE
    if (!TABLEHEADER) TABLEHEADER = "<TABLE BORDER=5 CELLPADDING=0 CELLSPACING=0>"
    TABLE = 0           # (internal) TABLE>0 while a table is being processed
    TBLANK = "---"      # default item for blank data
    # TTAG=0            # if 1, create NAME anchor
    # CAPTION

    # --- lists ---
    if (!LISTTYPE) LISTTYPE = "UL"
    if (!LI) LI = "@"   # marks an LI (list item) line
    LIST = 0            # (internal) LIST=1 while a list is being processed
    # DL (internal): description list mode

    # --- lines not handled by anything above ---
    # 0: do not output, 1: output (for compatibility with old versions)
    if (!OUT) OUT = 1
    # else OUT=0
    # TEXT (internal): insertion of <P>
    # \\, HREF{}{}, NAME, MARK{}{}

    # MENU=1: menu mode
    MENU = 1
    if (NOMENU) MENU = 0
    if (MENU+0) MENU = "MENULIST"       # when MENU is numeric, e.g. MENU=1

    # Default <body> tag.  Advertising markup that a hosting service had
    # injected after this tag (and which broke the string literal) was removed.
    if (!BODY) BODY = "<body BGCOLOR=\"#ffffC0\" TEXT=\"#102c5c\" LINK=\"#0000FF\" VLINK=\"#339930\" ALINK=\"#FF3300\">"

    if (!MAINPAGE) {
        MAINPAGE = "@ROOT/index.@LANG.html"     # page linked as HOME from the menu
    }
    # DESC  ## [P] <META NAME="description" ...>
    # KW    ## [P] <META NAME="keywords" ...>
    PREAMBLE = 1        # (internal)

    # ONLYFOOTER is handled last so that the footer file is processed with
    # the TS/LI/OUT defaults above already in place (with an empty TS every
    # footer line would match and be sent to Table()).  "exit" still runs
    # END, so the menu is switched off to keep END from printing it.
    if (ONLYFOOTER) {
        MENU = 0
        PrintFooter()
        exit
    }
}

END {
    # Close whatever is still open, then print the automatic menu and footer.
    if (TABLE) print "</TABLE>"
    if (LIST) printf("</%s>\n\n", LISTTYPE)
    if (MENU) {
        printf("<CENTER><A NAME=\"%s\"><SMALL>auto menu</SMALL></A>\n", MENU)
        printf("[<A HREF=\"#ToP\"><SMALL>TOP of this page</SMALL></A>]\n")
        for (i = 0; i < menu; i++) print Menu[i]
        print "<BR>"
        if (MAINPAGE) {
            MAINPAGE = Gsub(MAINPAGE)
            printf("<A HREF=\"%s\">HOME</A>\n", MAINPAGE)
        }
        print "</CENTER>"
    }
    # FOOT is set by PrintHeader(), so a footer is only printed after a header.
    if (FOOT && !NOFOOTER) PrintFooter()
}

# With -v DEBUG=1 every raw input line is echoed in angle brackets.
(DEBUG) {
    print "<"$0">"
}

# Every input line goes through Proc().
{
    Proc()
}

## functions

# Print the document head (<HEAD>, optional META tags, <TITLE>), the body tag
# and the top-level <H1> heading for title X.  Sets FOOT and clears TI.
function PrintHeader(X) {
    print "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">"
    if (LANG) printf("<HTML LANG=\"%s\">\n", LANG)
    else print "<HTML>"
    print "<HEAD>"
    print "\t<!-- This HTML file has been created by plain2html.awk -->"
    if (CHARSET) printf("\t<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=%s\">\n", CHARSET)
    if (DESC) printf("\t<META NAME=\"description\" CONTENT=\"%s\">\n", DESC)
    if (KW) printf("\t<META NAME=\"keywords\" CONTENT=\"%s\">\n", KW)
    printf("\t<TITLE>%s</TITLE>\n", X)
    print "</HEAD>\n"
    # The fallback used to be a bare string expression, which printed nothing.
    if (BODY) print BODY
    else print "<BODY>"
    printf("<CENTER><H1><A NAME=\"ToP\">%s</A></H1></CENTER>\n", X)
    FOOT = 1
    TI = ""
}

# Print the page footer: the contents of FOOTERFILE (each line run through
# Proc()) or, when FOOTERFILE is empty, an <ADDRESS> block; then the
# "Converted by" line and the closing </body></HTML>.
# "path" is a local variable, not a parameter for callers.
function PrintFooter(    path) {
    if (FOOTERFILE) {
        FOOTERFILE = Gsub(FOOTERFILE)
        # Read through a local copy so that a command inside the footer file
        # cannot redirect the loop to another file.
        path = FOOTERFILE
        while ((getline < path) > 0) Proc()
        close(path)
    } else {
        if (ADDRESS) {
            if (!NAME) NAME = ADDRESS
            printf("<ADDRESS><A HREF=\"mailto:%s\">%s</A></ADDRESS>\n", ADDRESS, NAME)
        }
    }
    print "<HR>"
    if (ROOT) printf("<FONT SIZE=-3>Converted by <A HREF=\"%s/scripts/plain2html.awk\">plain2html.awk</A> on %s</FONT>\n", ROOT, strftime("%Y-%m-%dT%T+0900"))
    else printf("<FONT SIZE=-3>Converted by plain2html.awk on %s</FONT>\n", strftime("%Y-%m-%dT%T+0900"))
    print "</body>"
    print "</HTML>"
}

# Print heading A at level n (<Hn>) with a NAME anchor and append a matching
# entry to the Menu[] array.  The anchor name is left in the global NTAG and
# recorded in NTAGS (a ":"-delimited list of names already used).
function PrintSection(A, n) {
    if (A ~ "<A NAME=\"") {
        # The heading already carries an anchor: reuse its name.
        NTAG = substr(A, index(A, "<A NAME=\"") + 9)
        NTAG = substr(NTAG, 1, index(NTAG, "\"") - 1)
        printf("\n<H%i>%s</H%i>\n", n, A, n)
    } else {
        NTAG = A
        gsub("[ \t]+", "_", NTAG)
        # Name already used: make it unique with the record number.
        if (NTAGS ~ ":"NTAG":") NTAG = sprintf("%s%03i", NTAG, NR)
        # Anything outside the safe set gets a generated name.  The "-" is
        # placed last so it is literal; it used to form a "+" to "_" range
        # that let characters such as "<", ">" and "/" through.
        if (NTAG ~ /[^.,:;+_0-9A-Za-z-]/) NTAG = sprintf("%s%03i", "t", NR)
        printf("\n<H%i><A NAME=\"%s\">%s</A></H%i>\n", n, NTAG, A, n)
    }
    NTAGS = NTAGS":"NTAG":"
    if (n == 1) Menu[menu++] = sprintf("[<A HREF=\"#%s\"><BIG>%s</BIG></A>]", NTAG, A)
    if (n == 2) Menu[menu++] = sprintf("[<A HREF=\"#%s\"><STRONG>%s</STRONG></A>]", NTAG, A)
    if (n == 3) Menu[menu++] = sprintf("[<A HREF=\"#%s\">%s</A>]", NTAG, A)
    if (n == 4) Menu[menu++] = sprintf("[<A HREF=\"#%s\"><SMALL>%s</SMALL></A>]", NTAG, A)
}

# Print the "go to menu" link once.  It does nothing when NTAGS is already
# non-empty; otherwise it seeds NTAGS with the menu and "ToP" anchor names.
function PrintMenuTop(MENU) {
    if (NTAGS) return
    NTAGS = ":"MENU":ToP"
    printf("<CENTER><A HREF=\"#%s\"><SMALL>go to menu</SMALL></A></CENTER>\n", MENU)
}

# Split EXIN around the first "EXTAG{..}{..}" macro.
#   EXTAG  macro name (e.g. "HREF")
#   EXn    number of brace arguments to extract
#   EXIN   input text
# Results are left in globals: EXX (text before the macro), EX[1..EXn]
# (arguments, "" when missing) and EXY (text after the last argument taken).
function Expand(EXTAG, EXn, EXIN) {
    # Always locate the macro with index().  A former shortcut compared only
    # the tag name with the start of the line, so text such as "NAMES: ..."
    # in front of a later NAME{...} was dropped.
    EXX = substr(EXIN, 1, index(EXIN, EXTAG"{") - 1)
    EXY = substr(EXIN, index(EXIN, EXTAG"{") + length(EXTAG))
    EXi = 1
    # Peel one {...} group per iteration off the front of EXY.
    while (EXY ~ /^[{]/ && EXi <= EXn) {
        EX[EXi] = EXY
        EXY = substr(EX[EXi], index(EX[EXi], "}") + 1)
        EX[EXi] = substr(EX[EXi], 2, index(EX[EXi], "}") - 2)
        # Nested braces: keep extending up to the next "}" until balanced.
        while (TestBraces("{", "}", EX[EXi])) {
            EX[EXi] = sprintf("%s}%s", EX[EXi], substr(EXY, 1, index(EXY, "}") - 1))
            EXY = substr(EXY, index(EXY, "}") + 1)
        }
        EXi++
    }
    while (EXi <= EXn) EX[EXi++] = ""
}

# Return true when TC contains more matches of TA than of TB
# (i.e. more opening than closing braces).
function TestBraces(TA, TB, TC) {
    return (gsub(TA, TA, TC) > gsub(TB, TB, TC))
}

# Handle one line while in description-list mode (DL set).
# "/DL" ends the list; a line starting with LI becomes a <DT> and opens a
# <DD>; other non-empty lines are printed as-is; an empty line closes the
# current <DD>.  $0 is always consumed.
function ProcDL() {
    if ($0 == "/DL") {
        if (DD) print "\t</DD>\n"
        print "</DL>\n"
        DL = DD = 0
        $0 = ""
        return
    }
    if (substr($0, 1, 1) == LI) {
        if (DD) print "\t</DD>\n"
        $0 = substr($0, 2)
        printf("\t<DT>%s</DT>\n\t<DD>", $0)
    } else {
        if ($0) {
            print
            DD = 1
        } else {
            if (DD) print "\t</DD>\n"
            DD = 0
        }
    }
    $0 = ""
    return
}

# Set the global CHARSET from a language code
# ("ja" -> EUC-JP, anything else -> ISO-8859-1).
function SetCharset(Lang) {
    #CHARSET="us-ascii"
    CHARSET = "ISO-8859-1"
    if (Lang == "ja") CHARSET = "EUC-JP"
    if (Lang == "fr") CHARSET = "ISO-8859-1"
}

# Return gs with "@BASE/", "@ROOT/" and "@LANG" replaced by the current
# BASE, ROOT and LANG values.  When a value is empty the placeholder is
# removed (for LANG, together with the dot in front of it).
function Gsub(gs) {
    if (BASE) gsub("@BASE/", BASE"/", gs)
    else gsub("@BASE/", "", gs)
    if (ROOT) gsub("@ROOT/", ROOT"/", gs)
    else gsub("@ROOT/", "", gs)
    if (LANG) gsub("@LANG", LANG, gs)
    else gsub("\\.@LANG", "", gs)
    return(gs)
}

# Process the current record ($0): expand @xxx placeholders and the
# HREF{}/NAME{}/MARK{} macros, execute "." commands, and print headings,
# list items, table rows and plain text.
# "incfile" is a local variable, not a parameter for callers.
function Proc(    incfile) {
    if ($0 ~ "@") {     # for @xxx --> yyy substitutions
        $0 = Gsub($0)
    }
    if ($0) {
        gsub("^[ \t]+", "")
        gsub("[ \t]+$", "")
        sub("\\\\\\\\$", "<BR>")        # trailing "\\" becomes a line break
        while ($0 ~ "HREF{") {
            Expand("HREF", 2, $0)
            if (!EX[2]) EX[2] = EX[1]
            $0 = sprintf("%s<A HREF=\"%s\">%s</A>%s", EXX, EX[1], EX[2], EXY)
        }
        while ($0 ~ "NAME{") {
            Expand("NAME", 2, $0)
            if (!EX[2]) EX[2] = EX[1]
            $0 = sprintf("%s<A NAME=\"%s\">%s</A>%s", EXX, EX[1], EX[2], EXY)
        }
        while ($0 ~ "MARK{") {
            Expand("MARK", 3, $0)
            if (!EX[3]) {
                # Default closing tag: the first word of the opening tag
                # ("FONT SIZE=2" -> "FONT").  This used to be taken from EXX,
                # the text in front of the macro.
                if (EX[1] ~ " ") EX[3] = substr(EX[1], 1, index(EX[1], " ") - 1)
                else EX[3] = EX[1]
            }
            $0 = sprintf("%s<%s>%s</%s>%s", EXX, EX[1], EX[2], EX[3], EXY)
        }
    }
    if (DL) { ProcDL() }
    if ($0 == "" && TEXT && !LIST) {
        print "<P>\n"
        TEXT = ""
    }
    if ($0 ~ /^\./) {
        # Command line: X[1] is the command name, X[2] the rest of the line.
        # With no argument index() returns 0, so X[2] is the command name
        # itself (a true value); an argument of "0" becomes "".
        A = substr($0, 2)
        X[1] = substr(A, 1, index(A" ", " ") - 1)
        X[2] = substr(A, index(A, " ") + 1)
        if (X[2] == 0) X[2] = ""
        if (X[1] == "TS") { TS = X[2]; $0 = "" }
        if (X[1] == "TH") { TH = X[2]; $0 = "" }
        if (X[1] == "NOTH") { NOTH = X[2]; $0 = "" }
        if (X[1] == "TABLEHEADER") { TABLEHEADER = X[2]; $0 = "" }
        if (X[1] == "TI") { TI = X[2]; $0 = "" }
        if (X[1] == "TBLANK") { TBLANK = X[2]; $0 = "" }
        if (X[1] == "TTAG") { TTAG = 1; $0 = "" }
        if (X[1] == "CAPTION") { CAPTION = X[2]; $0 = "" }
        if (X[1] == "LANG") { LANG = X[2]; $0 = ""; SetCharset(LANG) }
        if (X[1] == "CHARSET") { CHARSET = X[2]; $0 = "" }
        if (X[1] == "DESC") { DESC = X[2]; $0 = "" }
        if (X[1] == "KW") { KW = X[2]; $0 = "" }
        if (X[1] == "BODY") { BODY = X[2]; $0 = "" }
        if (X[1] == "NOMENU") { MENU = 0; $0 = "" }
        if (X[1] == "MENU") {
            MENU = X[2]
            PrintMenuTop(MENU)
            $0 = ""
        }
        if (X[1] == "SC") { SC[2] = X[2]; $0 = "" }
        if (X[1] == "SSC") { SC[3] = X[2]; $0 = "" }
        if (X[1] == "SSSC") { SC[4] = X[2]; $0 = "" }
        if (X[1] == "ADDRESS") { ADDRESS = X[2]; $0 = "" }
        if (X[1] == "NAME") { NAME = X[2]; $0 = "" }
        if (X[1] == "FOOTERFILE") { FOOTERFILE = X[2]; $0 = "" }
        if (X[1] == "OUT") { OUT = X[2]; $0 = "" }
        if (X[1] == "LISTTYPE") { LISTTYPE = X[2]; $0 = "" }
        if (X[1] == "LI") { LI = X[2]; $0 = "" }
        if (X[1] == "DL") { print "<DL>"; DL = 1; $0 = "" }
        if (X[1] == "HREF") { HREF = X[2]; $0 = "" }
        if (X[1] == "INCLUDE") {        # kanji code should be euc-jis
            # The file name is kept in a local so that a nested .INCLUDE
            # cannot redirect this loop.  The file is closed afterwards so it
            # can be included again, and a file that is already being read is
            # skipped so that a circular include cannot loop forever.
            incfile = Gsub(X[2])
            if (!(incfile in Including)) {
                Including[incfile] = 1
                while ((getline < incfile) > 0) Proc()
                close(incfile)
                delete Including[incfile]
            }
            $0 = ""
        }
        # for @xxx --> yyy substitutions
        if (X[1] == "BASE") { BASE = X[2]; $0 = "" }
        if (X[1] == "ROOT") { ROOT = X[2]; $0 = "" }
        # if (X[1]=="") =X[2]
    }
    # The first line that is still non-empty here ends the PREAMBLE.
    if ($0 && PREAMBLE) { PREAMBLE = 0 }
    if (TI) {
        if (PREAMBLE || NR == 1) {
            PrintHeader(TI)
            # PREAMBLE=0
            if (MENU) PrintMenuTop(MENU)
        } else PrintSection(TI, 1)
        TI = ""
    }
    if (SC[2]) { PrintSection(SC[2], 2); delete SC }
    if (SC[3]) { PrintSection(SC[3], 3); delete SC }
    if (SC[4]) { PrintSection(SC[4], 4); delete SC }
    if ($0 == "" && LIST) {             # an empty line ends the list
        LIST = 0
        printf("</%s>\n\n", LISTTYPE)
    }
    if ($0 !~ TS && TABLE) {            # a line without TS ends the table
        TABLE = 0
        print "</TABLE>"
    }
    if ($0 ~ TS) Table()
    if (substr($0, 1, 1) == LI) {
        $0 = substr($0, 2)
        if (LIST == 0) printf("<%s>\n", LISTTYPE)
        LIST = 1
        # printf("\t<LI>%s</LI>\n", $0)
        printf("\t<LI>%s\n", $0)
        $0 = ""
    }
    if (OUT && $0) {
        print
        if (!LIST) TEXT = 1
    }
}

# Print the current record as one table row, splitting it on TS.
# A line starting with TH, or the first table line, opens a new <TABLE>
# (closing any open one), prints the pending CAPTION and fixes the column
# count (TABLE) and the per-column alignment (TALIGN).  $0 is consumed.
function Table() {
    n = split($0, X, TS)
    if (substr($0, 1, 1) == TH || !TABLE) {
        if (substr($0, 1, 1) == TH) X[1] = substr(X[1], 2)
        if (TABLE) print "</TABLE>"
        print TABLEHEADER
        # The caption used to be closed with a second opening <CAPTION> tag.
        if (CAPTION) { printf("\t<CAPTION>%s</CAPTION>\n", CAPTION); CAPTION = "" }
        TABLE = n
        SEP = "TH"
        if (NOTH) SEP = "TD"
        for (i in X) {
            TALIGN[i] = ""
            if (substr(X[i], 1, 1) == "=") {
                TALIGN[i] = "center"
                X[i] = substr(X[i], 2)
            } else {
                if (substr(X[i], 1, 1) == ">") {
                    TALIGN[i] = "right"
                    X[i] = substr(X[i], 2)
                }
            }
        }
    } else SEP = "TD"
    print "\t<TR>"
    for (i = 1; i <= TABLE; i++) {
        gsub("^[ \t]+", "", X[i])
        gsub("[ \t]+$", "", X[i])
        if (X[i] == "") X[i] = TBLANK
        BSEP = SEP
        if (i == 1 && TTAG) {
            # With TTAG set, the first cell of every row gets a NAME anchor.
            NTAG = X[i]
            gsub("[ \t]+", "_", NTAG)
            # NOTE(review): "%2E" is the escape for ".", not "/"; the "%"
            # then fails the check below, so such names end up generated.
            gsub("/", "%2E", NTAG)
            if (NTAGS ~ ":"NTAG":") NTAG = sprintf("%s%03i", NTAG, NR)
            if (NTAG ~ /[^*+\-.,0-9A-Z_a-z]/) NTAG = sprintf("%s%03i", "t", NR)
            NTAGS = NTAGS":"NTAG":"
            printf("\t\t<A NAME=\"%s\"> </A>\n", NTAG)
        }
        if (TALIGN[i]) BSEP = sprintf("%s ALIGN=\"%s\"", SEP, TALIGN[i])
        # Rows shorter than the heading are padded with TBLANK.
        if (i <= n) printf("\t\t<%s>%s</%s>\n", BSEP, X[i], SEP)
        else printf("\t\t<%s>%s</%s>\n", BSEP, TBLANK, SEP)
    }
    print "\t</TR>"
    $0 = ""
}