#! /usr/local/bin/perl # # jLaTeX2HTML version 1.99+3.0 1999/9/15 # jLaTeX2HTML version 2.0 2003/01/25 shige # $Id: latex2html.pin,v 1.70 2002/08/22 15:14:08 RRM Exp $ # # Comprises patches and revisions by various authors: # See Changes, the log file of LaTeX2HTML. # # Original Copyright notice: # # LaTeX2HTML by Nikos Drakos # Japanese Patched: # jLaTeX2HTML by Kenshi Muto # jLaTeX2HTML copyright follows LaTeX2HTML copyright. # modified for latex2html-{2K.1beta,2002} by shige # Shigeharu TAKENO # **************************************************************** # LaTeX To HTML Translation ************************************** # **************************************************************** # LaTeX2HTML is a Perl program that translates LaTeX source # files into HTML (HyperText Markup Language). For each source # file given as an argument the translator will create a # directory containing the corresponding HTML files. # # The man page for this program is included at the end of this file # and can be viewed using "perldoc latex2html" # # For more information on this program and some examples of its # capabilities visit # # http://www.latex2html.org/ # # or see the accompanying documentation in the docs/ directory # # or # # http://www-texdev.ics.mq.edu.au/l2h/docs/manual/ # # or # # http://www.cbl.leeds.ac.uk/nikos/tex2html/doc/latex2html/ # # Original code written by Nikos Drakos, July 1993. # # Address: Computer Based Learning Unit # University of Leeds # Leeds, LS2 9JT # # Copyright (c) 1993-95. All rights reserved. # # # Extensively modified by Ross Moore, Herb Swan and others # # Address: Mathematics Department # Macquarie University # Sydney, Australia, 2109 # # Copyright (c) 1996-2001. All rights reserved. # # See general license in the LICENSE file. 
#
##########################################################################

use 5.003;        # refuse to work with old and buggy perl version
#use strict;
#use diagnostics;

# Perl packages that ship with the standard distribution
use Getopt::Long;
use Fcntl;
use AnyDBM_File;

# Global variables that are also referenced from some of the modules
use vars qw(
    $LATEX2HTMLDIR $LATEX2HTMLPLATDIR $SCRIPT
    %Month %used_icons $inside_tabbing $TABLE_attribs
    %mathentities $date_name $outer_math $TABLE__CELLPADDING_rx
);

# Locate the library directories at compile time, so that the
# "use L2hos" further down can find its module via @INC.
# Each directory is taken from the environment when set there; otherwise
# the built-in default is used and exported back into the environment.
BEGIN {
    # print "scanning for l2hdir\n";

    $LATEX2HTMLDIR = $ENV{'LATEX2HTMLDIR'};
    unless ($LATEX2HTMLDIR) {
        $LATEX2HTMLDIR = '/usr/local/share/lib/latex2html';
        $ENV{'LATEX2HTMLDIR'} = $LATEX2HTMLDIR;
    }

    $LATEX2HTMLPLATDIR = $ENV{'LATEX2HTMLPLATDIR'};
    unless ($LATEX2HTMLPLATDIR) {
        $LATEX2HTMLPLATDIR = '/usr/local/lib/latex2html' || $LATEX2HTMLDIR;
        $ENV{'LATEX2HTMLPLATDIR'} = $LATEX2HTMLPLATDIR;
    }

    # The platform directory is optional ...
    push(@INC, $LATEX2HTMLPLATDIR) if (-d $LATEX2HTMLPLATDIR);

    # ... but the main library directory must exist.
    die qq{Fatal: Directory "$LATEX2HTMLDIR" does not exist.\n}
        unless (-d $LATEX2HTMLDIR);
    push(@INC, $LATEX2HTMLDIR);
}

use L2hos;        # Operating system dependent routines

# $^W = 1; # turn on warnings

my $RELEASE = '2002-2-1';
# Pull the bare revision number out of the RCS keyword string.
my ($REVISION) = q$Revision: 1.70 $ =~ /:\s*(\S+)/;

# The key, which delimits expressions defined in the environment
# depends on the operating system.
$envkey = L2hos->pathd();

# $dd is the directory delimiter character
$dd = L2hos->dd();

# Forked processes must be able to find the modules in $LATEX2HTMLDIR,
# so make sure that directory is listed in PERL5LIB.
if (!$ENV{'PERL5LIB'}) {
    $ENV{'PERL5LIB'} = $LATEX2HTMLDIR;
}
elsif ($ENV{'PERL5LIB'} !~ m|\Q$LATEX2HTMLDIR\E|o) {
    $ENV{'PERL5LIB'} .= "$envkey$LATEX2HTMLDIR";
}

# Local configuration, read at runtime.
# An explicit file named in L2HCONFIG takes precedence over the
# $CONFIG_FILE (usually l2hconf.pm) found on the module search path.
if (!$ENV{'L2HCONFIG'}) {
    eval 'use l2hconf';
    die "Fatal (use l2hconf): $@\n" if ($@);
}
else {
    require $ENV{'L2HCONFIG'} ||
        die "Fatal (require $ENV{'L2HCONFIG'}): $!";
}

# MRO: Changed this to global value in config/config.pl
# change these whenever you do a patch to this program and then
# name the resulting patch file accordingly
# $TVERSION = "2002-2-1";
#$TPATCHLEVEL = " beta";
#$TPATCHLEVEL = " release";
#$RELDATE = "(March 30, 1999)";
#$TEX2HTMLV_SHORT = $TVERSION . $TPATCHLEVEL;

# Version strings (plain and Japanese-patched variants)
$JVERSION          = 'JA patch-2.0';
$TEX2HTMLV_SHORT   = $RELEASE;
$JTEX2HTMLV_SHORT  = "$RELEASE $JVERSION";

$TEX2HTMLVERSION   = "$TEX2HTMLV_SHORT ($REVISION)";
$JTEX2HTMLVERSION  = "$TEX2HTMLV_SHORT ($REVISION) $JVERSION";

# Project and author URLs
$TEX2HTMLADDRESS   = 'http://www.latex2html.org/';
#$JTEX2HTMLADDRESS = "http://www.topstudio.co.jp/~kmuto/software/latex2html/";
$JTEX2HTMLADDRESS  = 'http://takeno.iee.niit.ac.jp/~shige/TeX/latex2html/ltx2html.html';
$AUTHORADDRESS     = 'http://cbl.leeds.ac.uk/nikos/personal.html';
#$AUTHORADDRESS2 = "http://www-math.mpce.mq.edu.au/%7Eross/";
$AUTHORADDRESS2    = 'http://www.maths.mq.edu.au/~ross/';
$JAUTHORADDRESS    = 'http://www.topstudio.co.jp/~kmuto/';
$JAUTHORADDRESS2   = 'http://takeno.iee.niit.ac.jp/~shige/';

# Set $HOME to what the system considers the home directory,
# and allow files to be required from there.
$HOME = L2hos->home();
push(@INC, $HOME);

# flush stdout with every print -- gives better feedback during
# long computations
$| = 1;

# set Perl's subscript separator to LaTeX's illegal character.
# (quite defensive but why not)
$; = "\000";

# No arguments!!
die "Error: No files to process!\n" unless (@ARGV);

# Image prefix
$IMAGE_PREFIX = '_image';

# Partition prefix
$PARTITION_PREFIX = 'part_' unless $PARTITION_PREFIX;

# Author address
@address_data = &address_data('ISO');
$ADDRESS = "$address_data[0]\n$address_data[1]";

# The depth settings must never be zero or unset at this point
$MAX_SPLIT_DEPTH = 4 unless ($MAX_SPLIT_DEPTH);
$MAX_LINK_DEPTH  = 4 unless ($MAX_LINK_DEPTH);
$TOC_DEPTH       = 4 unless ($TOC_DEPTH);

# A global value may already be set in the $CONFIG_FILE
unless ($INIT_FILE_NAME) {
    $INIT_FILE_NAME = $ENV{'L2HINIT_NAME'} || '.latex2html-init';
}

# Read the $HOME/$INIT_FILE_NAME if one is found
if (-f "$HOME$dd$INIT_FILE_NAME" && -r _) {
    print "Note: Loading $HOME$dd$INIT_FILE_NAME\n";
    require("$HOME$dd$INIT_FILE_NAME");
    $INIT_FILE = "$HOME$dd$INIT_FILE_NAME";
    # _MRO_TODO_: Introduce a version to be checked?
    if ($DESTDIR eq '.') {
        die "Error: You have an out-of-date " . $HOME .
            "$dd$INIT_FILE_NAME file.\nPlease update or delete it.\n";
    }
}

# Read the $INIT_FILE_NAME file if one is found in current directory
# (skipped when the current directory is $HOME, which was handled above)
if (L2hos->Cwd() ne $HOME && -f ".$dd$INIT_FILE_NAME" && -r _) {
    print "Note: Loading .$dd$INIT_FILE_NAME\n";
    require(".$dd$INIT_FILE_NAME");
    $INIT_FILE = "$INIT_FILE_NAME";
}
if ($DESTDIR eq '.') {
    die "Error: '.' is an incorrect setting for DESTDIR.\n" .
        "Please check your $INIT_FILE_NAME file.\n";
}

# User home substitutions
$LATEX2HTMLSTYLES =~ s/~([$dd$dd$envkey]|$)/$HOME$1/go;
# the next line fails utterly on non-UNIX systems
$LATEX2HTMLSTYLES =~ s/~([^$dd$dd$envkey]+)/L2hos->home($1)/geo;

# Turn every entry of the style search path into an absolute directory
$LATEX2HTMLSTYLES = join(
    $envkey,
    map(L2hos->Make_directory_absolute($_),
        split(/$envkey/o, $LATEX2HTMLSTYLES))
);

#HWS: That was the last reference to HOME. Now set HOME to $LATEX2HTMLDIR,
#     to enable dvips to see that version of .dvipsrc! But only if we
#     have DVIPS_MODE not set - yes - this is a horrible nasty kludge
# MRO: The file has to be updated by configure _MRO_TODO_
if ($PK_GENERATION && !$DVIPS_MODE) {
    $ENV{HOME} = $LATEX2HTMLDIR;
    delete $ENV{PRINTER};    # Overrides .dvipsrc
}

# language of the DTD specified in the DOCTYPE tag
$ISO_LANGUAGE = 'EN' unless $ISO_LANGUAGE;

# Save the command line arguments, quote where necessary
$argv = join(' ', map { /[\s#*!\$%]/ ? "'$_'" : $_ } @ARGV);

# Pre-process the command line for backward compatibility
foreach (@ARGV) {
    s/^--?no_/-no/;    # replace e.g. no_fork by nofork
    # s/^[+](\d+)$/$1/; # remove + in front of integers
}

# Process command line options.
# An option followed by a reference stores its value directly in that
# variable; all options without a linkage end up in %opt.
my %opt;
unless (GetOptions(\%opt,
    # option                          linkage (optional)
    'help|h',
    'version|V',
    'split=s',
    'link=s',
    'toc_depth=i',                    \$TOC_DEPTH,
    'toc_stars!',                     \$TOC_STARS,
    'short_extn!',                    \$SHORTEXTN,
    'iso_language=s',                 \$ISO_LANGUAGE,
    'validate!',                      \$HTML_VALIDATE,
    'latex!',
    'djgpp!',                         \$DJGPP,
    'fork!',                          \$CAN_FORK,
    'external_images!',               \$EXTERNAL_IMAGES,
    'ascii_mode!',                    \$ASCII_MODE,
    'lcase_tags!',                    \$LOWER_CASE_TAGS,
    'ps_images!',                     \$PS_IMAGES,
    'font_size=s',                    \$FONT_SIZE,
    'tex_defs!',                      \$TEXDEFS,
    'navigation!',
    'top_navigation!',                \$TOP_NAVIGATION,
    'bottom_navigation!',             \$BOTTOM_NAVIGATION,
    'auto_navigation!',               \$AUTO_NAVIGATION,
    'index_in_navigation!',           \$INDEX_IN_NAVIGATION,
    'contents_in_navigation!',        \$CONTENTS_IN_NAVIGATION,
    'next_page_in_navigation!',       \$NEXT_PAGE_IN_NAVIGATION,
    'previous_page_in_navigation!',   \$PREVIOUS_PAGE_IN_NAVIGATION,
    'footnode!',
    'numbered_footnotes!',            \$NUMBERED_FOOTNOTES,
    'prefix=s',                       \$PREFIX,
    'auto_prefix!',                   \$AUTO_PREFIX,
    'long_titles=i',                  \$LONG_TITLES,
    'custom_titles!',                 \$CUSTOM_TITLES,
    'title|t=s',                      \$TITLE,
    'rooted!',                        \$ROOTED,
    'rootdir=s',
    'dir=s',                          \$FIXEDDIR,
    'mkdir',                          \$MKDIR,
    'address=s',                      \$ADDRESS,
    'noaddress',
    'subdir!',
    'info=s',                         \$INFO,
    'noinfo',
    'auto_link!',
    'reuse=i',                        \$REUSE,
    'noreuse',
    'antialias_text!',                \$ANTI_ALIAS_TEXT,
    'antialias!',                     \$ANTI_ALIAS,
    'transparent!',                   \$TRANSPARENT_FIGURES,
    'white!',                         \$WHITE_BACKGROUND,
    'discard!',                       \$DISCARD_PS,
    'image_type=s',                   \$IMAGE_TYPE,
    'images!',
    'accent_images=s',                \$ACCENT_IMAGES,
    'noaccent_images',
    'style=s',                        \$STYLESHEET,
    'parbox_images!',
    'math!',
    'math_parsing!',
    'latin!',
    'entities!',                      \$USE_ENTITY_NAMES,
    'local_icons!',                   \$LOCAL_ICONS,
    'scalable_fonts!',                \$SCALABLE_FONTS,
    'images_only!',                   \$IMAGES_ONLY,
    'show_section_numbers!',          \$SHOW_SECTION_NUMBERS,
    'show_init!',                     \$SHOW_INIT_FILE,
    'init_file=s',                    \$INIT_FILE,
    'up_url=s',                       \$EXTERNAL_UP_LINK,
    'up_title=s',                     \$EXTERNAL_UP_TITLE,
    'down_url=s',                     \$EXTERNAL_DOWN_LINK,
    'down_title=s',                   \$EXTERNAL_DOWN_TITLE,
    'prev_url=s',                     \$EXTERNAL_PREV_LINK,
    'prev_title=s',                   \$EXTERNAL_PREV_TITLE,
    'index=s',                        \$EXTERNAL_INDEX,
    'biblio=s',                       \$EXTERNAL_BIBLIO,
    'contents=s',                     \$EXTERNAL_CONTENTS,
    'external_file=s',                \$EXTERNAL_FILE,
    'short_index!',                   \$SHORT_INDEX,
    'unsegment!',                     \$UNSEGMENT,
    'debug!',                         \$DEBUG,
    'tmp=s',                          \$TMP,
    'ldump!',                         \$LATEX_DUMP,
    'timing!',                        \$TIMING,
    'verbosity=i',                    \$VERBOSITY,
    'html_version=s',                 \$HTML_VERSION,
    'strict!',                        \$STRICT_HTML,
    'xbit!',                          \$XBIT_HACK,
    'ssi!',                           \$ALLOW_SSI,
    'php!',                           \$ALLOW_PHP,
    'test_mode!'                      # undocumented switch
)) {
    &usage();
    exit 1;
}

# interpret options, check option consistency

# -split N / -split +N : a leading '+' is recorded as a negative depth
# and marks the depth as relative.
if (defined $opt{'split'}) {
    my ($relative, $depth) = ($opt{'split'} =~ /^(\+?)(\d+)$/);
    unless (defined $depth) {
        &usage;
        die "Error: Unrecognised value for -split: $opt{'split'}\n";
    }
    $MAX_SPLIT_DEPTH = $depth;
    if ($relative) {
        $MAX_SPLIT_DEPTH *= -1;
        $REL_DEPTH = 1;
    }
}

# -link N / -link +N : same convention as -split
if (defined $opt{'link'}) {
    my ($relative, $depth) = ($opt{'link'} =~ /^(\+?)(\d+)$/);
    unless (defined $depth) {
        &usage;
        die "Error: Unrecognised value for -link: $opt{'link'}\n";
    }
    $MAX_LINK_DEPTH = $depth;
    if ($relative) { $MAX_LINK_DEPTH *= -1 }
}

if ($ISO_LANGUAGE !~ /^[A-Z.]+$/) {
    die "Error: Language (-iso_language) must be uppercase and dots only: $ISO_LANGUAGE\n";
}

if ($HTML_VALIDATE && !$HTML_VALIDATOR) {
    die "Error: Need a HTML_VALIDATOR when -validate is specified.\n";
}

&set_if_false($NOLATEX,$opt{latex}); # negate the option...
# ---------------------------------------------------------------------
# Remaining option interpretation and consistency checks.
# Options of the form "-foo / -nofoo" that are stored internally as a
# negative flag ($NO_FOO) are converted with &set_if_false, which only
# touches the flag when the option was actually given.
# ---------------------------------------------------------------------

# ASCII mode and PostScript images both imply external images
if ($ASCII_MODE || $PS_IMAGES) {
    $EXTERNAL_IMAGES = 1;
}

if ($FONT_SIZE && $FONT_SIZE !~ /^\d+pt$/) {
    die "Error: Font size (-font_size) must end with 'pt': $FONT_SIZE\n"
}

&set_if_false($NO_NAVIGATION,$opt{navigation});
&set_if_false($NO_FOOTNODE,$opt{footnode});

if (defined $TITLE && !length($TITLE)) {
    die "Error: Empty title (-title).\n";
}

# -rootdir is -dir plus -rooted
if ($opt{rootdir}) {
    $ROOTED = 1;
    $FIXEDDIR = $opt{rootdir};
}

# The target directory has to exist, unless -mkdir allows creating it
if ($FIXEDDIR && !-d $FIXEDDIR) {
    if ($MKDIR) {
        print "\n *** creating directory: $FIXEDDIR ";
        die "Failed: $!\n" unless (mkdir($FIXEDDIR, 0755));
        # _TODO_ use File::Path to create a series of directories
    } else {
        &usage;
        die "Error: Specified directory (-rootdir, -dir) does not exist.\n";
    }
}

&set_if_false($NO_SUBDIR, $opt{subdir});
&set_if_false($NO_AUTO_LINK, $opt{auto_link});

if ($opt{noreuse}) {
    $REUSE = 0;
}

# The requested image type must be one this installation supports
unless(grep(/^\Q$IMAGE_TYPE\E$/o, @IMAGE_TYPES)) {
    die <<"EOF";
Error: No such image type '$IMAGE_TYPE'.
       This installation supports (first is default): @IMAGE_TYPES
EOF
}

&set_if_false($NO_IMAGES, $opt{images});

if ($opt{noaccent_images}) {
    $ACCENT_IMAGES = '';
}
if($opt{noaddress}) {
    $ADDRESS = '';
}
if($opt{noinfo}) {
    $INFO = 0;
}

if($ACCENT_IMAGES && $ACCENT_IMAGES !~ /^[a-zA-Z,]+$/) {
    # Report the offending option value (the message used to print $_,
    # which holds nothing related to this option here).
    die "Error: Single word or comma-list of style words needed for -accent_images, not: $ACCENT_IMAGES\n";
}

&set_if_false($NO_PARBOX_IMAGES, $opt{parbox_images});
&set_if_false($NO_SIMPLE_MATH, $opt{math});

# -math_parsing also decides simple-math handling, unless -math/-nomath
# was given explicitly
if (defined $opt{math_parsing}) {
    $NO_MATH_PARSING = !$opt{math_parsing};
    $NO_SIMPLE_MATH = !$opt{math_parsing} unless(defined $opt{math});
}

&set_if_false($NO_ISOLATIN, $opt{latin});

# Load the init file; $INIT_FILE may come from -init_file or from the
# init-file discovery earlier in the script.
if ($INIT_FILE) {
    if (-f $INIT_FILE && -r _) {
        print "Note: Initialising with file: $INIT_FILE\n"
            if ($DEBUG || $VERBOSITY);
        require($INIT_FILE);
    } else {
        die "Error: Could not find file (-init_file): $INIT_FILE\n";
    }
}

# The loop variable aliases each global, so the globals themselves
# are initialised and rewritten in place.
foreach($EXTERNAL_UP_LINK, $EXTERNAL_DOWN_LINK, $EXTERNAL_PREV_LINK,
        $EXTERNAL_INDEX, $EXTERNAL_BIBLIO, $EXTERNAL_CONTENTS) {
    $_ ||= '';       # initialize
    s/~/&#126;/g;    # protect `~' by replacing it with its HTML entity
}

if($TMP && !(-d $TMP && -w _)) {
    die "Error: '$TMP' not usable as temporary directory.\n";
}

if ($opt{help}) {
    L2hos->perldoc($SCRIPT);
    exit 0;
}

if ($opt{version}) {
    &banner();
    exit 0;
}

# Undocumented test mode: run against the tools and data files of one
# fixed installation tree.
# NOTE(review): these absolute paths are installation-specific; they look
# like values filled in at configure time -- confirm before changing.
if ($opt{test_mode}) {
    $TITLE = 'LaTeX2HTML Test Document';
    $TEXEXPAND = "$PERL /private/Network/Servers/www-jlc-in/home/htdocs/subg/ir/study/latex/latex2html-2002-2-1${dd}texexpand";
    $PSTOIMG = "$PERL /private/Network/Servers/www-jlc-in/home/htdocs/subg/ir/study/latex/latex2html-2002-2-1${dd}pstoimg";
    $ICONSERVER = L2hos->path2URL("/private/Network/Servers/www-jlc-in/home/htdocs/subg/ir/study/latex/latex2html-2002-2-1${dd}icons");
    $TEST_MODE = 1;
    $RGBCOLORFILE = "/private/Network/Servers/www-jlc-in/home/htdocs/subg/ir/study/latex/latex2html-2002-2-1${dd}styles${dd}rgb.txt";
    $CRAYOLAFILE = "/private/Network/Servers/www-jlc-in/home/htdocs/subg/ir/study/latex/latex2html-2002-2-1${dd}styles${dd}crayola.txt";
}

if($DEBUG) {
    # make the OS-dependent functions more chatty, too
    $L2hos::Verbose = 1;
}

undef %opt; # not needed any more

$FIXEDDIR = $FIXEDDIR || $DESTDIR || ''; # for backward compatibility

# An external link is only usable when both its URL and its title are
# given; otherwise both are dropped.
if ($EXTERNAL_UP_TITLE xor $EXTERNAL_UP_LINK) {
    warn "Warning (-up_url, -up_title): Need to specify both a parent URL and a parent title!\n";
    $EXTERNAL_UP_TITLE = $EXTERNAL_UP_LINK = "";
}

if ($EXTERNAL_DOWN_TITLE xor $EXTERNAL_DOWN_LINK) {
    warn "Warning (-down_url, -down_title): Need to specify both a parent URL and a parent title!\n";
    $EXTERNAL_DOWN_TITLE = $EXTERNAL_DOWN_LINK = "";
}

# $NO_NAVIGATION = 1 unless $MAX_SPLIT_DEPTH; # Martin Wilck

# Negative depths (e.g. from an init file) are made positive and
# recorded in a separate flag instead.
if ($MAX_SPLIT_DEPTH && $MAX_SPLIT_DEPTH < 0) {
    $MAX_SPLIT_DEPTH *= -1;
    $REL_DEPTH = 1;
}
if ($MAX_LINK_DEPTH && $MAX_LINK_DEPTH < 0) {
    $MAX_LINK_DEPTH *= -1;
    $LEAF_LINKS = 1;
}

$FOOT_FILENAME = 'footnode' unless ($FOOT_FILENAME);
# Without splitting there is no separate footnote file
$NO_FOOTNODE = 1 unless ($MAX_SPLIT_DEPTH || $NO_FOOTNODE);
$NO_SPLIT = 1 unless $MAX_SPLIT_DEPTH; # _MRO_TODO_: is this needed at all?
$SEGMENT = $SEGMENTED = 0;
$NO_MATH_MARKUP = 1;

# specify the filename extension to use with the generated HTML files
if ($SHORTEXTN) { $EXTN = ".htm"; }      # for HTML files on CDROM
elsif ($ALLOW_PHP) { $EXTN = ".php"; }   # has PHP dynamic includes
    # with server-side includes (SSI) :
elsif ($ALLOW_SSI && !$XBIT_HACK) { $EXTN = ".shtml"; }
    # ordinary names, valid also for SSI with XBit hack :
else { $EXTN = ".html"; }

$NODE_NAME = 'node' unless (defined $NODE_NAME);

# space for temporary files
# different to the $TMPDIR for image-generation
# MRO: No directory should end with $dd!
$TMP_ = "TMP";

$TMP_PREFIX = "l2h" unless ($TMP_PREFIX);

# This can be set to 1 when using a version of dvips that is safe
# from the "dot-in-name" bug.
# _TODO_ this should be determined by configure
#$DVIPS_SAFE = 1;

$CHARSET = $charset || 'iso-8859-1';

####################################################################
#
# If possible, use icons of the same type as generated images
#
# The hash is looked up by name (symbolic reference).  It is tested for
# being non-empty; "defined %hash" is a fatal error in Perl 5.22 and later.
if ($IMAGE_TYPE && %{"icons_$IMAGE_TYPE"}) {
    %icons = %{"icons_$IMAGE_TYPE"};
}

####################################################################
#
# Figure out what options we need to pass to DVIPS and store that in
# the $DVIPSOPT variable. Also, scaling is taken care of at the
# dvips level if PK_GENERATION is set to 1, so adjust SCALE_FACTORs
# accordingly.
#
if ($SCALABLE_FONTS) {
    $PK_GENERATION = 0;
    $DVIPS_MODE = '';
}

if ($PK_GENERATION) {
    if ($MATH_SCALE_FACTOR <= 0) { $MATH_SCALE_FACTOR = 2; }
    if ($FIGURE_SCALE_FACTOR <= 0) { $FIGURE_SCALE_FACTOR = 2; }
    my $saveMSF = $MATH_SCALE_FACTOR;
    my $saveFSF = $FIGURE_SCALE_FACTOR;
    my $desired_dpi = int($MATH_SCALE_FACTOR*75);
    $FIGURE_SCALE_FACTOR = ($METAFONT_DPI / 72) *
        ($FIGURE_SCALE_FACTOR / $MATH_SCALE_FACTOR) ;
    $MATH_SCALE_FACTOR = $METAFONT_DPI / 72;
    $dvi_mag = int(1000 * $desired_dpi / $METAFONT_DPI);
    if ($dvi_mag > 1000) {
        &write_warnings(
            "WARNING: Your SCALE FACTOR is too large for PK_GENERATION.\n" .
            " See $CONFIG_FILE for more information.\n");
    }

    # RRM: over-sized scaling, using dvi-magnification
    if ($EXTRA_IMAGE_SCALE) {
        print "\n *** Images at $EXTRA_IMAGE_SCALE times resolution of displayed size ***\n";
        $desired_dpi = int($EXTRA_IMAGE_SCALE * $desired_dpi+.5);
        print " desired_dpi = $desired_dpi METAFONT_DPI = $METAFONT_DPI\n" if $DEBUG;
        $dvi_mag = int(1000 * $desired_dpi / $METAFONT_DPI);
        $MATH_SCALE_FACTOR = $saveMSF;
        $FIGURE_SCALE_FACTOR = $saveFSF;
    }
    # no space after "-y", "-D", "-e" --- required by DVIPS under DOS !
    # (a conditional "my ... if ..." declaration has undefined behaviour,
    #  so the switch is always declared and left empty when unused)
    my $mode_switch = $DVIPS_MODE ? "-mode $DVIPS_MODE" : '';
    $DVIPSOPT .= " -y$dvi_mag -D$METAFONT_DPI $mode_switch -e5 ";
} else { # no PK_GENERATION
#    if ($EXTRA_IMAGE_SCALE) {
#       &write_warnings(
#          "the \$EXTRA_IMAGE_SCALE feature requires either \$PK_GENERATION=1"
#               . " or the '-scalable_fonts' option");
#       $EXTRA_IMAGE_SCALE = '';
#    }
    # MRO: shifted to l2hconf
    #$DVIPSOPT .= ' -M';
} # end PK_GENERATION

# The mapping from numbers to accents.
# These are required to process the \accent command, which is found in
# tables of contents whenever there is an accented character in a
# caption or section title. Processing the \accent command makes
# $encoded_*_number work properly (see &extract_captions) with
# captions that contain accented characters.
# I got the numbers from the plain.tex file, version 3.141.

# Missing entries should be looked up by a native speaker.
# Have a look at generate_accent_commands and $iso_8859_1_character_map.

# MEH: added more accent types
# MRO: only uppercase needed!
%accent_type = (
    '18' => 'grave',    # \`
    '19' => 'acute',    # `'
    '20' => 'caron',    # \v
    '21' => 'breve',    # \u
    '22' => 'macr',     # \=
    '23' => 'ring',     #
    '24' => 'cedil',    # \c
    '94' => 'circ',     # \^
    '95' => 'dot',      # \.
    '7D' => 'dblac',    # \H
    '7E' => 'tilde',    # \~
    '7F' => 'uml',      # \"
);

&driver;

exit 0; # clean exit, no errors

############################ Subroutines ##################################

# Create a private temporary directory and store its path in $TMPDIR.
# Candidates are tried in this order: $TMP, the TMP subdirectory of
# $DESTDIR, $DESTDIR itself, and the current directory.  The first
# candidate that is a writable directory and in which a subdirectory
# named "$TMP_PREFIX<pid>" can be created wins.  $TMPDIR is left empty
# when no candidate works.
# Dies when the chosen path contains a '.' and $DVIPS is not known to be
# safe from the dot-in-name bug.
sub make_tmp_dir {
    &close_dbm_database if $DJGPP; # to save file-handles

    # determine a suitable temporary path
    #
    $TMPDIR = '';
    my @tmp_try = ();
    push(@tmp_try, $TMP) if($TMP);
    push(@tmp_try, "$DESTDIR$dd$TMP_") if($TMP_);
    push(@tmp_try, $DESTDIR) if($DESTDIR);
    push(@tmp_try, L2hos->Cwd());

    my $try;
    TempTry: foreach $try (@tmp_try) {
        next unless(-d $try && -w _);
        my $tmp = "$try$dd$TMP_PREFIX$$";
        if(mkdir($tmp,0755)) {
            $TMPDIR=$tmp;
            last TempTry;
        } else {
            warn "Warning: Cannot create temporary directory '$tmp': $!\n";
        }
    }

    $dvips_warning = <<"EOF";
Warning: There is a '.' in \$TMPDIR, $DVIPS will probably fail.
Set \$TMP to use a /tmp directory, or rename the working directory.
EOF
    die ($dvips_warning . "\n\$TMPDIR=$TMPDIR ***\n\n")
        if ($TMPDIR =~ /\./ && $DVIPS =~ /dvips/ && !$DVIPS_SAFE);

    &open_dbm_database if $DJGPP;
}

# MRO: set first parameter to the opposite of the second if second parameter is defined
# The first argument is modified in place through the @_ alias, so it
# must be passed as a variable.
sub set_if_false {
    $_[0] = !$_[1] if(defined $_[1]);
}

# Die with an explanatory message when the given file name contains
# more than one '.', because dvips then fails on the images.
sub check_for_dots {
    local($file) = @_;
    if ($file =~ /\.[^.]*\./) {
        die "\n\n\n *** Fatal Error --- but easy to fix ***\n"
            . "\nCannot have '.' in file-name prefix, else dvips fails on images"
            . "\nChange the name from $file and try again.\n\n";
    }
}

# Process each file ...
# Main loop of the translator: for every file named on the command line
# it works out the destination directory, runs texexpand, opens the DBM
# databases, translates the document (or only regenerates images with
# -images_only), reports warnings and optionally links/validates the
# result.  Returns the current value of $_.
sub driver {
    local($FILE, $orig_cwd, %unknown_commands, %dependent, %depends_on
        , %styleID, %env_style, $bbl_cnt, $dbg, %numbered_section);
    # MRO: $texfilepath has to be global!
    local(%styles_loaded);
    $orig_cwd = L2hos->Cwd();

    print "\n *** initialise *** " if ($VERBOSITY > 1);
    &initialise;                # Initialise some global variables

    print "\n *** check modes *** " if ($VERBOSITY > 1);
    &ascii_mode if $ASCII_MODE; # Must come after initialization
    &titles_language($TITLES_LANGUAGE);
    &make_numbered_footnotes if ($NUMBERED_FOOTNOTES);
    $dbg = $DEBUG ? "-debug" : "";
    $dbg .= (($VERBOSITY>2) ? " -verbose" : "");

    #use the same hashes for all files in a batch
    local(%cached_env_img, %id_map, %symbolic_labels, %latex_labels)
        if ($FIXEDDIR && $NO_SUBDIR);

    local($MULTIPLE_FILES,$THIS_FILE);
    $MULTIPLE_FILES = 1+$#ARGV if $ROOTED;
    print "\n *** $MULTIPLE_FILES file".($MULTIPLE_FILES ? 's: ' : ': ')
        . join(',',@ARGV) . " *** " if ($VERBOSITY > 1);

    local(%section_info, %toc_section_info, %cite_info, %ref_files);

    foreach $FILE (@ARGV) {
        &check_for_dots($FILE) unless $DVIPS_SAFE;
        ++$THIS_FILE if $MULTIPLE_FILES;
        do {
            %section_info = ();
            %toc_section_info = ();
            %cite_info = ();
            %ref_files = ();
        } unless $MULTIPLE_FILES;
        local($bbl_nr) = 1;

        # The number of reused images and those in images.tex
        local($global_page_num) = (0) unless($FIXEDDIR && $NO_SUBDIR);
        # The number of images in images.tex
        local($new_page_num) = (0); # unless($FIXEDDIR && $NO_SUBDIR);
        local($pid, $sections_rx,
            , $outermost_level, %latex_body, $latex_body
            , %encoded_section_number
            , %verbatim, %new_command, %new_environment
            , %provide_command, %renew_command, %new_theorem
            , $preamble, $aux_preamble, $prelatex, @preamble);

        # must retain these when all files are in the same directory
        # else the images.pl and labels.pl files get clobbered
        # NOTE(review): this local() is undone again when the enclosing
        # block ends, i.e. immediately -- confirm whether the reset is
        # meant to last for the rest of the loop body.
        unless ($FIXEDDIR && $NO_SUBDIR) {
            print "\nResetting image-cache" if ($#ARGV);
            local(%cached_env_img, %id_map, %symbolic_labels, %latex_labels)
        }

        ## AYS: Allow extension other than .tex and make it optional
        ($EXT = $FILE) =~ s/.*\.([^\.]*)$/$1/;
        if ( $EXT eq $FILE ) {
            $EXT = "tex";
            $FILE =~ s/$/.tex/;
        }

        #RRM: allow user-customisation, dependent on file-name
        # e.g. add directories to $TEXINPUTS named for the file
        # --- idea due to Fred Drake
        &custom_driver_hook($FILE) if (defined &custom_driver_hook);

        # JCL(jcl-dir)
        # We need absolute paths for TEXINPUTS here, because
        # we change the directory
        if ($orig_cwd eq $texfilepath) {
            &deal_with_texinputs($orig_cwd);
        } else {
            &deal_with_texinputs($orig_cwd, $texfilepath);
        }

        ($texfilepath, $FILE) = &get_full_path($FILE);
        $texfilepath = '.' unless($texfilepath);

        die "Cannot read $texfilepath$dd$FILE \n"
            unless (-f "$texfilepath$dd$FILE");

        # Tell texexpand which files we *don't* want to look at.
        $ENV{'TEXE_DONT_INCLUDE'} = $DONT_INCLUDE if $DONT_INCLUDE;
        # Tell texexpand which files we *do* want to look at, e.g.
        # home-brew style files
        $ENV{'TEXE_DO_INCLUDE'} = $DO_INCLUDE if $DO_INCLUDE;

        $FILE =~ s/\.[^\.]*$//; ## AYS
        $DESTDIR = ''; # start at empty
        if ($FIXEDDIR) {
            $DESTDIR = $FIXEDDIR unless ($FIXEDDIR eq '.');
            if (($ROOTED)&&!($texfilepath eq $orig_cwd)) {
                $DESTDIR .= $dd . $FILE unless $NO_SUBDIR;
            };
        } elsif ($texfilepath eq $orig_cwd) {
            $DESTDIR = ($NO_SUBDIR ? '.' : $FILE);
        } else {
            $DESTDIR = $ROOTED ? '.' : $texfilepath;
            $DESTDIR .= $dd . $FILE unless $NO_SUBDIR;
        }
        $PREFIX = "$FILE-" if $AUTO_PREFIX;

        print "\nOPENING $texfilepath$dd$FILE.$EXT \n"; ## AYS

        next unless (&new_dir($DESTDIR,''));
        # establish absolute path to $DESTDIR
        $DESTDIR = L2hos->Make_directory_absolute($DESTDIR);
        &make_tmp_dir;
        print "\nNote: Working directory is $DESTDIR\n";
        print "Note: Images will be generated in $TMPDIR\n\n";

        # Need to clean up a bit in case there's garbage left
        # from former runs.
        if ($DESTDIR) { chdir($DESTDIR) || die "$!\n"; }
        if (opendir (TMP,$TMP_)) {
            foreach (readdir TMP) {
                # The entries live inside the $TMP_ directory that was just
                # read; the path used to be built from the literal text
                # "TMP_", which never matched any file.
                L2hos->Unlink("$TMP_$dd$_") unless (/^\.\.?$/);
            }
            closedir TMP;
        }
        &cleanup(1);
        unless(-d $TMP_) {
            mkdir($TMP_, 0755) ||
                die "Cannot create directory '$TMP_': $!\n";
        }
        chdir($orig_cwd);

        # RRM 14/5/98 moved this to occur earlier
        ## JCL(jcl-dir)
        ## We need absolute paths for TEXINPUTS here, because
        ## we change the directory
        # if ($orig_cwd eq $texfilepath) {
        #     &deal_with_texinputs($orig_cwd);
        # } else {
        #     &deal_with_texinputs($orig_cwd, $texfilepath);
        # }

        # This needs $DESTDIR to have been created ...
        print " *** calling `texexpand' ***" if ($VERBOSITY > 1);
        local($unseg) = ($UNSEGMENT ? "-unsegment " : "");

        # does DOS need to check these here ?
        # die "File $TEXEXPAND does not exist or is not executable\n"
        #     unless (-x $TEXEXPAND);
        L2hos->syswait("$TEXEXPAND $dbg -auto_exclude $unseg"
            . "-save_styles $DESTDIR$dd$TMP_${dd}styles "
            . ($TEXINPUTS ? "-texinputs $TEXINPUTS " : '' )
            . (($VERBOSITY >2) ? "-verbose " : '' )
            . "-out $DESTDIR$dd$TMP_$dd$FILE "
            . "$texfilepath$dd$FILE.$EXT")
            && die " texexpand failed: $!\n";
        print STDOUT "\n *** `texexpand' done ***\n" if ($VERBOSITY > 1);

        chdir($DESTDIR) if $DESTDIR;
        $SIG{'INT'} = 'handler';

        &open_dbm_database;
        &initialise_sections;
        print STDOUT "\n *** database open ***\n" if ($VERBOSITY > 1);

        if ($IMAGES_ONLY) {
            &make_off_line_images;
        } else {
            &rename_image_files;
            &load_style_file_translations;
            &make_language_rx;
            &make_raw_arg_cmd_rx;
            # &make_isolatin1_rx unless ($NO_ISOLATIN);
            &translate_titles;
            &make_sections_rx;
            print "\nReading ...";
            if ($SHORT_FILENAME) {
                L2hos->Rename ("$TMP_$dd$FILE" ,"$TMP_$dd$SHORT_FILENAME" );
                &slurp_input_and_partition_and_pre_process(
                    "$TMP_$dd$SHORT_FILENAME");
            } else {
                &slurp_input_and_partition_and_pre_process("$TMP_$dd$FILE");
            }
            &add_preamble_head;
            # Create a regular expressions
            &set_depth_levels;
            &make_sections_rx;
            &make_order_sensitive_rx;
            &add_document_info_page if ($INFO && !(/\\htmlinfo/));
            &add_bbl_and_idx_dummy_commands;
            &translate; # Destructive!
        }
        &style_sheet;
        &close_dbm_database;
        &cleanup();

        #JCL: read warnings from file to $warnings
        local($warnings) = &get_warnings;
        print "\n\n*********** WARNINGS *********** \n$warnings"
            if ($warnings || $NO_IMAGES || $IMAGES_ONLY);
        &image_cache_message if ($NO_IMAGES || $IMAGES_ONLY);
        &image_message if ($warnings =~ /Failed to convert/io);
        undef $warnings;

        # JCL - generate directory index entry.
        # Yet, a hard link, cause Perl lacks symlink() on some systems.
        do {
            local($EXTN) = $EXTN;
            $EXTN =~ s/_\w+(\.html?)/$1/ if ($frame_main_name);
            local($from,$to) = (eval($LINKPOINT),eval($LINKNAME));
            if (length($from) && length($to) && ($from ne $to)) {
                #frames may have altered $EXTN
                $from =~ s/$frame_main_name(\.html?)/$1/ if ($frame_main_name);
                $to =~ s/$frame_main_name(\.html?)/$1/ if ($frame_main_name);
                L2hos->Unlink($to);
                L2hos->Link($from,$to);
            }
        } unless ($NO_AUTO_LINK || !($LINKPOINT) || !($LINKNAME));

        &html_validate if ($HTML_VALIDATE && $HTML_VALIDATOR);

        # Go back to the source directory
        chdir($orig_cwd);
        $TEST_MODE = $DESTDIR if($TEST_MODE); # save path
        $DESTDIR = '';
        $OUT_NODE = 0 unless $FIXEDDIR;
        $STYLESHEET = '' if ($STYLESHEET =~ /^\Q$FILE./);
    }
    print "\nUnknown commands: ". join(" ",keys %unknown_commands)
        if %unknown_commands;
    ###MEH -- math support
    print "\nMath commands outside math: " .
        join(" ",keys %commands_outside_math) .
        "\n Output may look weird or may be faulty!\n"
        if %commands_outside_math;
    print "\nDone.\n";
    if($TEST_MODE) {
        $TEST_MODE =~ s:[$dd$dd]+$::;
        print "\nTo view the results, point your browser at:\n",
            L2hos->path2URL(L2hos->Make_directory_absolute($TEST_MODE).$dd.
            "index$EXTN"),"\n";
    }
    $end_time = time;
    $total_time = $end_time - $start_time;
    print STDOUT join(' ',"Timing:",$total_time,"seconds\n")
        if ($TIMING||$DEBUG||($VERBOSITY > 2));
    $_;
}

# Tie the global hashes to their DBM files below $TMP_, load the
# file-based pseudo databases (see read_mydb) and the saved preambles,
# and finally restore the saved state variables.
sub open_dbm_database {
    # These are DBM (unix DataBase Management) arrays which are actually
    # stored in external files. They are used for communication between
    # the main process and forked child processes;
    print STDOUT "\n"; # this mysteriously prevents a core dump !

    dbmopen(%verb, "$TMP_${dd}verb",0755);
    # dbmopen(%verbatim, "$TMP_${dd}verbatim",0755);
    dbmopen(%verb_delim, "$TMP_${dd}verb_delim",0755);
    dbmopen(%expanded,"$TMP_${dd}expanded",0755);
    # Holds max_id, verb_counter, verbatim_counter, eqn_number
    dbmopen(%global, "$TMP_${dd}global",0755);
    # Hold style sheet information
    dbmopen(%env_style, "$TMP_${dd}envstyles",0755);
    dbmopen(%txt_style, "$TMP_${dd}txtstyles",0755);
    dbmopen(%styleID, "$TMP_${dd}styleIDs",0755);

    # These next two are used during off-line image conversion
    # %new_id_map maps image id's to page_numbers of the images in images.tex
    # %image_params maps image_ids to conversion parameters for that image
    dbmopen(%new_id_map, "$TMP_${dd}ID_MAP",0755);
    dbmopen(%img_params, "$TMP_${dd}IMG_PARAMS",0755);
    dbmopen(%orig_name_map, "$TMP_${dd}ORIG_MAP",0755);

    # "| 0" forces a numeric value, so a missing counter becomes 0
    $global{'max_id'} = ($global{'max_id'} | 0);
    &read_mydb(\%verbatim, "verbatim");
    $global{'verb_counter'} = ($global{'verb_counter'} | 0);
    $global{'verbatim_counter'} = ($global{'verbatim_counter'} | 0);

    &read_mydb(\%new_command, "new_command");
    &read_mydb(\%renew_command, "renew_command");
    &read_mydb(\%provide_command, "provide_command");
    &read_mydb(\%new_theorem, "new_theorem");
    &read_mydb(\%new_environment, "new_environment");
    &read_mydb(\%dependent, "dependent");
    # &read_mydb(\%env_style, "env_style");
    # &read_mydb(\%styleID, "styleID");
    # MRO: Why should we use read_mydb instead of catfile?
    $preamble = &catfile(&_dbname("preamble"),1) || '';
    $prelatex = &catfile(&_dbname("prelatex"),1) || '';
    $aux_preamble = &catfile(&_dbname("aux_preamble"),1) || '';
    &restore_critical_variables;
}

# Save the state variables into %global, then untie and clear all the
# DBM hashes opened by open_dbm_database.
sub close_dbm_database {
    &save_critical_variables;
    dbmclose(%verb); undef %verb;
    # dbmclose(%verbatim); undef %verbatim;
    dbmclose(%verb_delim); undef %verb_delim;
    dbmclose(%expanded); undef %expanded;
    dbmclose(%global); undef %global;
    dbmclose(%env_style); undef %env_style;
    # NOTE(review): %style_id is never opened in the visible code (the
    # database hash is %styleID, closed below) -- presumably a leftover.
    dbmclose(%style_id); undef %style_id;
    dbmclose(%new_id_map); undef %new_id_map;
    dbmclose(%img_params); undef %img_params;
    dbmclose(%orig_name_map); undef %orig_name_map;
    dbmclose(%txt_style); undef %txt_style;
    dbmclose(%styleID); undef %styleID;
}

# Close, clear and re-open the three image-related DBM hashes.
sub clear_images_dbm_database {
    # <Added calls to dbmclose dprhws>
    # %new_id_map will be used by the off-line image conversion process
    #
    dbmclose(%new_id_map);
    dbmclose(%img_params);
    dbmclose(%orig_name_map);
    undef %new_id_map;
    undef %img_params;
    undef %orig_name_map;
    dbmopen(%new_id_map, "$TMP_${dd}ID_MAP",0755);
    dbmopen(%img_params, "$TMP_${dd}IMG_PARAMS",0755);
    dbmopen(%orig_name_map, "$TMP_${dd}ORIG_MAP",0755);
}

# Copy every entry of %numbered_section into the %global database.
sub initialise_sections {
    local($key);
    foreach $key (keys %numbered_section) {
        $global{$key} = $numbered_section{$key}}
}

# Store the state variables that must survive closing the databases
# (and be visible to other processes sharing them) in %global.
sub save_critical_variables {
    $global{'math_markup'} = $NO_MATH_MARKUP;
    $global{'charset'} = $CHARSET;
    $global{'charenc'} = $charset;
    $global{'language'} = $default_language;
    $global{'isolatin'} = $ISOLATIN_CHARS;
    $global{'unicode'} = $UNICODE_CHARS;
    if ($UNFINISHED_ENV) {
        $global{'unfinished_env'} = $UNFINISHED_ENV;
        $global{'replace_end_env'} = $REPLACE_END_ENV;
    }
    $global{'unfinished_comment'} = $UNFINISHED_COMMENT;
    if (@UNMATCHED_OPENING) {
        $global{'unmatched'} = join(',',@UNMATCHED_OPENING);
    }
}

# Counterpart of save_critical_variables: read the state variables back
# from %global.  A stored (true) value takes precedence; otherwise the
# current value, or a default, is used.
# The fallbacks use logical "||"; the previous bitwise "|" combined two
# strings character by character, which garbles e.g. $CHARSET whenever
# the stored and the current value differ.
sub restore_critical_variables {
    $NO_MATH_MARKUP = ($global{'math_markup'} ||
        (defined $NO_MATH_MARKUP ? $NO_MATH_MARKUP:1));
    $CHARSET = ($global{'charset'} || $CHARSET);
    $charset = ($global{'charenc'} || $charset);
    $default_language = ($global{'language'} ||
        (defined $default_language ? $default_language:'english'));
    $ISOLATIN_CHARS = ($global{'isolatin'} ||
        (defined $ISOLATIN_CHARS ? $ISOLATIN_CHARS:0));
    $UNICODE_CHARS = ($global{'unicode'} ||
        (defined $UNICODE_CHARS ? $UNICODE_CHARS:0));
    if ($global{'unfinished_env'}) {
        $UNFINISHED_ENV = $global{'unfinished_env'};
        $REPLACE_END_ENV = $global{'replace_end_env'};
    }
    $UNFINISHED_COMMENT = $global{'unfinished_comment'};
    if ($global{'unmatched'}) {
        @UNMATCHED_OPENING = split(',',$global{'unmatched'});
    }

    # undef any renewed-commands...
    # so the new defs are read from %new_command
    local($cmd,$key,$code);
    foreach $key (keys %renew_command) {
        $cmd = "do_cmd_$key";
        $code = "undef \&$cmd";
        if (defined &$cmd) {
            eval($code);
            # $@ is checked only right after the eval that may have set it;
            # $cmd already carries the do_cmd_ prefix.
            if ($@) { print "\nundef \&$cmd failed"}
        }
    }
}

#JCL: The warnings should have been handled within the DBM database.
# Unfortunately if the contents of an array are more than ~900 (system
# dependent) chars long then dbm cannot handle it and gives error messages.
# Appends $str (newline-terminated) to the file 'WARNINGS' in the current
# directory, unless that exact text is already present there.
# Also echoes the warning to STDOUT when $VERBOSITY > 1.
sub write_warnings { #clean
    my ($str) = @_;
    $str .= "\n" unless($str =~ /\n$/);
    print STDOUT "\n *** Warning: $str" if ($VERBOSITY > 1);
    my $warnings = '';
    if(-f 'WARNINGS') {
        $warnings = &catfile('WARNINGS') || '';
    }
    # do not record the same warning twice
    return () if ($warnings =~ /\Q$str\E/);
    if(open(OUT,">>WARNINGS")) {
        print OUT $str;
        close OUT;
    } else {
        print "\nError: Cannot append to 'WARNINGS': $!\n";
    }
}

# Returns the accumulated warnings, or '' if there are none (a missing
# WARNINGS file is silently ignored).
sub get_warnings {
    return &catfile('WARNINGS',1) || '';
}

# MRO: Standardizing
# Returns the whole contents of $file as one string, or undef if the file
# cannot be opened.  An error message is printed on failure unless $ignore
# is true.
sub catfile {
    my ($file,$ignore) = @_;
    unless(open(CATFILE,"<$file")) {
        print "\nError: Cannot read '$file': $!\n"
            unless($ignore);
        return undef;
    }
    local($/) = undef; # slurp in whole file
    # The filehandle read was missing here, leaving "= ;" (a syntax error).
    my $contents = <CATFILE>;
    close(CATFILE);
    $contents;
}

# Runs $HTML_VALIDATOR on every file in the current directory whose name ends
# in the HTML extension derived from $EXTN.
sub html_validate {
    my ($extn) = $EXTN;
    if ($EXTN !~ /^\.html?$/i) {
        # keep only a trailing .htm/.html part, if there is one
        $extn =~ s/^[^\.]*(\.html?)$/$1/;
    }
    print "\n *** Validating ***\n";
    my @htmls = glob("*$extn");
    my $file;
    foreach $file (@htmls) {
        system("$HTML_VALIDATOR $file");
    }
}

# Records a warning about a command whose argument could not be found.
sub lost_argument {
    local($cmd) = @_;
    &write_warnings("\nincomplete argument to command: \\$cmd");
}

# These subroutines should have been handled within the DBM database.
# Unfortunately if the contents of an array are more than ~900 (system
# dependent) chars long then dbm cannot handle it and gives error messages.
# So here we save and then read the contents explicitly.

# Appends one record "\n$mydb_mark#KEY#VALUE" to the flat-file database $db.
sub write_mydb {
    my ($db, $key, $str) = @_;
    &write_mydb_simple($db, "\n$mydb_mark#$key#$str");
}

# generate the DB file name from the DB name
sub _dbname {
    "$TMP_$dd$_[0]";
}

# Appends the raw string $str to the file belonging to database $db.
sub write_mydb_simple {
    my ($db, $str) = @_;
    my $file = &_dbname($db);
    if(open(DB,">>$file")) {
        print DB $str;
        close DB;
    } else {
        print "\nError: Cannot append to '$file': $!\n";
    }
}

# Truncates the file belonging to database $db (creating it if necessary).
sub clear_mydb {
    my ($db) = @_;
    my $file = &_dbname($db);
    if(open(DB,">$file")) {
        close DB;
    } else {
        print "\nError: Cannot clear '$file': $!\n";
    }
}

# Assumes the existence of a DB file which contains
# sequences of e.g. verbatim counters and verbatim contents.
# Reads the flat-file database $name (as written by &write_mydb) and stores
# each KEY => VALUE record in the hash referenced by $dbref.
# Returns the raw file contents, or '' when the file cannot be read.
sub read_mydb {
    my ($dbref,$name) = @_;
    my $contents = &catfile(&_dbname($name),1);
    return '' unless(defined $contents);
    # the capture in the split pattern puts the keys at the odd positions
    my @tmp = split(/\n$mydb_mark#([^#]*)#/, $contents);
    my $i = 1; # Ignore the first element at 0
    print "\nDBM: $name open..." if ($VERBOSITY > 2);
    while ($i < scalar(@tmp)) {
        my $tmp1 = $tmp[$i];
        my $tmp2 = $tmp[++$i];
        $$dbref{$tmp1} = defined $tmp2 ? $tmp2 : '';
        ++$i;
    };
    $contents;
}

# Reads in a latex generated file (e.g. .bbl or .aux)
# It returns success or failure
# ****** and binds $_ in the caller as a side-effect ******
sub process_ext_file {
    local($ext) = @_;
    local($found, $extfile,$dum,$texpath);
    $extfile = $EXTERNAL_FILE||$FILE;
    local($file) = &fulltexpath("$extfile.$ext");
    $found = 0;
    &write_warnings(
        "\n$extfile.$EXT is newer than $extfile.$ext: Please rerun latex" . ## AYS
        (($ext =~ /bbl/) ? " and bibtex.\n" : ".\n"))
        if ( ($found = (-f $file)) &&
            &newer(&fulltexpath("$extfile.$EXT"), $file)); ## AYS
    if ((!$found)&&($extfile =~ /\.$EXT$/)) {
        $file = &fulltexpath("$extfile");
        &write_warnings(
            "\n$extfile is newer than $extfile: Please rerun latex" . ## AYS
            (($ext =~ /bbl/) ? " and bibtex.\n" : ".\n"))
            if ( ($found = (-f $file)) &&
                &newer(&fulltexpath("$extfile"), $file)); ## AYS
    }

    # check in other directories on the $TEXINPUTS paths
    if (!$found) {
        foreach $texpath (split /$envkey/, $TEXINPUTS ) {
            $file = "$texpath$dd$extfile.$ext";
            last if ($found = (-f $file));
        }
    }
    if ( $found ) {
        print "\nReading $ext file: $file ...";
        # must allow @ within control-sequence names
        $dum = &do_cmd_makeatletter();
        &slurp_input($file);
        if ($ext =~ /bbl/) {
            # remove the \newcommand{\etalchar}{...} since not needed
            s/^\\newcommand{\\etalchar}[^\n\r]*[\n\r]+//s;
        }
        &pre_process;
        &substitute_meta_cmds if (%new_command || %new_environment);
        if ($ext eq "aux") {
            my $latex_pathname = L2hos->path2latex($file);
            $aux_preamble .= "\\AtBeginDocument{\\makeatletter\n\\input $latex_pathname\n\\makeatother\n}\n";
            local(@extlines) = split ("\n", $_);
            print " translating ".(0+@extlines). " lines " if ($VERBOSITY >1);
            local($eline,$skip_to); #$_ = '';
            foreach $eline (@extlines) {
                # while skipping a \@setckpt group, drop lines until the one
                # carrying its closing bracket
                if ($skip_to) { next unless ($eline =~ s/$O$skip_to$C//) }
                $skip_to = '';
                # skip lines added for pdfTeX/hyperref compatibility
                next if ($eline =~ /^\\(ifx|else|fi|global \\let|gdef|AtEndDocument|let )/);
                # remove \index and \label commands, else invalid links may result
                $eline =~ s/\\(index|label)\s*($O\d+$C).*\2//g;
                if ($eline =~ /\\(old)?contentsline/) {
                    do { local($_,$save_AUX) = ($eline,$AUX_FILE);
                        $AUX_FILE = 0;
                        &wrap_shorthand_environments;
                        #footnote markers upset the numbering
                        s/\\footnote(mark|text)?//g;
                        $eline = &translate_environments($_);
                        $AUX_FILE = $save_AUX;
                        undef $_ };
                } elsif ($eline =~ s/^\\\@input//) {
                    &do_cmd__at_input($eline);
                    $eline = '';
                } elsif ($eline =~ s/^\\\@setckpt$O(\d+)$C//) {
                    $skip_to = $1; next;
                }
#               $eline =~ s/$image_mark#([^#]+)#/print "\nIMAGE:",$img_params{$1},"\n";''/e;
#               $_ .= &translate_commands(&translate_environments($eline));
                $_ .= &translate_commands($eline) if $eline;
            }
            undef @extlines;
        } elsif ($ext =~ /$caption_suffixes/) {
            local(@extlines) = split ("\n", $_);
            print " translating ".(0+@extlines). " lines "if ($VERBOSITY >1);
            local($eline);
            $_ = '';
            foreach $eline (@extlines) {
                # remove \index and \label commands, else invalid links may result
                $eline =~ s/\\(index|label)\s*($O\d+$C).*\2//gso;
                if ($eline =~ /\\(old)?contentsline/) {
                    do { local($_,$save_PREAMBLE) = ($eline,$PREAMBLE);
                        $PREAMBLE = 0;
                        &wrap_shorthand_environments;
                        $eline = &translate_environments($_);
                        $PREAMBLE = $save_PREAMBLE;
                        undef $_ };
                }
                $_ .= &translate_commands($eline);
            }
            undef @extlines;
        } else {
            print " wrapping " if ($VERBOSITY >1);
            &wrap_shorthand_environments;
            $_ = &translate_commands(&translate_environments($_));
            print " translating " if ($VERBOSITY >1);
        }
        print "\n processed size: ".length($_)."\n" if($VERBOSITY>1);
        $dum = &do_cmd_makeatother();
    } else {
        print "\n*** Could not find file: $file ***\n" if ($DEBUG)
    };
    $found;
}

# Normalises $TEXINPUTS and exports the resulting search path in
# $ENV{'TEXINPUTS'}.
sub deal_with_texinputs {
# The dot precedes all, this let's local files override always.
# The dirs we want are given as parameter list.
    if(!$TEXINPUTS) { $TEXINPUTS = '.' }
    elsif ($TEXINPUTS =~ /^$envkey/) {
        $TEXINPUTS = '.'.$TEXINPUTS
    };
    if ($ROOTED) {$TEXINPUTS .= "$envkey$FIXEDDIR"}
    $TEXINPUTS = &absolutize_path($TEXINPUTS);
    $ENV{'TEXINPUTS'} = join($envkey,".",@_,$TEXINPUTS,$ENV{'TEXINPUTS'});
}

# provided by Fred Drake
# Passes every $envkey-separated component of $path through
# L2hos->Make_directory_absolute and returns the re-joined path.
sub absolutize_path {
    my ($path) = @_;
    my $npath = '';
    foreach $dir (split /$envkey/o, $path) {
        $npath .= L2hos->Make_directory_absolute($dir) . $envkey;
    }
    # drop the separator added after the last component
    $npath =~ s/$envkey$//;
    $npath;
}

# Appends to $_ a starred section of level $outermost_level, titled
# \infopagename and containing \textohtmlinfopage.
sub add_document_info_page {
    # Uses $outermost_level
    # Nasty race conditions if the next two are done in parallel
    local($X) = ++$global{'max_id'};
    local($Y) = ++$global{'max_id'};
    ###MEH -- changed for math support: no underscores in commandnames
    # The rule markup between the two newlines had been lost from this
    # string; it is restored as <HR>.
    $_ = join('', $_
        , (($MAX_SPLIT_DEPTH <= $section_commands{$outermost_level})?
            "\n<HR>\n" : '')
        , "\\$outermost_level", "*"
        , "$O$X$C$O$Y$C\\infopagename$O$Y$C$O$X$C\n"
        , " \\textohtmlinfopage");
}

# For each style file name in TMP_styles (generated by texexpand) look for a
# perl file in $LATEX2HTMLDIR/styles and load it.
sub load_style_file_translations {
    local($_, $style, $options, $dir);
    print "\n";
    if ($TEXDEFS) {
        foreach $dir (split(/$envkey/,$LATEX2HTMLSTYLES)) {
            if (-f ($_ = "$dir${dd}texdefs.perl")) {
                print "\nLoading $_...";
                require ($_);
                $styles_loaded{'texdefs'} = 1;
                last;
            }
        }
    }

    # packages automatically implemented
    local($auto_styles) = $AUTO_STYLES;
    $auto_styles .= 'array|' if ($HTML_VERSION > 3.1);
    $auto_styles .= 'tabularx|' if ($HTML_VERSION > 3.1);
    $auto_styles .= 'theorem|';

    # these are not packages, but can appear as if class-options
    $auto_styles .= 'psamsfonts|';
    $auto_styles .= 'noamsfonts|';

    $auto_styles =~ s/\|$//;

    if(open(STYLES, "<$TMP_${dd}styles")) {
        # The <STYLES> read had been lost here; a bare "while()" never reads
        # a line and never terminates.
        while(<STYLES>) {
            if(s/^\s*(\S+)\s*(.*)$/$style = $1; $options = $2;/eo) {
                &do_require_package($style);
                # $DONT_INCLUDE is a colon-separated list; turn it into a
                # regexp alternation
                $_ = $DONT_INCLUDE;
                s/:/|/g;
                &write_warnings("No implementation found for style \`$style\'\n")
                    unless ($styles_loaded{$style} || $style =~ /^($_)$/
                        || $style =~ /$auto_styles/);

                # MRO: Process options for packages
                &do_package_options($style,$options) if($options);
            }
        }
        close(STYLES);
    } else {
        print "\nError: Cannot read '$TMP_${dd}styles': $!\n";
    }
}

################## Weird Special case ##################

# The new texexpand can be told to leave in \input and \include
# commands which contain code that the translator should simply pass
# to latex, such as the psfig stuff. These should still be seen by
# TeX, so we add them to the preamble ...
# Removes every match of $include_line_rx from $_ and adds its raw-TeX form
# to the preamble.
sub do_include_lines {
    while (s/$include_line_rx//o) {
        local($include_line) = &revert_to_raw_tex($&);
        &add_to_preamble ('include', $include_line);
    }
}

########################## Preprocessing ############################

# JCL(jcl-verb)
# The \verb declaration and the verbatim environment contain simulated
# typed text and should not be processed. Characters such as $,\,{,and }
# loose their special meanings and should not be considered when marking
# brackets etc. To achieve this \verb declarations and the contents of
# verbatim environments are replaced by markers. At the end the original
# text is put back into the document.
# The markers for verb and verbatim are different so that these commands
# can be restored to what the raw input was just in case they need to
# be passed to latex.

# Pre-processes the document text held in $_ (modified in place):
#  - marks HTML special characters and texexpand file markers,
#  - moves LaTeX comments into %comments and the "verbatim" database,
#  - removes htmlonly/latexonly/comment environment wrappers,
#  - replaces verbatim-like environments and \verb declarations by markers,
#  - finally numbers all brace pairs via &mark_string.
#
# NOTE(review): the text this routine was restored from had lost every
# tag-like "<...>" sequence.  The "<tex2html_file>", "<tex2html_endfile>"
# and "<verb...N>" pieces below are reconstructed from the surviving code
# (the tag pattern "[^\d>]*$id>" and the matching revert substitution);
# confirm against a pristine copy of the file.
sub pre_process {
    # Modifies $_;
    #JKR: We need support for some special environments.
    # This has to be here, because they might contain
    # structuring commands like \section etc.
    local(%comments);
    &pre_pre_process if (defined &pre_pre_process);
    s/\\\\/\\\\ /go; # Makes it unnecessary to look for escaped cmds
    &replace_html_special_chars;
    # Remove fake environment which should be invisible to LaTeX2HTML.
    s/\001//m;
    s/[%]end\s*{latexonly}/\001/gom;
    s/[%]begin\s*{latexonly}([^\001]*)\001/%/gos;
    s/\001//m;

    &preprocess_alltt if defined(&preprocess_alltt);

    $KEEP_FILE_MARKERS = 1;
    if ($KEEP_FILE_MARKERS) {
#        if (s/%%% TEXEXPAND: \w+ FILE( MARKER)? (\S*).*/
#            '<tex2html_'.($1?'':'end').'file>'.qq|#$2#|."\n"/em) {
#            $_ = "<tex2html_file>#$2#\n". $_ };
        #RRM: ignore \n at end of included file, else \par may result
        # A line with " MARKER" starts an included file, one without ends it.
        if (s/(\n{1,2})?%%% TEXEXPAND: \w+ FILE( MARKER)? (\S*).*\n?/
            ($2?$1:"\n").'<tex2html_'.($2?'':'end').'file>'.qq|#$3#|."\n"/em) {
            $_ = "<tex2html_file>#$3#\n". $_ };
    } else {
        s/%%% TEXEXPAND[^\n]*\n//gm;
    }

    # Move all LaTeX comments into a local list
    s/([ \t]*(^|\G|[^\\]))(%.*(\n[ \t]*|$))/print "%";
        $comments{++$global{'verbatim_counter'}} = "$3";
        &write_mydb("verbatim", $global{'verbatim_counter'}, $3);
        "$1$comment_mark".$global{'verbatim_counter'}."\n"/mge;
    # Remove the htmlonly-environment
    s/\\begin\s*{htmlonly}\s*\n?//gom;
    s/\\end\s*{htmlonly}\s*\n?//gom;
    # Remove enviroments which should be invisible to LaTeX2HTML.
    s/\n[^%\n]*\\end\s*{latexonly}\s*\n?/\001/gom;
    s/((^|\n)[^%\n]*)\\begin\s*{latexonly}([^\001]*)\001/$1/gom;
    s/\\end\s*{comment}\s*\n?/\001/gom;
    s/\\begin\s*{comment}([^\001]*)\001//gom;

    # this used to be earlier, but that can create problems with comments
    &wrap_other_environments if (%other_environments);

#    s/\\\\/\\\\ /go; # Makes it unnecessary to look for escaped cmds
    local($next, $esc_del);
    &normalize_language_changes;
    # Patches by #JKR, #EI#, #JCL(jcl-verb)

    #protect \verb|\begin/end....| parts, for LaTeX documentation
    s/(\\verb\*?(.))\\(begin|end)/$1\003$3/g;

    local(@processedV);
    local($opt, $style_info,$before, $contents, $after, $env);
    while (($UNFINISHED_COMMENT)||
        (/\\begin\s*($opt_arg_rx)?\s*\{($verbatim_env_rx|$keepcomments_rx)\}/o)) {
        ($opt, $style_info) = ($1,$2);
        $before=$contents=$after=$env='';
        if ($UNFINISHED_COMMENT) {
            # an environment left open by the previous chunk: everything in
            # $_ is a candidate for its remaining contents
            $UNFINISHED_COMMENT =~ s/([^:]*)::(\d+)/$env=$1;$after=$_;
                $before = join("",$unfinished_mark,$env,$2,"#");''/e;
            print "\nfound the lost \\end{$env}\n";
        }
        #RRM: can we avoid copying long strings here ?
        # maybe this loop can be an s/.../../s with (.*?)
        #
        ($before, $after, $env) = ($`, $', $3) unless ($env);
        if (!($before =~
            /\\begin(\s*\[[^\]]*\]\s*)?\{($verbatim_env_rx|$keepcomments_rx)\}/)) {
            push(@processedV,$before);
            print "'";$before = '';
        }
        if ($after =~ /\s*\\end{$env[*]?}/) { # Must NOT use the s///o option!!!
            ($contents, $after) = ($`, $');
            $contents =~ s/^\n+/\n/s;
#            $contents =~ s/\n+$//s;

            # re-insert comments
            $contents =~ s/$comment_mark(\d+)\n?/$comments{$1}/g;
#            $contents =~ s/$comment_mark(\d+)/$verbatim{$1}/g;

            # revert '\\ ' -> '\\' only once
            if ($env =~ /rawhtml|$keepcomments_rx/i) {
                $contents = &revert_to_raw_tex($contents);
            } else {
                $contents =~ s/([^\\](?:\\\\)*\\)([$html_escape_chars])/$1.&special($2)/geos;
                $contents =~ s/\\\\ /\\\\/go;
            }

            if ($env =~/$keepcomments_rx/) {
                $verbatim{++$global{'verbatim_counter'}} = "$contents";
            } else {
                &write_mydb("verbatim", ++$global{'verbatim_counter'}, $contents);
            }
#            $verbatim{$global{'verbatim_counter'}} = "$contents" if ($env =~/$keepcomments_rx/);
#            $verbatim{$global{'verbatim_counter'}} = "$contents";

            if ($env =~ /rawhtml|$keepcomments_rx/i) {
                if ($before) {
                    $after = join("",$verbatim_mark,$env
                        ,$global{'verbatim_counter'},"#",$after);
                } else {
                    push (@processedV, join("",$verbatim_mark,$env
                        ,$global{'verbatim_counter'},"#"));
                }
            } elsif ($env =~ /tex2html_code/) {
                if ($before) {
                    $after = join("","\\begin", $opt, "\{verbatim_code\}"
                        , $verbatim_mark,$env
                        , $global{'verbatim_counter'},"#"
                        , "\\end\{verbatim_code\}",$after);
                } else {
                    push (@processedV
                        , join("","\\begin", $opt, "\{verbatim_code\}"
                            , $verbatim_mark,$env
                            , $global{'verbatim_counter'},"#"
                            , "\\end\{verbatim_code\}"));
                }
            } else {
                if ($before) {
                    $after = join("","\\begin", $opt, "\{tex2html_preform\}"
                        , $verbatim_mark,$env
                        , $global{'verbatim_counter'},"#"
                        , "\\end\{tex2html_preform\}",$after);
                } else {
                    push (@processedV
                        , join("","\\begin", $opt, "\{tex2html_preform\}"
                            , $verbatim_mark,$env
                            , $global{'verbatim_counter'},"#"
                            , "\\end\{tex2html_preform\}" ));
                }
            }
        } else {
            print "Cannot find \\end{$env}\n";
            $after =~ s/$comment_mark(\d+)\n?/$comments{$1}/g;
#            $after =~ s/$comment_mark(\d+)/$verbatim{$1}/g;
            if ($env =~ /rawhtml|$keepcomments_rx/i) {
                # The text of the unterminated environment is in $after;
                # $contents is still empty in this branch, so reverting
                # $contents (as was done before) discarded the text.
                $after = &revert_to_raw_tex($after);
            } else {
                $after =~ s/([^\\](?:\\\\)*\\)([$html_escape_chars])/$1.&special($2)/geos;
                $after =~ s/\\\\ /\\\\/go;
            }
            if ($env =~/$keepcomments_rx/) {
                $verbatim{++$global{'verbatim_counter'}} = "$after";
            } else {
                &write_mydb("verbatim", ++$global{'verbatim_counter'}, $after );
            }
            $after = join("",$unfinished_mark,$env
                ,$global{'verbatim_counter'},"#");
        }
        $_ = join("",$before,$after);
    }
    print STDOUT "\nsensitive environments found: ".(int(0+@processedV/2))." "
        if((@processedV)&&($VERBOSITY > 1));
    $_ = join('',@processedV, $_);
    undef @processedV;

    #restore \verb|\begin/end....| parts, for LaTeX documentation
#    $_ =~ s/(\\verb\W*?)\003(begin|end)/$1\\$2/g;
    $_ =~ s/(\\verb(;SPM\w+;|\W*?))\003(begin|end)/$1\\$3/g;

    # Now do the \verb declarations
    # Patches by: #JKR, #EI#, #JCL(jcl-verb)
    # Tag \verb command and legal opening delimiter with unique number.
    # Replace tagged ones and its contents with $verb_mark & id number if the
    # closing delimiter can be found. After no more \verb's are to tag, revert
    # tagged one's to the original pattern.
    local($del,$contents,$verb_rerun);
    local($id) = $global{'verb_counter'};
    # must tag only one alternation per loop
    ##RRM: can this be speeded up using a list ??
    my $vbmark = $verb_mark;
    while (s/\\verb(\t*\*\t*)(\S)/"<verb$1".++$id.">$2"/e ||
           s/\\verb()(\;SPM\w+\;|[^a-zA-Z*\s])/"<verb$1".++$id.">$2"/e ||
           s/\\verb(\t\t*)([^*\s])/"<verb$1".++$id.">$2"/e) {

        $del = $2;
        #RRM: retain knowledge of whether \verb* or \verb
        $vb_mark = ($1 =~/^\s*\*/? $verbstar_mark : $verb_mark);
        $esc_del = &escape_rx_chars($del);
        $esc_del = '' if (length($del) > 2);

        # try to find closing delimiter and substitute the complete
        # statement with $verb_mark or $verbstar_mark
#        s/(<verb[^\d>]*$id>[\Q$del\E])([^$esc_del\n]*)([\Q$del\E]|$comment_mark(\d+)\n?)/
        s/(<verb[^\d>]*$id>\Q$del\E)([^$esc_del\n]*?)(\Q$del\E|$comment_mark(\d+)\n?)/
            $contents=$2;
            if ($4) { $verb_rerun = 1;
                join('', "\\verb$del", $contents, $comments{$4})
            } else {
                $contents =~ s|\\\\ |\\\\|g;
                $contents =~ s|\n| |g;
                $verb{$id}=$contents;
                $verb_delim{$id}=$del;
                join('',$vb_mark,$id,$verb_mark)
            }
        /e;
    }
    $global{'verb_counter'} = $id;
    # revert changes to fake verb statements
    s/<verb([^\d>]*)\d+>/\\verb$1/g;

    #JKR: the comments include the linebreak and the following whitespace
#    s/([^\\]|^)(%.*\n[ \t]*)+/$1/gom; # Remove Comments but not % which may be meaningful
    s/((^|\n)$comment_mark(\d+))+//gom; # Remove comment markers on new lines, but *not* the trailing \n
    s/(\\\w+|(\W?))($comment_mark\d*\n?)/($2)? $2.$3:($1? $1.' ':'')/egm; # Remove comment markers, not after braces
#    s/(\W?)($comment_mark\d*\n?)/($1)? $1.$2:''/egm; # Remove comment markers, not after braces
    # Remove comment markers, but *not* the trailing \n
#  HWS: Correctly remove multiple %%'s.
#
    # NOTE(review): taken as live code because it pairs with the restoring
    # s/\002/\\%/ two statements below; confirm against a pristine copy.
    s/\\%/\002/gm;
#    s/(%.*\n[ \t]*)//gm;
    s/(%[^\n]*\n)[ \t]*/$comment_mark\n/gm;

    s/\002/\\%/gm;

    local($tmp1,$tmp2);
    s/^$unfinished_mark$keepcomments_rx(\d+)#\n?$verbatim_mark$keepcomments_rx(\d+)#/
        $verbatim{$4}."\n\\end{$1}"/egm; # Raw TeX
    s/$verbatim_mark$keepcomments_rx(\d+)#/
        $tmp1 = $1;
        $tmp2 = &protect_after_comments($verbatim{$2});
        $tmp2 =~ s!\n$!!s;
        join ('', "\\begin{$tmp1}"
            , $tmp2
            , "\n\\end{$tmp1}"
            )/egm; # Raw TeX
    s/$unfinished_mark$keepcomments_rx(\d+)#/$UNFINISHED_COMMENT="$1::$2";
        "\\begin{$1}\n".$verbatim{$2}/egm; # Raw TeX

    $KEEP_FILE_MARKERS = 1;
    if ($KEEP_FILE_MARKERS) {
        # " MARKER" is mandatory in this pattern, so only start-of-file
        # markers are produced here.
        s/%%% TEXEXPAND: \w+ FILE( MARKER) (\S*).*\n/
            '<tex2html_file>'.qq|#.$2#\n|/gem;
    } else {
        s/%%% TEXEXPAND[^\n]*\n//gm;
    }

    &mark_string($_);

    # attempt to remove the \html \latex and \latexhtml commands
    s/\\latex\s*($O\d+$C)(.*)\1//gm;
    s/\\latexhtml\s*($O\d+$C)(.*)\1\s*($O\d+$C)(.*)\3/$4/sg;
    s/\\html\s*($O\d+$C)(.*)\1/$2/sg;
    s/\\html\s*($O\d+$C)//gm;

#    &make_unique($_);
}

# RRM: When comments are retained, then ensure that they are benign
# by removing \s and escaping braces,
# so that environments/bracing cannot become unbalanced.
# Makes the text following each unescaped % in $verb_text harmless
# (see &protect_helper) and returns the result.
sub protect_after_comments {
    my ($verb_text) = @_;
#    $verb_text =~ s/\%(.*)/'%'.&protect_helper($1)/eg;
    $verb_text =~ s/(^|[^\\])(\\\\)*\%(.*)/$1.$2.'%'.&protect_helper($3)/emg;
    $verb_text;
}

# Replaces every backslash in $text by a space and puts a backslash in front
# of every brace.
sub protect_helper {
    my ($text) = @_;
    $text =~ s/\\/ /g;
    $text =~ s/(\{|\})/\\$1/g;
    $text;
}

# Wraps the given text, labelled with $type, in an HTML comment, stores it as
# a new "verbatim" entry and returns the marker that stands for it.
# NOTE(review): the "<!-- ... -->" wrapper had been stripped from this copy
# of the file; only the opening quote and the closing '" );' survived.  The
# exact spacing inside the wrapper is reconstructed -- confirm against a
# pristine copy.
sub make_comment {
    local($type,$_) = @_;
    $_ =~ s/\\(index|label)\s*(($O|$OP)\d+($C|$CP)).*\2//sg;
    $_ = &revert_to_raw_tex($_); s/^\n+//m;
    $_ =~ s/\\(index|label)\s*\{.*\}//sg;
    s/\-\-/- -/g; s/\-\-/- -/g; # cannot have -- inside a comment
    $_ = join('', '<!-- ', $type , "\n ", $_ , "\n -->" );
    $verbatim{++$global{'verbatim_counter'}} = $_;
    &write_mydb('verbatim', $global{'verbatim_counter'}, $_ );
    join('', $verbatim_mark, 'verbatim' , $global{'verbatim_counter'},'#')
}

# Wraps the chunks of $_ that are delimited by the START:END keys of
# %other_environments in \begin{pre_ENV}...\end{pre_ENV} (or tex2html_ENV)
# environments, where ENV is the value stored under that key.
# Returns the modified $_.
sub wrap_other_environments {
    local($key, $env, $start, $end, $opt_env, $opt_start);
    foreach $key (keys %other_environments) {
        # skip bogus entries
        next unless ($env = $other_environments{$key});
        # split the key at its first colon into the start and end delimiters
        $key =~ s/:/($start,$end)=($`,$');':'/e;

        if (($end =~ /^\#$/m) && ($start =~ /^\#/m)) {
            # catch Indica pre-processor language switches
            $opt_start = $';
            if ($env =~ s/\[(\w*)\]//o) {
                $opt_env = join('','[', ($1 ? $1 : $opt_start ), ']');
            }
            local($next);
            while ($_ =~ /$start\b/) {
                push(@pre_wrapped, $`, "\\begin\{pre_$env\}", $opt_env );
                $_=$';
                if (/(\n*)$end/) {
                    push(@pre_wrapped, $`.$1,"\\end\{pre_$env\}$1");
                    $_ = $';
                    if (!(s/^N(IL)?//o)) {$_ = '#'.$_ }
                } else {
                    print "\n *** unclosed $start...$end chunk ***\n";
                    last;
                }
            }
            $_ = join('', @pre_wrapped, $_);
            undef @pre_wrapped;

        } elsif (($end=~/^\n$/) && ($start =~ /^\#/)) {
            # catch ITRANS pre-processor language info; $env = 'nowrap';
            # NOTE(review): "$env = 'nowrap';" above is read as part of the
            # comment, not as a statement -- confirm against a pristine copy.
            local($ilang) = $start; $ilang =~ s/^\#//m;
            s/$start\s*\=([^<\n%]*)\s*($comment_mark\d*|\n|%)/\\begin\{tex2html_$env\}\\ITRANSinfo\{$ilang\}\{$1\}\n\\end\{tex2html_$env\}$2/g;

        } elsif (!$end &&($start =~ /^\#/m)) {
            # catch Indica pre-processor input-mode switches
            s/$start(.*)\n/\\begin\{tex2html_$env\}$&\\end\{tex2html_$env\}\n/g;

        } elsif (($start eq $end)&&(length($start) == 1)) {
            $start =~ s/(\W)/\\$1/; $end = $start;
            s/([^$end])$start([^$end]+)$end/$1\\begin\{pre_$env\}$2\\end\{pre_$env\}/mg;

        } elsif ($start eq $end) {
            if (!($start =~ /\#\#/)) {
                $start =~ s/(\W)/\\$1/g; $end = $start;
            }
            local (@pre_wrapped);
            local($opt); $opt = '[indian]' if ($start =~ /^\#\#$/m);
            while ($_ =~ /$start/s) {
                push(@pre_wrapped, $` , "\\begin\{pre_$env\}$opt");
                $_=$';
                if (/$end/s) {
                    push(@pre_wrapped, $`, "\\end\{pre_$env\}");
                    $_ = $';
                } else {
                    print "\n *** unclosed $start...$end chunk ***\n";
                    last;
                }
            }
            $_ = join('', @pre_wrapped, $_);
            undef @pre_wrapped;

        } elsif ($start && ($env =~ /itrans/)) {
            # ITRANS is of this form
            local($indic); if($start =~ /\#(\w+)$/m) {$indic = $1}
            #include the language-name as an optional parameter
            s/$start\b/\\begin\{pre_$env\}\[$indic\]/sg;
            s/$end\b/\\end\{pre_$env\}/sg;

        } elsif (($start)&&($end)) {
            s/$start\b/\\begin\{pre_$env\}/sg;
            s/$end\b/\\end\{pre_$env\}/sg;
        }
    }
    $_;
}

#################### Marking Matching Brackets ######################

# Reads the entire input file and performs pre_processing operations
# on it before returning it as a single string. The pre_processing is
# done on separate chunks of the input file by separate Unix processes
# as determined by LaTeX \input commands, in order to reduce the memory
# requirements of LaTeX2HTML.
sub slurp_input_and_partition_and_pre_process {
    local($file) = @_;
    local(%string, @files, $pos);
    local ($count) = 1;

    unless(open(SINPUT,"<$file")) {
        die "\nError: Cannot read '$file': $!\n";
    }
    local(@file_string);
    print STDOUT "$file" if ($VERBOSITY >1);
    # The <SINPUT> read had been lost from this loop header.
    while (<SINPUT>) {
        if (/TEXEXPAND: INCLUDED FILE MARKER (\S*)/) {
            # Forking seems to screw up the rest of the input stream
            # We save the current position ...
            $pos = tell SINPUT;
            print STDOUT " fork at offset $pos " if ($VERBOSITY >1);
            $string{'STRING'} = join('',@file_string); @file_string = ();
            &write_string_out($count);
            delete $string{'STRING'};
            # ... so that we can return to it
            seek(SINPUT, $pos, 0);
            print STDOUT "\nDoing $1 ";
            ++$count
        } else {
#            $string{'STRING'} .= $_
            push(@file_string,$_);
        }
    }
    # the remainder after the last marker forms the final partition
    $string{'STRING'} = join('',@file_string); @file_string = ();
    &write_string_out($count);
    delete $string{'STRING'};
    close SINPUT;

    @files = ();
    if(opendir(DIR, $TMP_)) {
        @files = sort grep(/^\Q$PARTITION_PREFIX\E\d+/, readdir(DIR));
        closedir(DIR);
    }

    unless(@files) {
        die "\nFailed to read in document parts.\n".
            "Look up section Globbing in the troubleshooting manual.\n";
    }

    $count = 0;
    foreach $file (@files) {
        print STDOUT "\nappending file: $TMP_$dd$file " if ($VERBOSITY > 1);
        $_ .= (&catfile("$TMP_$dd$file") || '');
        print STDOUT "\ntotal length: ".length($_)." characters\n" if ($VERBOSITY > 1);
    }
    die "\nFailed to read in document parts (out of memory?).\n"
        unless length($_);
    print STDOUT "\ntotal length: ".length($_)." characters\n" if ($VERBOSITY > 1);
}

# Pre-processes the partition held in $string{'STRING'} and writes it to the
# partition file numbered $count (zero-padded to three digits); the work is
# done in a forked child when $CAN_FORK is set, otherwise in-process.
sub write_string_out {
    local($count) = @_;
    if ($count < 10) {$count = '00'.$count}
    elsif ($count < 100) {$count = '0'.$count}
    local($pid);
    # All open unflushed streams are inherited by the child. If this is
    # not set then the parent will *not* wait
    $| = 1;
    # fork returns 0 to the child and PID to the parent
    &write_mydb_simple("prelatex", $prelatex);
    &close_dbm_database;
    unless ($CAN_FORK) {
        &do_write_string_out;
    } else {
        unless ($pid = fork) {
            &do_write_string_out;
            exit 0;
        };
        waitpid($pid,0);
    }
    &open_dbm_database;
}

# Does the actual work for &write_string_out; relies on the caller's $count
# and %string (both are dynamically scoped there).
sub do_write_string_out {
    local($_);
    close (SINPUT) if($CAN_FORK);
    &open_dbm_database;
    $_ = delete $string{'STRING'};
    # locate blank-lines, for paragraphs.
    # Replace verbatim environments etc.
    &pre_process;
    # locate the blank lines for \par s
    &substitute_pars;
    # Handle newcommand, newenvironment, newcounter ...
    &substitute_meta_cmds;
    &wrap_shorthand_environments;
    print STDOUT "\n *** End-of-partition ***" if ($VERBOSITY > 1);
    if(open(OUT, ">$TMP_$dd$PARTITION_PREFIX$count")) {
        print OUT $_;
        close(OUT);
    } else {
        print "\nError: Cannot write '$TMP_$dd$PARTITION_PREFIX$count': $!\n";
    }
    print STDOUT $_ if ($VERBOSITY > 9);
    $preamble = join("\n",$preamble,@preamble); # undef @preamble;
    &write_mydb_simple("preamble", $preamble);
    # this was done earlier; it should not be repeated
    #&write_mydb_simple("prelatex", $prelatex);
    &write_mydb_simple("aux_preamble", $aux_preamble);
    &close_dbm_database;
}

# Reads the entire input file into a
# single string.
# The string is left in the global $_ and is also the return value; it is
# undef when the file cannot be opened (an error message is printed).
sub slurp_input {
    local($file) = @_;
    local(%string);
    if(open(INPUT,"<$file")) {
        local(@file_string);
        # The <INPUT> read had been lost from this loop header.
        while (<INPUT>) {
            push(@file_string, $_ );
        }
        $string{'STRING'} = join('',@file_string);
        close INPUT;
        undef @file_string;
    } else {
        print "\nError: Cannot read '$file': $!\n";
    }
    $_ = delete $string{'STRING'}; # Blow it away and return the result
}

# MRO: make them more efficient
# Each of these maps its argument through the named table and falls back to
# the argument itself when the table has no (true) entry for it.
sub special {
    $html_specials{$_[0]} || $_[0];
}

sub special_inv {
    $html_specials_inv{$_[0]} || $_[0];
}

sub special_html {
    $html_special_entities{$_[0]} || $_[0];
}

sub special_html_inv {
    $html_spec_entities_inv{$_[0]} || $_[0];
}

# Mark each matching opening and closing bracket with a unique id.
# Replaces the braces in the string passed as first argument (modified in
# place through $_[0]) by numbered bracket marks built from $O, an id and $C.
# Ids continue from $global{'max_id'}, which is updated at the end.
# Opening braces that find no partner are remembered in @UNMATCHED_OPENING;
# closing braces that find no partner are reported and removed.
sub mark_string {
    # local (*_) = @_; # Modifies $_ in the caller;
    # -> MRO: changed to $_[0] (same effect)
    # MRO: removed deprecated $*, replaced by option /m
    # Hide escaped braces (\{ and \}) behind placeholder words, so that the
    # matching below does not see them; they are put back at the very end.
    $_[0] =~ s/(^|[^\\])\\{/$1tex2html_escaped_opening_bracket/gom;
    $_[0] =~ s/(^|[^\\])\\{/$1tex2html_escaped_opening_bracket/gom; # repeat this
    $_[0] =~ s/(^|[^\\])\\}/$1tex2html_escaped_closing_bracket/gom;
    $_[0] =~ s/(^|[^\\])\\}/$1tex2html_escaped_closing_bracket/gom; # repeat this

    my $id = $global{'max_id'};
    my $prev_id = $id;
    # mark all balanced braces
    # MRO: This should in fact mark all of them as the hierarchy is
    # processed inside-out.
    1 while($_[0] =~ s/{([^{}]*)}/join("",$O,++$id,$C,$1,$O,$id,$C)/geo);

    # What follows seems esoteric...
    my @processedB = ();
    # Take one opening brace at a time
    while ($_[0] =~ /\{/) {
        my ($before,$after) = ($`,$');
        my $change = 0;
        # closing braces in front of this opening brace can only belong to
        # openings recorded earlier as unmatched
        while (@UNMATCHED_OPENING && $before =~ /\}/) {
            my $this = pop(@UNMATCHED_OPENING);
            print "\n *** matching brace \#$this found ***\n";
            $before =~ s/\}/join("",$O,$this,$C)/eo;
            $change = 1;
        }
        $_[0] = join('',$before,"\{",$after) if($change);
        # MRO: mark one opening brace
        if($_[0] =~ s/^([^{]*){/push(@processedB,$1);join('',$O,++$id,$C)/eos) {
            $before=''; $after=$';
        }
        if ($after =~ /\}/) {
            $after =~ s/\}/join("",$O,$id,$C)/eo;
            $_[0] = join('',$before,$O,$id,$C,$after);
        } else {
            print "\n *** opening brace \#$id is unmatched ***\n";
            $after =~ /^(.+\n)(.+\n)?/;
            print " preceding: $after \n";
            push (@UNMATCHED_OPENING,$id);
        }
    }
    # put the text that was set aside in front of each opening brace back
    $_[0] = join('',@processedB,$_[0]); undef(@processedB);
    print STDOUT "\nInfo: bracketings found: ", $id - $prev_id,"\n"
        if ($VERBOSITY > 1);
    # process remaining closing braces
    while (@UNMATCHED_OPENING && $_[0] =~ /\}/) {
        my $this = pop(@UNMATCHED_OPENING);
        print "\n *** matching brace \#$this found ***\n";
        $_[0] =~ s/\}/join("",$O,$this,$C)/eo;
    }

    # any closing brace still present has no partner: report where it was
    # found, then drop it
    while ($_[0] =~ /\}/) {
        print "\n *** there was an unmatched closing \} ";
        my ($beforeline,$prevline,$afterline) = ($`, $`.$& , $');
        $prevline =~ /\n([^\n]+)\}$/m;
        if ($1) {
            print "at the end of:\n" . $1 . "\}\n\n";
        } else {
            $afterline =~ /^([^\n]+)\n/m;
            if ($1) {
                print "at the start of:\n\}" . $1 ."\n\n";
            } else {
                $prevline =~ /\n([^\n]+)\n\}$/m;
                print "on a line by itself after:\n" . $1 . "\n\}\n\n";
            }
        }
        $_[0] = $beforeline . $afterline;
    }
    $global{'max_id'} = $id;

    # restore escaped braces
    $_[0] =~ s/tex2html_escaped_opening_bracket/\\{/go;
    $_[0] =~ s/tex2html_escaped_closing_bracket/\\}/go;
}

# Works on $_.
sub replace_html_special_chars {
    # Replaces html special characters with markers unless preceded by "\"
    s/([^\\])(<|>|&|\"|``|'')/&special($1).&special($2)/geom;
    # MUST DO IT AGAIN JUST IN CASE THERE ARE CONSECUTIVE HTML SPECIALS
    s/([^\\])(<|>|&|\"|``|'')/&special($1).&special($2)/geom;
    # a special at the very start of a line has no preceding character
    s/^(<|>|&|\"|``|'')/&special($1)/geom;
}

# used in \verbatiminput only: $html_escape_chars = '<>&';
# Replaces every character of $_ that is listed in $html_escape_chars by the
# result of &special, escaped or not.
sub replace_all_html_special_chars {
    s/([$html_escape_chars])/&special($1)/geom;
}

# The bibliography and the index should be treated as separate sections
# in their own HTML files. The \bibliography{} command acts as a sectioning command
# that has the desired effect. But when the bibliography is constructed
# manually using the thebibliography environment, or when using the
# theindex environment it is not possible to use the normal sectioning
# mechanism. This subroutine inserts a \bibliography{} or a dummy
# \textohtmlindex command just before the appropriate environments
# to force sectioning.
# Works on $_: puts a \bibliography command in front of every
# thebibliography environment and a \textohtmlindex command in front of the
# first theindex environment / in place of the first \printindex.
sub add_bbl_and_idx_dummy_commands {
    local($id) = $global{'max_id'};

    # count the bibliographies; the text itself is left unchanged
    s/([\\]begin\s*$O\d+$C\s*thebibliography)/$bbl_cnt++; $1/eg;
    ## if ($bbl_cnt == 1) {
    s/([\\]begin\s*$O\d+$C\s*thebibliography)/$id++; "\\bibliography$O$id$C$O$id$C $1"/geo;
    #}
    $global{'max_id'} = $id;
    s/([\\]begin\s*$O\d+$C\s*theindex)/\\textohtmlindex $1/o;
    s/[\\]printindex/\\textohtmlindex /o;
    &lib_add_bbl_and_idx_dummy_commands() if defined(&lib_add_bbl_and_idx_dummy_commands);
}

# Uses and modifies $default_language
# This would be straight-forward except when there are
# \MakeUppercase, \MakeLowercase or \uppercase , \lowercase commands
# present in the source. The cases have to be adjusted before the
# ISO-character code is set; e.g. with "z --> "Z in german.perl
#
# Takes the text as its argument and returns the converted text.
sub convert_iso_latin_chars {
    local($_) = @_;
    local($next_language, $pattern);
    local($xafter, $before, $after, $funct, $level, $delim);
    local(@case_processed);
    while (/$case_change_rx/) {
        $xafter = $2;
#        $before .= $`;
        push(@case_processed, $`);
        $funct = $3;
        $after = '';
        $_ = $';
        if ($xafter =~ /noexpand/) { $before .= "\\$funct"; next; }

        # the first non-blank character after the command decides how its
        # argument is delimited
        s/^[\s%]*(.)/$delim=$1;''/eo;
        if ($delim =~ /{/ ) {
            # brackets not yet numbered...
#            $before .= $funct . $delim;
            push(@case_processed, $funct . $delim);
            $level = 1;
            $after = $delim;
            while (($level)&&($_)&&(/[\{\}]/)) {
                $after .= $` . $&;
                $_ = $';
                if ( "$&" eq "\{" ) {$level++}
                elsif ( "$&" eq "\}" ) { $level-- }
                else { print $_ }
                print "$level";
            }
#            $before .= $after;
            push(@case_processed, $after);
        } elsif ($delim eq "<") {
            # brackets numbered, but maybe not processed...
            s/((<|#)(\d+)(>|#)>).*\1//;
            $after .= $delim . $&;
            $_ = $';
            print STDOUT "\n<$2$funct$4>" if ($VERBOSITY > 2);
            $funct =~ s/^\\//o;
            local($cmd) = "do_cmd_$funct";
            $after = &$cmd($after);
#            $before .= $after;
            push(@case_processed, $after);
        } elsif (($xafter)&&($delim eq "\\")) {
            # preceded by \expandafter ...
            # ...so expand the following macro first
            $funct =~ s/^\\//o;
            local($case_change) = $funct;
            s/^(\w+|\W)/$funct=$1;''/eo;
            local($cmd) = $funct;
            local($thiscmd) = "do_cmd_$funct";
            if (defined &$thiscmd) { $_ = &$thiscmd($_) }
            elsif ($new_command{$funct}) {
                local($argn, $body, $opt) = split(/:!:/, $new_command{$funct});
                do { ### local($_) = $body;
                    &make_unique($body);
                } if ($body =~ /$O/);
                if ($argn) {
                    do {
                        local($before) = '';
                        local($after) = "\\$funct ".$_;
                        $after = &substitute_newcmd; # may change $after
                        $after =~ s/\\\@#\@\@/\\/o ;
                    }
                } else { $_ = $body . $_; }
            } else { print "\nUNKNOWN COMMAND: $cmd "; }
            # now apply the case-changing command itself
            $cmd = $case_change;
            $case_change = "do_cmd_$cmd";
            if (defined &$case_change) { $_ = &$case_change($_) }
        } else {
            # this should not happen, but just in case...
            $funct =~ s/^\\//o;
            local($cmd) = "do_cmd_$funct";
            print STDOUT "\n\n<$delim$funct>" if ($VERBOSITY > 2);
            $_ = join('', $delim , $_ );
            if (defined &$cmd) { $_ = &$cmd($_) }
        }
    }
#    $_ = join('', $before, $_) if ($before);
    $_ = join('', @case_processed, $_) if (@case_processed);

    # ...now do the conversions
    ($before, $after, $funct) = ('','','');
    @case_processed = ();
    if (/$language_rx/o) {
        ($next_language, $pattern, $before, $after) = (($2||$1), $&, $`, $');
        $before = &convert_iso_latin_chars($before) if ($before);
#        push(@case_processed, $pattern, $before);
        local($br_id) = ++$global{'max_id'};
        $pattern = join('' , '\selectlanguage', $O.$br_id.$C
            , (($pattern =~ /original/) ? $TITLES_LANGUAGE : $next_language )
            , $O.$br_id.$C );
        push(@case_processed, $before, $pattern);
        # the text after the switch is converted with the new language
        push(@language_stack, $default_language);
        $default_language = $next_language;
        $_ = &convert_iso_latin_chars($after);
        $default_language = pop @language_stack;
    } else {
        $funct = $language_translations{$default_language};
        (defined(&$funct) ? $_ = &$funct($_) :
            do { &write_warnings(
                    "\nCould not find translation function for $default_language.\n\n")
            }
        );
        # Plain non-empty test on the hash: "defined %hash" no longer
        # compiles on Perl >= 5.22, and "length(%hash) > 2" depends on what
        # a hash returns in scalar context, which changed in Perl 5.26.
        if ($USE_UTF || (!$NO_UTF && %unicode_table)) {
            &convert_to_unicode($_)
        };
    }
    $_ = join('', @case_processed, $_); undef(@case_processed);
    $_;
}

# May need to add something here later
sub english_translation { $_[0] }

# This replaces \setlanguage{\language} with \languageTeX
# This makes the identification of language chunks easier.
sub normalize_language_changes {
    s/$setlanguage_rx/\\$2TeX/gs;
}

# Returns the LANG="..." attribute text (with a CLASS="..." part when the
# language has a style class) for $default_language; an empty list when that
# is the titles language, '' when no language code is known for it.
sub get_current_language {
    return () if ($default_language eq $TITLES_LANGUAGE);
    local($lang,$lstyle) = ' LANG="';
    $lang_code = $iso_languages{$default_language};
    if (%styled_languages) {
        $lstyle = $styled_languages{$default_language};
        $lstyle = '" CLASS="'.$lstyle if $lstyle;
    }
    ($lang_code ? $lang.$lang_code.$lstyle.'"' : '');
}

%styled_languages = ();

# \htmllanguagestyle[class]{language}: records the style class to be used
# for a language; the class defaults to the language's code from
# %iso_languages.  Nothing is recorded unless $USING_STYLES is set.
sub do_cmd_htmllanguagestyle {
    local($_) = @_;
    local($class) = &get_next_optional_argument;
    local($lang) = &missing_braces unless (
        (s/$next_pair_pr_rx/$lang=$2;''/e)
        ||(s/$next_pair_rx/$lang=$2;''/e));
    return ($_) unless $lang;
    local($class) = $iso_languages{$lang} unless $class;
    if ($USING_STYLES && $class) {
        print "\nStyling language: $lang = \"$class\" ";
        $styled_languages{"$lang"} = $class;
    }
    $_;
}

# General translation mechanism:
#
#
# The main program latex2html calls texexpand with the document name
# in order to expand some of its \input and \include statements, here
# also called 'merging', and to write a list of sensitized style, class,
# input, or include file names.
# When texexpand has finished, all is contained in one file, TMP_foo.
# (assumed foo.tex is the name of the document to translate).
#
# In this version, texexpand cares for following environments
# that may span include files / section boundaries:
# (For a more technical description, see texexpand.)
# a) \begin{comment}
# b) %begin{comment}
# c) \begin{any} introduced with \excludecomment
# d) %begin{any}
# e) \begin{verbatim}
# f) \begin{latexonly}
# g) %begin{latexonly}
#
# a)-d) cause texexpand to drop their contents, which will not show up in the
# output file. You can use this to 'comment out' a bunch of files, say.
#
# e)-g) prevent texexpand from expanding input files, but the environment
# content goes fully into the output file.
#
# Together with each merging of \input etc. there are so-called %%%texexpand
# markers accompanying the boundary.
#
# When latex2html reads in the output file, it uses these markers to write
# each part to a separate file, and processes them further.
#
#
# If you have, for example:
#
# a) preamble
# b) \begin{document}
# c) text
# d) \input{chapter}
# e) more text
# f) \end{document}
#
# you end up with two parts: part 1 is a)-c), part 2 is the rest,
# regardless of environments spanning input files or sections.
#
#
# What now starts is meta command substitution:
# To do this, latex2html forks a child process on the first part and waits
# until it has finished, then forks another on the next part and so forth
# (see also &slurp_input_and_partition_and_preprocess).
#
# Here's what each child is doing:
# Each child process reads the new commands translated so far by the previous
# child from the TMP_global DBM database.
# After &pre_processing, it substitutes the meta commands (\newcommand, \def,
# and the like) it finds, and adds the freshly retrieved new commands to the
# list so far.
# This is done *only on its part* of the document; this saves a lot of memory.
# Finally, it writes its list of new commands (synopsis and bodies) to the
# DBM database, and exits.
# After the last child has finished, latex2html reads in all parts and
# concatenates them.
#
#
# So, at this point in time (start of &translate), it again has the complete
# document, but now preprocessed and with new commands substituted.
# This has several disadvantages: an amount of commands is substituted (in # TeX lingo, expanded) earlier than the rest. # This causes trouble if commands really must get expanded at the point # in time they show up. # # # Then, still in &translate, latex2html uses the list of section commands to # split the complete document into chunks. # The chunks are not written to files yet. They are retained in the @sections # list, but each chunk is handled separately. # latex2html puts the current chunk to $_ and processes it with # &translate_environments etc., then fetches the next chunk, and so on. # This prevents environments that span section boundaries from getting # translated, because \begin and \end cannot find one another, to say it this # way. # # # After the chunk is translated to HTML, it is written to a file. # When all chunks are done, latex2html rereads each file to get cross # references right, replace image markers with the image file names, and # writes index and bibliography. # # sub translate { &normalize_sections; # Deal with the *-form of sectioning commands # Split the input into sections, keeping the preamble together # Due to the regular expression, each split will create 5 more entries. # Entry 1 and 2: non-letter/letter sectioning command, # entry 4: the delimiter (may be empty) # entry 5: the text. local($pre_section, @sections); if (/\\(startdocument|begin\s*($O\d+$C)\s*document\s*\2)/) { $pre_section = $`.$&; $_ = $'; } @sections = split(/$sections_rx/, $_); $sections[0] = $pre_section.$sections[0] if ($pre_section); undef $pre_section; local($sections) = int(scalar(@sections) / 5); # Initialises $curr_sec_id to a list of 0's equal to # the number of sectioning commands. local(@curr_sec_id) = split(' ', &make_first_key); local(@segment_sec_id) = @curr_sec_id; local($i, $j, $current_depth) = (0,0,0); local($curr_sec) = $SHORT_FILENAME||$FILE; local($top_sec) = ($SEGMENT ? 
'' : 'top of '); # local(%section_info, %toc_section_info, $CURRENT_FILE, %cite_info, %ref_files); local($CURRENT_FILE); # These filenames may be set when translating the corresponding commands. local($tocfile, $loffile, $lotfile, $footfile, $citefile, $idxfile, $figure_captions, $table_captions, $footnotes, $citations, %font_size, %index, %done, $t_title, $t_author, $t_date, $t_address, $t_affil, $changed); local(@authors,@affils,@addresses,@emails,@authorURLs); local(%index_labels, %index_segment, $preindex, %footnotes, %citefiles); local($segment_table_captions, $segment_figure_captions); local($dir,$nosave) = ('',''); local($del,$close_all,$open_all,$toc_sec_title,$multiple_toc); local($open_tags_R) = []; local(@save_open_tags)= (); local(@language_stack) = (); push (@language_stack, $default_language); # $LATEX_FONT_SIZE = '10pt' unless ($LATEX_FONT_SIZE); &process_aux_file if $SHOW_SECTION_NUMBERS || /\\(caption|(html|hyper)?((eq)?ref|cite))/; require ("${PREFIX}internals.pl") if (-f "${PREFIX}internals.pl"); #JCL(jcl-del) &make_single_cmd_rx; # $tocfile = $EXTERNAL_CONTENTS; $idxfile = $EXTERNAL_INDEX; $citefile = $EXTERNAL_BIBLIO; $citefile =~ s/#.*$//; $citefiles{1} = $citefile if ($citefile); print "\nTranslating ..."; while ($i <= @sections) { undef $_; $_ = $sections[$i]; s/^[\s]*//; # Remove initial blank lines # The section command was removed when splitting ... s/^/\\$curr_sec$del/ if ($i > 0); # ... 
so put it back if ($current_depth < $MAX_SPLIT_DEPTH) { if (($footnotes)&&($NO_FOOTNODE)&&( $current_depth < $MAX_SPLIT_DEPTH)) { local($thesenotes) = &make_footnotes ; print OUTPUT $thesenotes; } $CURRENT_FILE = &make_name($curr_sec, join('_',@curr_sec_id)); open(OUTPUT, ">$CURRENT_FILE") || die "Cannot write '$CURRENT_FILE': $!\n"; if ($XBIT_HACK) { # use Apache's XBit hack chmod 0744, $CURRENT_FILE; &check_htaccess; } else { chmod 0644, $CURRENT_FILE; } if ($MULTIPLE_FILES && $ROOTED) { if ($DESTDIR =~ /^\Q$FIXEDDIR\E[$dd$dd]?([^$dd$dd]+)/) { $CURRENT_FILE = "$1$dd$CURRENT_FILE" }; } } &remove_document_env; # &wrap_shorthand_environments; #RRM Is this needed ? print STDOUT "\n" if ($VERBOSITY); print STDOUT "\n" if ($VERBOSITY > 2); print $i/5,"/$sections"; print ":$top_sec$curr_sec:" if ($VERBOSITY); # Must do this early ... It also sets $TITLE &process_command($sections_rx, $_) if (/^$sections_rx/); # reset tags saved from the previous section $open_tags_R = [ @save_open_tags ]; @save_open_tags = (); local($curr_sec_tex); if ((! $TITLE) || ($TITLE eq $default_title)) { eval '$TITLE = '.$default_title; $TITLE = $default_title if $@; $curr_sec_tex = ($top_sec ? '' : join('', '"', &revert_to_raw_tex($curr_sec), '"')); print STDOUT "$curr_sec_tex for $CURRENT_FILE\n" if ($VERBOSITY); } else { local($tmp) = &purify($TITLE,1); $tmp = &revert_to_raw_tex($tmp); print STDOUT "\"$tmp\" for $CURRENT_FILE\n" if ($VERBOSITY); } if (/\\(latextohtmlditchpreceding|startdocument)/m) { local($after) = $'; local($before) = $`.$&; $SEGMENT = 1 if ($1 =~ /startdocument/); print STDOUT "\n *** translating preamble ***\n" if ($VERBOSITY); $_ = &translate_preamble($before); s/\n\n//g; s/
//g; # remove redundant blank lines and breaks # # &process_aux_file if $AUX_FILE_NEEDED; # print STDOUT "\n *** preamble done ***\n" if ($VERBOSITY); $PREAMBLE = 0; $NESTING_LEVEL=0; &do_AtBeginDocument; $after =~ s/^\s*//m; print STDOUT (($VERBOSITY >2)? "\n*** Translating environments ***" : ";"); $after = &translate_environments($after); print STDOUT (($VERBOSITY >2)? "\n*** Translating commands ***" : ";"); $_ .= &translate_commands($after); # $_ = &translate_commands($after); } else { &do_AtBeginDocument; $PREAMBLE = 0; $NESTING_LEVEL=0; print STDOUT (($VERBOSITY >2)? "\n*** Translating environments ***" : ";"); $_ = &translate_environments($_); print STDOUT (($VERBOSITY >2)? "\n*** Translating commands ***" : ";"); $_ = &translate_commands($_); } # close any tags that remain open if (@$open_tags_R) { ($close_all,$open_all) = &preserve_open_tags(); $_ .= $close_all; @save_open_tags = @$open_tags_R; $open_tags_R = []; } else { ($close_all,$open_all) = ('','') } print STDOUT (($VERBOSITY >2)? "\n*** Translations done ***" : "\n"); # if (($footnotes)&&($NO_FOOTNODE)&&( $current_depth < $MAX_SPLIT_DEPTH)) { # $_ .= &make_footnotes # } print OUTPUT $_; # Associate each id with the depth, the filename and the title ###MEH -- starred sections don't show up in TOC ... # RRM: ...unless $TOC_STARS is set # $toc_sec_title = &simplify($toc_sec_title); $toc_sec_title = &purify($toc_sec_title);# if $SEGMENT; $toc_sec_title = &purify($TITLE) unless ($toc_sec_title); if ($TOC_STARS) { $toc_section_info{join(' ',@curr_sec_id)} = "$current_depth$delim$CURRENT_FILE$delim$toc_sec_title" # if ($current_depth <= $MAX_SPLIT_DEPTH + $MAX_LINK_DEPTH); if ($current_depth <= $TOC_DEPTH); } else { $toc_section_info{join(' ',@curr_sec_id)} = "$current_depth$delim$CURRENT_FILE$delim$toc_sec_title" . ($curr_sec =~ /star$/ ? 
"$delim" : "") # if ($current_depth <= $MAX_SPLIT_DEPTH + $MAX_LINK_DEPTH); if ($current_depth <= $TOC_DEPTH); } # include $BODYTEXT in the section_info, when starting a new page $section_info{join(' ',@curr_sec_id)} = "$current_depth$delim$CURRENT_FILE$delim$TITLE$delim" . (($current_depth < $MAX_SPLIT_DEPTH)? $BODYTEXT: ""); # Get type of section (see also the split above) $curr_sec = $sections[$i+1].$sections[$i+2]; $del = $sections[$i+4]; # Get the depth of the current section; # $curr_sec = $outermost_level unless $curr_sec; $current_depth = $section_commands{$curr_sec}; if ($after_segment) { $current_depth = $after_segment; $curr_sec_id[$after_segment] += $after_seg_num; ($after_segment,$after_seg_num) = ('',''); for($j=1+$current_depth; $j <= $#curr_sec_id; $j++) { $curr_sec_id[$j] = 0; } } if ($SEGMENT||$SEGMENTED) { for($j=1; $j <= $#curr_sec_id; $j++) { $curr_sec_id[$j] += $segment_sec_id[$j]; $segment_sec_id[$j] = 0; } }; # this may alter the section-keys $multiple_toc = 1 if ($MULTIPLE_FILES && $ROOTED && (/$toc_mark/)); #RRM : Should this be done here, or in \stepcounter ? 
@curr_sec_id = &new_level($current_depth, @curr_sec_id); $toc_sec_title = $TITLE = $top_sec = ''; $i+=5; #skip to next text section } $open_tags_R = []; $open_all = ''; $_ = undef; $_ = &make_footnotes if ($footnotes); $CURRENT_FILE = ''; print OUTPUT; close OUTPUT; # # this may alter the section-keys # &adjust_root_keys if $multiple_toc; if ($PREPROCESS_IMAGES) { &preprocess_images } else { &make_image_file } print STDOUT "\n *** making images ***" if ($VERBOSITY > 1); &make_images; # Link sections, add head/body/address do cross-refs etc print STDOUT "\n *** post-process ***" if ($VERBOSITY > 1); &post_process; if (defined &document_post_post_process) { #BRM: extra document-wide post-processing print STDOUT "\n *** post-processing Document ***" if ($VERBOSITY > 1); &document_post_post_process(); } print STDOUT "\n *** post-processed ***" if ($VERBOSITY > 1); ©_icons if $LOCAL_ICONS; if ($SEGMENT || $DEBUG || $SEGMENTED) { &save_captions_in_file("figure", $figure_captions) if $figure_captions; &save_captions_in_file("table", $table_captions) if $table_captions; # &save_array_in_file ("captions", "figure_captions", 0, %figure_captions) if %figure_captions; # &save_array_in_file ("captions", "table_captions", 0, %table_captions) if %table_captions; &save_array_in_file ("index", "index", 0, %index); &save_array_in_file ("sections", "section_info", 0, %section_info); &save_array_in_file ("contents", "toc_section_info", 0,%toc_section_info); &save_array_in_file ("index", "sub_index", 1, %sub_index) if %sub_index; &save_array_in_file ("index", "index_labels", 1, %index_labels) if %index_labels; &save_array_in_file ("index", "index_segment", 1, %index_segment) if %index_segment; &save_array_in_file ("index", "printable_key", 1, %printable_key) if (%printable_key || %index_segment); } elsif ($MULTIPLE_FILES && $ROOTED) { &save_array_in_file ("sections", "section_info", 0, %section_info); &save_array_in_file ("contents", "toc_section_info", 0, %toc_section_info); } 
&save_array_in_file ("internals", "ref_files", 0, %ref_files) if $changed; &save_array_in_file ("labels", "external_labels", 0, %ref_files); &save_array_in_file ("labels", "external_latex_labels", 1, %latex_labels); &save_array_in_file ("images", "cached_env_img", 0, %cached_env_img); } # RRM: sub translate_preamble { local($_) = @_; $PREAMBLE = 1; $NESTING_LEVEL=0; #counter for TeX group nesting level # remove some artificially inserted constructions s/\n${tex2html_deferred_rx}\\par\s*${tex2html_deferred_rx2}\n/\n/gm; s/\\newedcommand(<<\d+>>)([A-Za-z]+|[^A-Za-z])\1(\[\d+\])?(\[[^]]*\])?(<<\d+>>)[\w\W\n]*\5($comment_mark\d*)?//gm; s/\n{2,}/\n/ogm; if (/\\htmlhead/) { print STDOUT "\nPREAMBLE: discarding...\n$`" if ($VERBOSITY > 4); local($after) = $&.$'; # translate segment preamble preceding \htmlhead &translate_commands(&translate_environments($`)); # translate \htmlhead and rest of preamble $_=&translate_commands(&translate_environments($after)); print STDOUT "\nPREAMBLE: retaining...\n$_" if ($VERBOSITY > 4); } else { # translate only preamble here (metacommands etc.) # there should be no textual results, if so, discard them &translate_commands(&translate_environments($_)); print STDOUT "\nPREAMBLE: discarding...\n$_" if ($VERBOSITY > 4); $_=""; }; $_ = &do_AtBeginDocument($_); if (! $SEGMENT) { $_ = ''} # segmented documents have a heading already $_; } ############################ Processing Environments ########################## sub wrap_shorthand_environments { # This wraps a dummy environment around environments that do not use # the begin-end convention. The wrapper will force them to be # evaluated by Latex rather than them being translated. # Wrap a dummy environment around matching TMPs. # s/^\$\$|([^\\])\$\$/{$1.&next_wrapper('tex2html_double_dollar')}/ge; # Wrap a dummy environment around matching $s. 
# s/^\$|([^\\])\$/{$1.&next_wrapper('$')}/ge; # s/tex2html_double_dollar/\$\$/go; # Do \(s and \[s # local($wrapper) = "tex2html_wrap_inline"; # \ensuremath wrapper print STDOUT "\n *** wrapping environments ***\n" if ($VERBOSITY > 3); # MRO: replaced $* with /m print STDOUT "\\(" if ($VERBOSITY > 3); s/(^\\[(])|([^\\])(\\[(])/{$2.&make_any_wrapper(1,'',$wrapper).$1.$3}/geom; print STDOUT "\\)" if ($VERBOSITY > 3); s/(^\\[)]|[^\\]\\[)])/{$1.&make_any_wrapper(0,'',$wrapper)}/geom; print STDOUT "\\[" if ($VERBOSITY > 3); s/(^\\[[])|([^\\])(\\[[])/{$2.&make_any_wrapper(1,1,"displaymath")}/geom; print STDOUT "\\]" if ($VERBOSITY > 3); s/(^\\[\]])|([^\\])(\\[\]])/{$2.&make_any_wrapper(0,1,"displaymath")}/geom; print STDOUT "\$" if ($VERBOSITY > 3); s/$enspair/print "\$"; {&make_any_wrapper(1,'',$wrapper).$&.&make_any_wrapper(0,'',$wrapper)}/geom; $double_dol_rx = '(^|[^\\\\])\\$\\$'; $single_dol_rx = '(^|[^\\\\])\\$'; print STDOUT "\$" if ($VERBOSITY > 3); local($dollars_remain) = 0; $_ = &wrap_math_environment; $_ = &wrap_raw_arg_cmds; } sub wrap_math_environment { # This wraps math-type environments # The trick here is that the opening brace is the same as the close, # but they *can* still nest, in cases like this: # # $ outer stuff ... \hbox{ ... $ inner stuff $ ... } ... $ # # Note that the inner pair of $'s is nested within a group. So, to # handle these cases correctly, we need to make sure that the outer # brace-level is the same as the inner. --- rst #tex2html_wrap # And yet another problem: there is a scungy local idiom to do # this: $\_$ for a boldfaced underscore. xmosaic can't display the # resulting itty-bitty bitmap, for some reason; even if it could, it # would probably come out as an overbar because of the floating- # baseline problem. So, we have to special case this. --- rst again. 
local ($processed_text, @processed_text, $before, $end_rx, $delim, $ifclosed); local ($underscore_match_rx) = "^\\s*\\\\\\_\\s*\\\$"; local ($wrapper); print STDOUT "\nwrap math:" if ($VERBOSITY > 3); #find braced dollars, in tabular-specs while (/((($O|$OP)\d+($C|$CP))\s*)\$(\s*\2)/) { push (@processed_text, $`, $1.$dol_mark.$5); $_ = $'; } $_ = join('',@processed_text, $_) if (@processed_text); undef @processed_text; $dollars_remain = 0; while (/$single_dol_rx/) { $processed_text .= $`.$1; $_ = $'; $wrapper = "tex2html_wrap_inline"; $end_rx = $single_dol_rx; # Default, unless we begin with $$. $delim = "\$"; if (/^\$/ && (! $`)) { s/^\$//; $end_rx = $double_dol_rx; $delim = ""; # Cannot say "\$\$" inside displaymath $wrapper = "displaymath"; } elsif (/$underscore_match_rx/ && (! $`)) { # Special case for $\_$ ... s/$underscore_match_rx//; $processed_text .= '\\_'; next; } # Have an opening $ or $$. Find matching close, at same bracket level # $processed_text .= &make_any_wrapper(1,'',$wrapper).$delim; print STDOUT "\$" if ($VERBOSITY > 3); $ifclosed = 0; local($thismath); while (/$end_rx/) { # Forget the $$ if we are going to replace it with "displaymath" $before = $` . (($wrapper eq "displaymath")? "$1" : $&); last if ($before =~ /\\(sub)*(item|section|chapter|part|paragraph)(star)?\b/); $thismath .= $before; $_ = $'; s/^( [^\n])/\\space$1/s; #make sure a trailing space doesn't get lost. # Found dollar sign inside open subgroup ... now see if it's # at the same brace-level ... local ($losing, $br_rx) = (0, ''); print STDOUT "\$" if ($VERBOSITY > 3); while ($before =~ /$begin_cmd_rx/) { $br_rx = &make_end_cmd_rx($1); $before = $'; if ($before =~ /$br_rx/) { $before = $'; } else { $losing = 1; last; } } do { $ifclosed = 1; last } unless $losing; # It wasn't ... find the matching close brace farther on; then # keep going. /$br_rx/; $thismath .= $`.$&; #RRM: may now contain unprocessed $s e.g. 
$\mbox{...$...$...}$ # the &do_cmd_mbox uses this specially to force an image # ...but there may be other situations; e.g. \hbox # so set a flag: $dollars_remain = 1; $_ = $'; } # Got to the end. Whew! if ($ifclosed) { # also process any nested math while (($dollars_remain)&&($delim eq "\$")) { local($saved) = $_; $thismath =~ s/\$$//; $_ = $thismath; $thismath = &wrap_math_environment; $thismath .= "\$"; $_ = $saved; } $processed_text .= &make_any_wrapper(1,'',$wrapper) . $delim . $thismath . &make_any_wrapper(0,'',$wrapper); } else { print STDERR "\n\n *** Error: unclosed math or extra `\$', before:\n$thismath\n\n"; # # remove a $ to try to recover as much as possible. # $thismath =~ s/([^\\]\\\\|[^\\])\$/$1\%\%/; # $_ = $thismath . $_; $thismath = ""; print "\n$thismath\n\n\n$_\n\n\n"; die; } } $processed_text . $_; } sub translate_environments { local ($_) = @_; local($tmp, $capenv); # print "\nTranslating environments ..."; local($after, @processedE); local ($contents, $before, $br_id, $env, $pattern); for (;;) { # last unless (/$begin_env_rx/o); last unless (/$begin_env_rx|$begin_cmd_rx|\\(selectlanguage)/o); # local ($contents, $before, $br_id, $env, $pattern); local($this_env, $opt_arg, $style_info); $contents = ''; # $1,$2 : optional argument/text --- stylesheet info # $3 : br_id (at the beginning of an environment name) # $4 : environment name # $5 : br_id of open-brace, when $3 == $4 == ''; # $6 : \selectlanguage{...} if ($7) { push(@processedE,$`); $_ = $'; if (defined &do_cmd_selectlanguage) { $_ = &do_cmd_selectlanguage($_); } else { local($cmd) = $7; $pattern = &missing_braces unless ( s/$next_pair_rx/$pattern = $2;''/e); local($trans) = $pattern.'_translation'; if (defined &$trans) { &set_default_language($pattern,$_); } undef $cmd; undef $trans; } next; } elsif ($4) { ($before, $opt_arg, $style_info, $br_id , $env, $after, $pattern) = ($`, $2, $3, $4, $5, $', $&); if (($before)&& (!($before =~ /$begin_env_rx|$begin_cmd_rx/))) { 
push(@processedE,$before); $_ = $pattern . $after; $before = ''; } } else { ($before, $br_id, $env, $after, $pattern) = ($`, $6, 'group', $', $&); if (($before)&& (!($before =~ /$begin_env_rx|$begin_cmd_rx/))) { push(@processedE,$before); $_ = $pattern . $after; $before = ''; } local($end_cmd_rx) = &make_end_cmd_rx($br_id); if ($after =~ /$end_cmd_rx/) { # ... find the the matching closing one $NESTING_LEVEL++; ($contents, $after) = ($`, $'); $contents = &process_group_env($contents); print STDOUT "\nOUT: {$br_id} ".length($contents) if ($VERBOSITY > 3); print STDOUT "\n:$contents\n" if ($VERBOSITY > 7); # THIS MARKS THE OPEN-CLOSE DELIMITERS AS PROCESSED $_ = join("", $before,"$OP$br_id$CP", $contents,"$OP$br_id$CP", $after); $NESTING_LEVEL--; } else { $pattern = &escape_rx_chars($pattern); s/$pattern//; print "\nCannot find matching bracket for $br_id"; $_ = join("", $before,"$OP$br_id$CP", $after); } next; } $contents = undef; local($defenv) = $env =~ /deferred/; # local($color_env); local($color_env) unless ($env =~ /tabular|longtable|in(line|display)|math/); local($closures,$reopens); local(@save_open_tags) = @$open_tags_R unless ($defenv); local($open_tags_R) = [ @save_open_tags ] unless ($defenv); local(@saved_tags) if ($env =~ /tabular|longtable/); if ($env =~ /tabular|longtable|makeimage|in(line|display)/) { @save_open_tags = @$open_tags_R; $open_tags_R = [ @save_open_tags ]; # check for color local($color_test) = join(',',@$open_tags_R); if ($color_test =~ /(color{[^}]*})/g ) { $color_env = $1; } # else { $color_env = '' } if ($env =~ /tabular|longtable|makeimage/) { # close to the surrounding block-type tag ($closures,$reopens,@saved_tags) = &preserve_open_block_tags(); @save_open_tags = @$open_tags_R; $open_tags_R = [ @save_open_tags ]; if ($color_env) { $color_test = join(',',@saved_tags); if ($color_test =~ /(color{[^}]*})/g ) { $color_env = $1; } } } elsif ($env =~ /in(line|display)/) { $closures = &close_all_tags() if ((&defined_env($env)) 
&&!($defenv)&&!($env=~/inline/)&&(!$declarations{$env})); if ($color_env) { $color_test = $declarations{$color_env}; $color_test =~ s/<\/.*$//; $closures .= "\n$color_test"; push (@$open_tags_R , $color_env); } } } elsif ($env =~ /alltt|tex2html_wrap/) { # alltt is constructed as paragraphs, not with
	    #  tex2html_wrap  creates an image, which is at text-level
	    # NOTE(review): this is the tail of translate_environments.  The head
	    # of the sub (and of this if/elsif chain) sits on the lines directly
	    # above, which appear to have lost their line breaks -- confirm
	    # against a pristine copy before editing them.
	} else {
	    # Every other environment: collect the closing tags from
	    # &close_all_tags, but only when &defined_env($env) is true, the
	    # environment is not a deferred one and has no %declarations entry.
	    $closures = &close_all_tags() if ((&defined_env($env))
		&&!($defenv)&&(!$declarations{$env}) );
	}
	# Sets $contents and modifies $after
	# (find_end_env works on its 2nd and 3rd arguments in place; on failure
	# it empties $contents again and leaves the whole text in $after.)
	if (&find_end_env($env,$contents,$after)) {
	    # Case A: a matching \end{$env} was found.
	    print STDOUT "\nIN-A {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
	    # Handle whatever $counters_rx matches in the text preceding the
	    # environment before the environment itself is processed.
	    &process_command($counters_rx, $before)
		if ($before =~ /$counters_rx/);
	    # This may modify $before and $after
	    # Modifies $contents
#RRM: the do_env_... subroutines handle when to translate sub-environments
#	    $contents = &translate_environments($contents) if
##		((!$defenv) && (&defined_env($env)) && (! $raw_arg_cmds{$env})
##		&& (!$declarations{$env})
#		((&defined_env($env)) && (! $raw_arg_cmds{$env})
#		&& (!($env =~ /latexonly|enumerate|figure|table|makeimage|wrap_inline/))
#		&& ((! $NO_SIMPLE_MATH)||(!($env =~ /wrap/)))
#		&& (!($env =~ /(math|wrap|equation|eqnarray|makeimage|minipage|tabular)/) )
#		);
	    # The return value is ignored here: process_environment reads and
	    # rewrites the dynamically scoped $contents of this sub directly.
	    if ($opt_arg) { 
		&process_environment(1, $env, $br_id, $style_info); # alters $contents
	    } else {
		&process_environment(0, $env, $br_id, '');
	    }
	    undef $_;
	    print STDOUT "\nOUT-A {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
	    #JCL(jcl-env) - insert the $O$br_id$C stuff to handle environment grouping
	    # Assemble: text before + closing tags + translated body + balancing
	    # tags (skipped for deferred environments) + reopened tags; the text
	    # after the environment becomes the new $_ for the next iteration.
	    if (!($contents eq '')) {
		$after =~ s/^\n//o if ($defenv);
		$this_env = join("", $before, $closures
			  , $contents
			  , ($defenv ? '': &balance_tags())
			  , $reopens ); $_ = $after;
	    } else { 
		# Empty result: same assembly without the body.
		$this_env = join("", $before , $closures
			  , ($defenv ? '': &balance_tags())
			  , $reopens ); $_ = $after;
	    };
	### Evan Welsh  added the next 24 lines ##
	} elsif (&defined_env($env)) {
	    # Case B: no matching \end, but &defined_env($env) is true.
	    print STDOUT "\nIN-B {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
	    # If I specify a function for the environment then it
	    # calls it with the contents truncated at the next section.
	    # It assumes I know what I'm doing and doesn't give a
	    # deferred warning.
	    # Everything that follows the \begin is treated as the body.
	    $contents = $after;
	    if ($opt_arg) { 
		$contents = &process_environment(1, $env, $br_id, $style_info);
	    } else {
		$contents = &process_environment(0, $env, $br_id, '');
	    }
	    print STDOUT "\nOUT-B {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
	    $this_env = join("", $before, $closures ,$contents, $reopens);

	    # there should not be anything left over 
#	    $_ = $after;
	    $_ = '';
	} elsif ($ignore{$env}) {
	    # Case "ignored": no matching \end and the environment is in %ignore.
	    # $contents was emptied by the failed find_end_env call above.
	    print STDOUT "\nIGNORED {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
	    # If I specify that the environment should be ignored then
	    # it is but I get a deferred warning.
	    $this_env = join("", $before , $closures , &balance_tags()
		      , $contents, $reopens );
	    $_ = $after;
	    &write_warnings("\n\\end{$env} not found (ignored).\n");
	} elsif ($raw_arg_cmds{$env}) {
	    # Case C: no matching \end and the environment is in %raw_arg_cmds.
	    print "\nIN-C {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
	    # If I specify that the environment should be passed to tex
	    # then it is with the environment truncated at the next
	    # section and I get a deferred warning.

	    # As in case B, all remaining text is taken as the body.
	    $contents = $after;
	    if ($opt_arg) { 
		$contents = &process_environment(1, $env, $br_id, $style_info);
	    } else {
		$contents = &process_environment(0, $env, $br_id, '');
	    }
	    print STDOUT "\nOUT-C {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
	    $this_env = join("", $before, $closures
			     , $contents, &balance_tags(), $reopens );
	    $_='';
	    &write_warnings(
	        "\n\\end{$env $br_id} not found (truncated at next section boundary).\n");
	} else {
	    # Nothing is known about the environment and it has no \end:
	    # replace the first occurrence of the \begin marker in $_ by the
	    # closing tags and report it.  $this_env is not assigned on this
	    # path.
	    $pattern = &escape_rx_chars($pattern);
	    s/$pattern/$closures/;
	    print "\nCannot find \\end{$env $br_id}\n";
	    $_ .= join('', &balance_tags(), $reopens) unless ($defenv);
	}
	# A result that still matches $begin_env_rx or $begin_cmd_rx is put back
	# in front of $_ so that the loop sees it again; anything else is
	# finished and is stored in @processedE.
	if ($this_env =~ /$begin_env_rx|$begin_cmd_rx/) {
	    $_ = $this_env . $_;
	} else { push (@processedE, $this_env) }
    }
    # Loop finished: glue the finished pieces in front of the remaining text.
    $_ = join('',@processedE) . $_;
    # Hand the text to process_command through $tmp with $_ undefined for the
    # duration of the call, then restore it as the return value.
    $tmp = $_; undef $_;
    &process_command($counters_rx, $tmp) if ($tmp =~ /$counters_rx/);
    $_ = $tmp; undef $tmp;
    $_
}

sub find_end_env {
    # Usage: find_end_env($env, $contents, $rest)
    #
    # Looks in $rest for the \end marker that closes an already opened
    # environment $env, keeping track of nested begin/end markers of the
    # same environment.  The second and third arguments are changed in
    # place through the aliasing of @_ :
    #   success (returns 1): the text up to the closing marker has been
    #       appended to $contents; $rest holds what follows the marker,
    #       including a newline that directly followed it.
    #   failure (returns 0): $contents is emptied and everything collected
    #       so far is put back in front of $rest.
    # The marker pattern comes from &make_begin_end_env_rx; with the extra
    # parentheses added below, $2 is the begin/end keyword and $5 the
    # optional trailing newline.
    my $env_name  = $_[0];
    my $marker_rx = &make_begin_end_env_rx($env_name);
    my $depth     = 1;	# the opening marker was consumed by the caller

    while ($depth != 0 && $_[2] =~ /($marker_rx)(\n?)/s) {
	# save the match variables before anything else can disturb them
	my ($lead, $marker, $kind, $eol, $tail) = ($`, $1, $2, $5, $');

	$_[1] .= $lead;				# $contents
	$depth += ($kind eq "begin") ? 1 : -1;

	# a newline is kept after an {end} marker only
	$_[2] = (($kind eq 'end') ? $eol : '') . $tail;	# $rest

	# nested markers stay part of the contents, the final one does not
	$_[1] .= $marker unless ($depth == 0);
    }

    return(1) if ($depth == 0);

    # unbalanced: hand everything back to the caller unchanged in $rest
    $_[2] = join('', $_[1], $_[2]);
    $_[1] = '';
    return(0);
}


# Translates the text of one brace group.  It is called from
# translate_environments with the text between a matching pair of bracket
# markers and yields the translated text followed by &balance_tags().
# $br_id is not a parameter: it is read from the caller's dynamic scope.
# NOTE(review): the last part of this sub lies on lines further down that
# appear to have lost their line breaks and some HTML tags inside string
# literals -- confirm against a pristine copy before editing them.
sub process_group_env {
    local($contents) = @_;
    # Work on a private copy of the open-tag list for the duration of the group.
    local(@save_open_tags) = @$open_tags_R;
    local($open_tags_R) = [ @save_open_tags ];
    print STDOUT "\nIN::{group $br_id}" if ($VERBOSITY > 4);
    print STDOUT "\n:$contents\n" if ($VERBOSITY > 6);

    # need to catch explicit local font-changes
    # NOTE(review): the pattern is matched against $_ (whatever the caller
    # left there), not against $contents -- confirm that this is intended.
    local(%font_size) = %font_size if (/\\font\b/);

    # record class/id info for a style-sheet entry
    local($env_id, $tmp, $etmp);
    if (($USING_STYLES) && !$PREAMBLE ) { $env_id = $br_id; }
#	$env_id = "grp$br_id";
#	$styleID{$env_id} = " ";
#        $env_id = " ID=\"$env_id\"";
#    }

    undef $_;
    $contents =~ s/^\s*$par_rx\s*//s; # don't start with a \par 
    if ($contents =~ /^\s*\\($image_switch_rx)\b\s*/s) {
	# catch TeX-like environments: {\fontcmd ... }
	local($image_style) = $1;
	if ($USING_STYLES) {
	    $env_style{$image_style} = " " unless ($env_style{$image_style});
	}
	# Use a do_cmd_<switch> handler if one is defined.
	local($switch_cmd) = "do_cmd_${image_style}";
	if (defined &$switch_cmd ) {
	    # The handler receives $' , i.e. the text following the switch
	    # command as left by the match on $image_switch_rx above; no other
	    # pattern match may be inserted between that match and this eval.
	    eval "\$contents = \&${switch_cmd}(\$')";
	    print "\n*** &$switch_cmd didn't work: $@\n$contents\n\n" if ($@);
	} elsif ($contents =~ /$par_rx/) {
	    # split into separate image for each paragraph
	    # NOTE(review): $par_style is localized twice (here and two lines
	    # below); the second local resets it to undef.
	    local($par_style,$this_par_img) = '';
	    local(@par_pieces) = split($par_rx, $contents);
	    local($this_par,$par_style,$par_comment);
	    $contents = '';
	    while (@par_pieces) {
		$this_par = shift @par_pieces;
		# A paragraph may start with its own switch command, which then
		# replaces the current image style.
		if ($this_par =~ /^\s*\\($image_switch_rx)\b/s) {
		    $image_style = $1;
		    $par_style = 'P.'.$1;
		    $env_style{$par_style} = " " unless ($env_style{$par_style});
		}
#	no comment: source is usually too highly encoded to be meaningful
#	$par_comment = &make_comment($image_style,$this_par);
		# Each paragraph is wrapped in braces and passed to
		# &process_in_latex on its own.
		$this_par_img = &process_in_latex("\{".$this_par."\}");
		$contents .=  join(''  #,"\n", $par_comment
			, "\n", $this_par_img
			, "

\n"); if (@par_pieces) { # discard the pieces from matching $par_rx $dum = shift @par_pieces; $dum = shift @par_pieces; $dum = shift @par_pieces; $dum = shift @par_pieces; $dum = shift @par_pieces; $dum = shift @par_pieces; # $contents .= "\n

\n

"; } } } else { $contents = &process_undefined_environment("tex2html_accent_inline" , ++$global{'max_id'},"\{".$contents."\}"); } } elsif ($contents =~ /^\s*\\(html)?url\b($O\d+$C)[^<]*\2\s*/) { # do nothing $contents = &translate_environments($contents); $contents = &translate_commands($contents); } elsif (($env_switch_rx)&&($contents =~ s/^(\s*)\\($env_switch_rx)\b//s)) { # write directly into images.tex, protected by \begingroup...\endgroup local($prespace, $cmd, $tmp) = ($1,$2,"do_cmd_$2"); $latex_body .= "\n\\begingroup "; if (defined &$tmp) { eval("\$contents = &do_cmd_$cmd(\$contents)"); } $contents = &translate_environments($contents); $contents = &translate_commands($contents); undef $tmp; undef $cmd; $contents .= "\n\\endgroup "; } elsif ($contents =~ /^\s*\\([a-zA-Z]+)\b/s) { local($after_cmd) = $'; local($cmd) = $1; $tmp = "do_cmd_$cmd"; $etmp = "do_env_$cmd"; if (($cmd =~/^(rm(family)?|normalsize)$/) ||($declarations{$cmd}&&(defined &$tmp))) { do{ local(@save_open_tags) = @$open_tags_R; eval "\$contents = \&$tmp(\$after_cmd);"; print "\n*** eval &$tmp failed: $@\n$contents\n\n" if ($@); $contents .= &balance_tags(); }; } elsif ($declarations{$cmd}&&(defined &$etmp)) { eval "\$contents = \&$etmp(\$after_cmd);"; } else { $contents = &translate_environments($contents); $contents = &translate_commands($contents) if ($contents =~ /$match_br_rx/o); # Modifies $contents &process_command($single_cmd_rx,$contents) if ($contents =~ /\\/o); } undef $cmd; undef $tmp; undef $etmp; } else { $contents = &translate_environments($contents); $contents = &translate_commands($contents) if ($contents =~ /$match_br_rx/o); # Modifies $contents &process_command($single_cmd_rx,$contents) if ($contents =~ /\\/o); } $contents . 
&balance_tags(); } # MODIFIES $contents sub process_environment { local($opt, $env, $id, $styles) = @_; local($envS) = $env; $envS =~ s/\*\s*$/star/; local($env_sub,$border,$attribs,$env_id) = ("do_env_$envS",'','',''); local($original) = $contents; if ($env =~ /tex2html_deferred/ ) { $contents = &do_env_tex2html_deferred($contents); return ($contents); } $env_id = &read_style_info($opt, $env, $id, $styles) if (($USING_STYLES)&&($opt)); if (&defined_env($env)) { print STDOUT ","; print STDOUT "{$env $id}" if ($VERBOSITY > 1); # $env_sub =~ s/\*$/star/; $contents = &$env_sub($contents); } elsif ($env =~ /tex2html_nowrap/) { #pass it on directly for LaTeX, via images.tex $contents = &process_undefined_environment($env, $id, $contents); return ($contents); # elsif (&special_env) { # &special_env modifies $contents } else { local($no_special_chars) = 0; local($failed) = 0; local($has_special_chars) = 0; &special_env; # modifies $contents print STDOUT "\n" if ($VERBOSITY > 3); if ($failed || $has_special_chars) { $contents = $original; $failed = 1; print STDOUT " !failed!\n" if ($VERBOSITY > 3); } } if (($contents) && ($contents eq $original)) { if ($ignore{$env}) { return(''); } # Generate picture if ($contents =~ s/$htmlborder_rx//o) { $attribs = $2; $border = (($4)? "$4" : 1) } elsif ($contents =~ s/$htmlborder_pr_rx//o) { $attribs = $2; $border = (($4)? "$4" : 1) } $contents = &process_undefined_environment($env, $id, $contents); $env_sub = "post_latex_$env_sub"; # i.e. post_latex_do_env_ENV if ( defined &$env_sub) { $contents = &$env_sub($contents); } elsif (($border||($attributes))&&($HTML_VERSION > 2.1)) { $contents = &make_table($border,$attribs,'','','',$contents); } else { $contents = join('',"
\n",$contents,"\n
") unless (!($contents)||($inner_math)||($env =~ /^(tex2html_wrap|tex2html_nowrap|\w*math|eq\w*n)/o )); } } $contents; } #RRM: This reads the style information contained in the optional argument # to the \begin command. It is stored to be recovered later as an entry # within the automatically-generated style-sheet, if $USING_STYLES is set. # Syntax for this info is: #