#! /usr/local/bin/perl
#
# jLaTeX2HTML version 1.99+3.0 1999/9/15
# jLaTeX2HTML version 2.0 2003/01/25 shige
# $Id: latex2html.pin,v 1.70 2002/08/22 15:14:08 RRM Exp $
#
# Comprises patches and revisions by various authors:
# See Changes, the log file of LaTeX2HTML.
#
# Original Copyright notice:
#
# LaTeX2HTML by Nikos Drakos
# Japanese Patched:
# jLaTeX2HTML by Kenshi Muto
# jLaTeX2HTML copyright follows LaTeX2HTML copyright.
# modified for latex2html-{2K.1beta,2002} by shige
# Shigeharu TAKENO
# ****************************************************************
# LaTeX To HTML Translation **************************************
# ****************************************************************
# LaTeX2HTML is a Perl program that translates LaTeX source
# files into HTML (HyperText Markup Language). For each source
# file given as an argument the translator will create a
# directory containing the corresponding HTML files.
#
# The man page for this program is included at the end of this file
# and can be viewed using "perldoc latex2html"
#
# For more information on this program and some examples of its
# capabilities visit
#
# http://www.latex2html.org/
#
# or see the accompanying documentation in the docs/ directory
#
# or
#
# http://www-texdev.ics.mq.edu.au/l2h/docs/manual/
#
# or
#
# http://www.cbl.leeds.ac.uk/nikos/tex2html/doc/latex2html/
#
# Original code written by Nikos Drakos, July 1993.
#
# Address: Computer Based Learning Unit
# University of Leeds
# Leeds, LS2 9JT
#
# Copyright (c) 1993-95. All rights reserved.
#
#
# Extensively modified by Ross Moore, Herb Swan and others
#
# Address: Mathematics Department
# Macquarie University
# Sydney, Australia, 2109
#
# Copyright (c) 1996-2001. All rights reserved.
#
# See general license in the LICENSE file.
#
##########################################################################
use 5.003; # refuse to work with old and buggy perl version
#use strict;
#use diagnostics;
# include some perl packages; these come with the standard distribution
use Getopt::Long;
use Fcntl;
use AnyDBM_File;
# The following are global variables that also appear in some modules
use vars qw($LATEX2HTMLDIR $LATEX2HTMLPLATDIR $SCRIPT
%Month %used_icons $inside_tabbing $TABLE_attribs
%mathentities $date_name $outer_math $TABLE__CELLPADDING_rx);
BEGIN {
    # Resolve the two library directories at compile time so that the
    # "use L2hos" following this block can find its module.  An
    # environment setting wins; otherwise the built-in default is used
    # and exported through %ENV.
    $ENV{'LATEX2HTMLDIR'} = '/usr/local/share/lib/latex2html'
        unless $ENV{'LATEX2HTMLDIR'};
    $LATEX2HTMLDIR = $ENV{'LATEX2HTMLDIR'};

    unless ($ENV{'LATEX2HTMLPLATDIR'}) {
        $ENV{'LATEX2HTMLPLATDIR'} = '/usr/local/lib/latex2html'||$LATEX2HTMLDIR;
    }
    $LATEX2HTMLPLATDIR = $ENV{'LATEX2HTMLPLATDIR'};

    # The platform directory is optional and searched first ...
    push(@INC, $LATEX2HTMLPLATDIR) if (-d $LATEX2HTMLPLATDIR);

    # ... whereas the shared directory is mandatory.
    die qq{Fatal: Directory "$LATEX2HTMLDIR" does not exist.\n}
        unless (-d $LATEX2HTMLDIR);
    push(@INC, $LATEX2HTMLDIR);
}
use L2hos; # Operating system dependent routines
# $^W = 1; # turn on warnings
# Release identifier of the LaTeX2HTML distribution this script is based on.
my $RELEASE = '2002-2-1';
# Pull the bare revision number out of the RCS keyword string
# (q$...$ uses '$' as the quoting delimiter).
my ($REVISION) = q$Revision: 1.70 $ =~ /:\s*(\S+)/;
# The key which delimits expressions defined in the environment
# depends on the operating system.
$envkey = L2hos->pathd();
# $dd is the directory delimiter character
$dd = L2hos->dd();
# make sure the $LATEX2HTMLDIR is on the search-path for forked processes
if($ENV{'PERL5LIB'}) {
# append only when the directory is not already mentioned
$ENV{'PERL5LIB'} .= "$envkey$LATEX2HTMLDIR"
unless($ENV{'PERL5LIB'} =~ m|\Q$LATEX2HTMLDIR\E|o);
} else {
$ENV{'PERL5LIB'} = $LATEX2HTMLDIR;
}
# Local configuration, read at runtime
# Read the $CONFIG_FILE (usually l2hconf.pm )
# An explicit file named in $ENV{'L2HCONFIG'} takes precedence over the
# l2hconf module found via @INC.
if($ENV{'L2HCONFIG'}) {
require $ENV{'L2HCONFIG'} ||
die "Fatal (require $ENV{'L2HCONFIG'}): $!";
} else {
eval 'use l2hconf';
if($@) {
die "Fatal (use l2hconf): $@\n";
}
}
# MRO: Changed this to global value in config/config.pl
# change these whenever you do a patch to this program and then
# name the resulting patch file accordingly
# $TVERSION = "2002-2-1";
#$TPATCHLEVEL = " beta";
#$TPATCHLEVEL = " release";
#$RELDATE = "(March 30, 1999)";
#$TEX2HTMLV_SHORT = $TVERSION . $TPATCHLEVEL;
# Version strings; the J* variants additionally carry the Japanese patch level.
$JVERSION = "JA patch-2.0";
$TEX2HTMLV_SHORT = $RELEASE;
$JTEX2HTMLV_SHORT = "$RELEASE $JVERSION";
$TEX2HTMLVERSION = "$TEX2HTMLV_SHORT ($REVISION)";
$JTEX2HTMLVERSION = "$TEX2HTMLV_SHORT ($REVISION) $JVERSION";
# Home pages of the program and of its authors.
$TEX2HTMLADDRESS = "http://www.latex2html.org/";
#$JTEX2HTMLADDRESS = "http://www.topstudio.co.jp/~kmuto/software/latex2html/";
$JTEX2HTMLADDRESS = "http://takeno.iee.niit.ac.jp/~shige/TeX/latex2html/ltx2html.html";
$AUTHORADDRESS = "http://cbl.leeds.ac.uk/nikos/personal.html";
#$AUTHORADDRESS2 = "http://www-math.mpce.mq.edu.au/%7Eross/";
$AUTHORADDRESS2 = "http://www.maths.mq.edu.au/~ross/";
$JAUTHORADDRESS = "http://www.topstudio.co.jp/~kmuto/";
$JAUTHORADDRESS2 = "http://takeno.iee.niit.ac.jp/~shige/";
# Set $HOME to what the system considers the home directory
$HOME = L2hos->home();
# allow require/use of files that live in the home directory
push(@INC,$HOME);
# flush stdout with every print -- gives better feedback during
# long computations
$| = 1;
# set Perl's subscript separator to LaTeX's illegal character.
# (quite defensive but why not)
$; = "\000";
# No arguments!!
unless(@ARGV) {
die "Error: No files to process!\n";
}
# Image prefix
$IMAGE_PREFIX = '_image';
# Partition prefix
$PARTITION_PREFIX = 'part_' unless $PARTITION_PREFIX;
# Author address
# (only the first two elements returned by &address_data are used)
@address_data = &address_data('ISO');
$ADDRESS = "$address_data[0]\n$address_data[1]";
# ensure non-zero defaults
$MAX_SPLIT_DEPTH = 4 unless ($MAX_SPLIT_DEPTH);
$MAX_LINK_DEPTH = 4 unless ($MAX_LINK_DEPTH);
$TOC_DEPTH = 4 unless ($TOC_DEPTH);
# A global value may already be set in the $CONFIG_FILE
# Otherwise the environment variable L2HINIT_NAME, then '.latex2html-init'.
$INIT_FILE_NAME = $ENV{'L2HINIT_NAME'} || '.latex2html-init'
unless $INIT_FILE_NAME;
# Read the $HOME/$INIT_FILE_NAME if one is found
if (-f "$HOME$dd$INIT_FILE_NAME" && -r _) {
print "Note: Loading $HOME$dd$INIT_FILE_NAME\n";
require("$HOME$dd$INIT_FILE_NAME");
$INIT_FILE = "$HOME$dd$INIT_FILE_NAME";
# _MRO_TODO_: Introduce a version to be checked?
# A $DESTDIR of '.' after loading is treated as the mark of an
# out-of-date init file.
die "Error: You have an out-of-date " . $HOME .
"$dd$INIT_FILE_NAME file.\nPlease update or delete it.\n"
if ($DESTDIR eq '.');
}
# Read the $INIT_FILE_NAME file if one is found in current directory
# (skipped when the current directory is $HOME, which was handled above)
if ( L2hos->Cwd() ne $HOME && -f ".$dd$INIT_FILE_NAME" && -r _) {
print "Note: Loading .$dd$INIT_FILE_NAME\n";
require(".$dd$INIT_FILE_NAME");
$INIT_FILE = "$INIT_FILE_NAME";
}
die "Error: '.' is an incorrect setting for DESTDIR.\n" .
"Please check your $INIT_FILE_NAME file.\n"
if ($DESTDIR eq '.');
# User home substitutions
# A bare '~' (followed by a directory/path delimiter or end of string)
# becomes $HOME.
$LATEX2HTMLSTYLES =~ s/~([$dd$dd$envkey]|$)/$HOME$1/go;
# the next line fails utterly on non-UNIX systems
# '~name' is replaced by whatever L2hos->home(name) returns.
$LATEX2HTMLSTYLES =~ s/~([^$dd$dd$envkey]+)/L2hos->home($1)/geo;
#absolutise the paths
$LATEX2HTMLSTYLES = join($envkey,
map(L2hos->Make_directory_absolute($_),
split(/$envkey/o, $LATEX2HTMLSTYLES)));
#HWS: That was the last reference to HOME. Now set HOME to $LATEX2HTMLDIR,
# to enable dvips to see that version of .dvipsrc! But only if we
# have DVIPS_MODE not set - yes - this is a horrible nasty kludge
# MRO: The file has to be updated by configure _MRO_TODO_
if ($PK_GENERATION && ! $DVIPS_MODE) {
$ENV{HOME} = $LATEX2HTMLDIR;
delete $ENV{PRINTER}; # Overrides .dvipsrc
}
# language of the DTD specified in the tag
$ISO_LANGUAGE = 'EN' unless $ISO_LANGUAGE;
# Save the command line arguments, quote where necessary
# (arguments containing whitespace or one of # * ! $ % get single quotes)
$argv = join(' ', map {/[\s#*!\$%]/ ? "'$_'" : $_ } @ARGV);
# Pre-process the command line for backward compatibility
foreach(@ARGV) {
s/^--?no_/-no/; # replace e.g. no_fork by nofork
# s/^[+](\d+)$/$1/; # remove + in front of integers
}
# Process command line options
# An option spec followed by a \$VARIABLE reference is stored directly in
# that variable; a spec without one ends up in %opt and is interpreted
# further below.  On a parsing error the usage text is shown and the
# script exits with status 1.
my %opt;
unless(GetOptions(\%opt, # all non-linked options go into %opt
# option linkage (optional)
'help|h',
'version|V',
'split=s',
'link=s',
'toc_depth=i', \$TOC_DEPTH,
'toc_stars!', \$TOC_STARS,
'short_extn!', \$SHORTEXTN,
'iso_language=s', \$ISO_LANGUAGE,
'validate!', \$HTML_VALIDATE,
'latex!',
'djgpp!', \$DJGPP,
'fork!', \$CAN_FORK,
'external_images!', \$EXTERNAL_IMAGES,
'ascii_mode!', \$ASCII_MODE,
'lcase_tags!', \$LOWER_CASE_TAGS,
'ps_images!', \$PS_IMAGES,
'font_size=s', \$FONT_SIZE,
'tex_defs!', \$TEXDEFS,
'navigation!',
'top_navigation!', \$TOP_NAVIGATION,
'bottom_navigation!', \$BOTTOM_NAVIGATION,
'auto_navigation!', \$AUTO_NAVIGATION,
'index_in_navigation!', \$INDEX_IN_NAVIGATION,
'contents_in_navigation!', \$CONTENTS_IN_NAVIGATION,
'next_page_in_navigation!', \$NEXT_PAGE_IN_NAVIGATION,
'previous_page_in_navigation!', \$PREVIOUS_PAGE_IN_NAVIGATION,
'footnode!',
'numbered_footnotes!', \$NUMBERED_FOOTNOTES,
'prefix=s', \$PREFIX,
'auto_prefix!', \$AUTO_PREFIX,
'long_titles=i', \$LONG_TITLES,
'custom_titles!', \$CUSTOM_TITLES,
'title|t=s', \$TITLE,
'rooted!', \$ROOTED,
'rootdir=s',
'dir=s', \$FIXEDDIR,
'mkdir', \$MKDIR,
'address=s', \$ADDRESS,
'noaddress',
'subdir!',
'info=s', \$INFO,
'noinfo',
'auto_link!',
'reuse=i', \$REUSE,
'noreuse',
'antialias_text!', \$ANTI_ALIAS_TEXT,
'antialias!', \$ANTI_ALIAS,
'transparent!', \$TRANSPARENT_FIGURES,
'white!', \$WHITE_BACKGROUND,
'discard!', \$DISCARD_PS,
'image_type=s', \$IMAGE_TYPE,
'images!',
'accent_images=s', \$ACCENT_IMAGES,
'noaccent_images',
'style=s', \$STYLESHEET,
'parbox_images!',
'math!',
'math_parsing!',
'latin!',
'entities!', \$USE_ENTITY_NAMES,
'local_icons!', \$LOCAL_ICONS,
'scalable_fonts!', \$SCALABLE_FONTS,
'images_only!', \$IMAGES_ONLY,
'show_section_numbers!',\$SHOW_SECTION_NUMBERS,
'show_init!', \$SHOW_INIT_FILE,
'init_file=s', \$INIT_FILE,
'up_url=s', \$EXTERNAL_UP_LINK,
'up_title=s', \$EXTERNAL_UP_TITLE,
'down_url=s', \$EXTERNAL_DOWN_LINK,
'down_title=s', \$EXTERNAL_DOWN_TITLE,
'prev_url=s', \$EXTERNAL_PREV_LINK,
'prev_title=s', \$EXTERNAL_PREV_TITLE,
'index=s', \$EXTERNAL_INDEX,
'biblio=s', \$EXTERNAL_BIBLIO,
'contents=s', \$EXTERNAL_CONTENTS,
'external_file=s', \$EXTERNAL_FILE,
'short_index!', \$SHORT_INDEX,
'unsegment!', \$UNSEGMENT,
'debug!', \$DEBUG,
'tmp=s', \$TMP,
'ldump!', \$LATEX_DUMP,
'timing!', \$TIMING,
'verbosity=i', \$VERBOSITY,
'html_version=s', \$HTML_VERSION,
'strict!', \$STRICT_HTML,
'xbit!', \$XBIT_HACK,
'ssi!', \$ALLOW_SSI,
'php!', \$ALLOW_PHP,
'test_mode!' # undocumented switch
)) {
&usage();
exit 1;
}
# interpret options, check option consistency
# -split N or -split +N: a leading '+' is recorded as a negative depth
# and sets $REL_DEPTH.
if(defined $opt{'split'}) {
if ($opt{'split'} =~ /^(\+?)(\d+)$/) {
$MAX_SPLIT_DEPTH = $2;
if ($1) { $MAX_SPLIT_DEPTH *= -1; $REL_DEPTH = 1; }
} else {
&usage;
die "Error: Unrecognised value for -split: $opt{'split'}\n";
}
}
# -link N or -link +N: a leading '+' is recorded as a negative depth.
if(defined $opt{'link'}) {
if ($opt{'link'} =~ /^(\+?)(\d+)$/) {
$MAX_LINK_DEPTH = $2;
if ($1) { $MAX_LINK_DEPTH *= -1 }
} else {
&usage;
die "Error: Unrecognised value for -link: $opt{'link'}\n";
}
}
unless ($ISO_LANGUAGE =~ /^[A-Z.]+$/) {
die "Error: Language (-iso_language) must be uppercase and dots only: $ISO_LANGUAGE\n";
}
if ($HTML_VALIDATE && !$HTML_VALIDATOR) {
die "Error: Need a HTML_VALIDATOR when -validate is specified.\n";
}
&set_if_false($NOLATEX,$opt{latex}); # negate the option...
# ASCII mode and PostScript images both imply external images
if ($ASCII_MODE || $PS_IMAGES) {
$EXTERNAL_IMAGES = 1;
}
if ($FONT_SIZE && $FONT_SIZE !~ /^\d+pt$/) {
die "Error: Font size (-font_size) must end with 'pt': $FONT_SIZE\n"
}
&set_if_false($NO_NAVIGATION,$opt{navigation});
&set_if_false($NO_FOOTNODE,$opt{footnode});
if (defined $TITLE && !length($TITLE)) {
die "Error: Empty title (-title).\n";
}
# -rootdir DIR behaves like -rooted together with -dir DIR
if ($opt{rootdir}) {
$ROOTED = 1;
$FIXEDDIR = $opt{rootdir};
}
# A missing target directory is only created when -mkdir was given;
# just a single directory level is created.
if ($FIXEDDIR && !-d $FIXEDDIR) {
if ($MKDIR) {
print "\n *** creating directory: $FIXEDDIR ";
die "Failed: $!\n" unless (mkdir($FIXEDDIR, 0755));
# _TODO_ use File::Path to create a series of directories
} else {
&usage;
die "Error: Specified directory (-rootdir, -dir) does not exist.\n";
}
}
&set_if_false($NO_SUBDIR, $opt{subdir});
&set_if_false($NO_AUTO_LINK, $opt{auto_link});
if ($opt{noreuse}) {
$REUSE = 0;
}
# The requested image type must be one listed in @IMAGE_TYPES
unless(grep(/^\Q$IMAGE_TYPE\E$/o, @IMAGE_TYPES)) {
die <<"EOF";
Error: No such image type '$IMAGE_TYPE'.
This installation supports (first is default): @IMAGE_TYPES
EOF
}
&set_if_false($NO_IMAGES, $opt{images});
# The -noXXX switches below simply clear the corresponding setting.
if ($opt{noaccent_images}) {
$ACCENT_IMAGES = '';
}
if($opt{noaddress}) {
$ADDRESS = '';
}
if($opt{noinfo}) {
$INFO = 0;
}
# -accent_images accepts a single style word or a comma-separated list of
# style words (letters and commas only).
if ($ACCENT_IMAGES && $ACCENT_IMAGES !~ /^[a-zA-Z,]+$/) {
    # Report the rejected option value itself; $_ holds nothing related
    # to this option at this point.
    die "Error: Single word or comma-list of style words needed for -accent_images, not: $ACCENT_IMAGES\n";
}
&set_if_false($NO_PARBOX_IMAGES, $opt{parbox_images});
&set_if_false($NO_SIMPLE_MATH, $opt{math});
# -math_parsing also decides $NO_SIMPLE_MATH, unless -math/-nomath was
# given explicitly.
if (defined $opt{math_parsing}) {
$NO_MATH_PARSING = !$opt{math_parsing};
$NO_SIMPLE_MATH = !$opt{math_parsing} unless(defined $opt{math});
}
&set_if_false($NO_ISOLATIN, $opt{latin});
# Load the init file, if any.  $INIT_FILE may come from -init_file or
# from an init file found earlier, in which case it is required again
# here (require loads a given file only once).
if ($INIT_FILE) {
if (-f $INIT_FILE && -r _) {
print "Note: Initialising with file: $INIT_FILE\n"
if ($DEBUG || $VERBOSITY);
require($INIT_FILE);
} else {
die "Error: Could not find file (-init_file): $INIT_FILE\n";
}
}
# Normalise the external link URLs: make each one a defined string and
# replace every literal '~' by its HTML numeric character reference, so
# that the tilde is protected as the trailing comment requires.
foreach ($EXTERNAL_UP_LINK, $EXTERNAL_DOWN_LINK, $EXTERNAL_PREV_LINK,
         $EXTERNAL_INDEX, $EXTERNAL_BIBLIO, $EXTERNAL_CONTENTS) {
    $_ ||= '';        # initialize
    s/~/&#126;/g;     # protect `~'
}
# A user-supplied temporary directory must exist and be writable
if($TMP && !(-d $TMP && -w _)) {
die "Error: '$TMP' not usable as temporary directory.\n";
}
# -help: show the embedded manual page and stop
if ($opt{help}) {
L2hos->perldoc($SCRIPT);
exit 0;
}
# -version: print the banner and stop
if ($opt{version}) {
&banner();
exit 0;
}
# -test_mode (undocumented): fixed title, and tools/icons/colour files
# are taken from the hard-wired source tree below.
if ($opt{test_mode}) {
$TITLE = 'LaTeX2HTML Test Document';
$TEXEXPAND = "$PERL /private/Network/Servers/www-jlc-in/home/htdocs/subg/ir/study/latex/latex2html-2002-2-1${dd}texexpand";
$PSTOIMG = "$PERL /private/Network/Servers/www-jlc-in/home/htdocs/subg/ir/study/latex/latex2html-2002-2-1${dd}pstoimg";
$ICONSERVER = L2hos->path2URL("/private/Network/Servers/www-jlc-in/home/htdocs/subg/ir/study/latex/latex2html-2002-2-1${dd}icons");
$TEST_MODE = 1;
$RGBCOLORFILE = "/private/Network/Servers/www-jlc-in/home/htdocs/subg/ir/study/latex/latex2html-2002-2-1${dd}styles${dd}rgb.txt";
$CRAYOLAFILE = "/private/Network/Servers/www-jlc-in/home/htdocs/subg/ir/study/latex/latex2html-2002-2-1${dd}styles${dd}crayola.txt";
}
if($DEBUG) {
# make the OS-dependent functions more chatty, too
$L2hos::Verbose = 1;
}
undef %opt; # not needed any more
$FIXEDDIR = $FIXEDDIR || $DESTDIR || ''; # for backward compatibility
# An external "up" link needs both URL and title; if only one of them
# is set, both are dropped with a warning.
if ($EXTERNAL_UP_TITLE xor $EXTERNAL_UP_LINK) {
warn "Warning (-up_url, -up_title): Need to specify both a parent URL and a parent title!\n";
$EXTERNAL_UP_TITLE = $EXTERNAL_UP_LINK = "";
}
# An external "down" link needs both URL and title; if only one of them
# is set, both are dropped with a warning.  The message names the down
# link itself rather than the parent link.
if ($EXTERNAL_DOWN_TITLE xor $EXTERNAL_DOWN_LINK) {
    warn "Warning (-down_url, -down_title): Need to specify both a URL and a title for the down link!\n";
    $EXTERNAL_DOWN_TITLE = $EXTERNAL_DOWN_LINK = "";
}
# $NO_NAVIGATION = 1 unless $MAX_SPLIT_DEPTH; # Martin Wilck
# Negative depths encode the "+N" form: make them positive again and
# remember the relative/leaf flag.
if ($MAX_SPLIT_DEPTH && $MAX_SPLIT_DEPTH < 0) {
$MAX_SPLIT_DEPTH *= -1; $REL_DEPTH = 1;
}
if ($MAX_LINK_DEPTH && $MAX_LINK_DEPTH < 0) {
$MAX_LINK_DEPTH *= -1; $LEAF_LINKS = 1;
}
$FOOT_FILENAME = 'footnode' unless ($FOOT_FILENAME);
# without splitting there is no separate footnote node
$NO_FOOTNODE = 1 unless ($MAX_SPLIT_DEPTH || $NO_FOOTNODE);
$NO_SPLIT = 1 unless $MAX_SPLIT_DEPTH; # _MRO_TODO_: is this needed at all?
$SEGMENT = $SEGMENTED = 0;
$NO_MATH_MARKUP = 1;
# specify the filename extension to use with the generated HTML files
if ($SHORTEXTN) { $EXTN = ".htm"; } # for HTML files on CDROM
elsif ($ALLOW_PHP) { $EXTN = ".php"; } # has PHP dynamic includes
# with server-side includes (SSI) :
elsif ($ALLOW_SSI && !$XBIT_HACK) { $EXTN = ".shtml"; }
# ordinary names, valid also for SSI with XBit hack :
else { $EXTN = ".html"; }
$NODE_NAME = 'node' unless (defined $NODE_NAME);
# space for temporary files
# different to the $TMPDIR for image-generation
# MRO: No directory should end with $dd!
$TMP_ = "TMP";
$TMP_PREFIX = "l2h" unless ($TMP_PREFIX);
# This can be set to 1 when using a version of dvips that is safe
# from the "dot-in-name" bug.
# _TODO_ this should be determined by configure
#$DVIPS_SAFE = 1;
# $charset (lower case) overrides the default document character set
$CHARSET = $charset || 'iso-8859-1';
####################################################################
#
# If possible, use icons of the same type as generated images
#
# %{"icons_$IMAGE_TYPE"} is a symbolic reference to a package hash named
# after the image type.  The hash is tested for content directly:
# "defined %hash" was deprecated and is a fatal compile-time error from
# Perl 5.22 onwards.
if ($IMAGE_TYPE && %{"icons_$IMAGE_TYPE"}) {
    %icons = %{"icons_$IMAGE_TYPE"};
}
####################################################################
#
# Figure out what options we need to pass to DVIPS and store that in
# the $DVIPSOPT variable. Also, scaling is taken care of at the
# dvips level if PK_GENERATION is set to 1, so adjust SCALE_FACTORs
# accordingly.
#
# Scalable fonts make PK font generation (and a dvips mode) unnecessary.
if ($SCALABLE_FONTS) {
    $PK_GENERATION = 0;
    $DVIPS_MODE = '';
}
if ($PK_GENERATION) {
    # Non-positive scale factors fall back to 2.
    if ($MATH_SCALE_FACTOR <= 0) { $MATH_SCALE_FACTOR = 2; }
    if ($FIGURE_SCALE_FACTOR <= 0) { $FIGURE_SCALE_FACTOR = 2; }
    my $saveMSF = $MATH_SCALE_FACTOR;
    my $saveFSF = $FIGURE_SCALE_FACTOR;
    my $desired_dpi = int($MATH_SCALE_FACTOR*75);
    # Scaling is done by dvips through the magnification, so the scale
    # factors are re-expressed relative to the METAFONT resolution.
    $FIGURE_SCALE_FACTOR = ($METAFONT_DPI / 72) *
        ($FIGURE_SCALE_FACTOR / $MATH_SCALE_FACTOR) ;
    $MATH_SCALE_FACTOR = $METAFONT_DPI / 72;
    $dvi_mag = int(1000 * $desired_dpi / $METAFONT_DPI);
    if ($dvi_mag > 1000) {
        &write_warnings(
            "WARNING: Your SCALE FACTOR is too large for PK_GENERATION.\n" .
            " See $CONFIG_FILE for more information.\n");
    }
    # RRM: over-sized scaling, using dvi-magnification
    if ($EXTRA_IMAGE_SCALE) {
        print "\n *** Images at $EXTRA_IMAGE_SCALE times resolution of displayed size ***\n";
        $desired_dpi = int($EXTRA_IMAGE_SCALE * $desired_dpi+.5);
        print " desired_dpi = $desired_dpi METAFONT_DPI = $METAFONT_DPI\n"
            if $DEBUG;
        $dvi_mag = int(1000 * $desired_dpi / $METAFONT_DPI);
        # the user-visible scale factors are restored in this case
        $MATH_SCALE_FACTOR = $saveMSF;
        $FIGURE_SCALE_FACTOR = $saveFSF;
    }
    # no space after "-y", "-D", "-e" --- required by DVIPS under DOS !
    # A "my" declaration must not carry a statement modifier (its
    # behaviour is undefined), so the empty default is spelled out.
    my $mode_switch = $DVIPS_MODE ? "-mode $DVIPS_MODE" : '';
    $DVIPSOPT .= " -y$dvi_mag -D$METAFONT_DPI $mode_switch -e5 ";
} else { # no PK_GENERATION
    # if ($EXTRA_IMAGE_SCALE) {
    # &write_warnings(
    # "the \$EXTRA_IMAGE_SCALE feature requires either \$PK_GENERATION=1"
    # . " or the '-scalable_fonts' option");
    # $EXTRA_IMAGE_SCALE = '';
    # }
    # MRO: shifted to l2hconf
    #$DVIPSOPT .= ' -M';
} # end PK_GENERATION
# The mapping from numbers to accents.
# These are required to process the \accent command, which is found in
# tables of contents whenever there is an accented character in a
# caption or section title. Processing the \accent command makes
# $encoded_*_number work properly (see &extract_captions) with
# captions that contain accented characters.
# I got the numbers from the plain.tex file, version 3.141.
# Missing entries should be looked up by a native speaker.
# Have a look at generate_accent_commands and $iso_8859_1_character_map.
# MEH: added more accent types
# MRO: only uppercase needed!
# Accent number (as written after \accent) => accent name.
# The trailing comment on each entry shows the matching accent command.
%accent_type = (
'18' => 'grave', # \`
'19' => 'acute', # \'
'20' => 'caron', # \v
'21' => 'breve', # \u
'22' => 'macr', # \=
'23' => 'ring', #
'24' => 'cedil', # \c
'94' => 'circ', # \^
'95' => 'dot', # \.
'7D' => 'dblac', # \H
'7E' => 'tilde', # \~
'7F' => 'uml', # \"
);
# Main entry point: process all files named on the command line.
&driver;
exit 0; # clean exit, no errors
############################ Subroutines ##################################
# Create the temporary directory for this run and record it in $TMPDIR.
#
# Candidate parents are tried in this order: $TMP, "$DESTDIR$dd$TMP_",
# $DESTDIR and the current working directory.  The first one that is an
# existing, writable directory and in which "$TMP_PREFIX$$" can be
# created is used; $TMPDIR stays '' when none works.
# Dies when the chosen path contains a '.' while $DVIPS looks like dvips
# and $DVIPS_SAFE is not set.  Under $DJGPP the DBM database is closed
# for the duration of the call to save file handles.
sub make_tmp_dir {
    &close_dbm_database if $DJGPP; # to save file-handles

    $TMPDIR = '';
    my @candidates = ();
    push(@candidates, $TMP)               if ($TMP);
    push(@candidates, "$DESTDIR$dd$TMP_") if ($TMP_);
    push(@candidates, $DESTDIR)           if ($DESTDIR);
    push(@candidates, L2hos->Cwd());

    my $parent;
    foreach $parent (@candidates) {
        next unless (-d $parent && -w _);
        my $wanted = "$parent$dd$TMP_PREFIX$$";
        unless (mkdir($wanted,0755)) {
            warn "Warning: Cannot create temporary directory '$wanted': $!\n";
            next;
        }
        $TMPDIR = $wanted;
        last;
    }

    $dvips_warning = <<"EOF";
Warning: There is a '.' in \$TMPDIR, $DVIPS will probably fail.
Set \$TMP to use a /tmp directory, or rename the working directory.
EOF
    if ($TMPDIR =~ /\./ && $DVIPS =~ /dvips/ && !$DVIPS_SAFE) {
        die ($dvips_warning . "\n\$TMPDIR=$TMPDIR ***\n\n");
    }
    &open_dbm_database if $DJGPP;
}
# Store the logical negation of the second argument in the first one,
# but only when the second argument is defined.  The first argument is
# modified in place through the @_ alias.
sub set_if_false {
    if (defined $_[1]) {
        $_[0] = !$_[1];
    }
}
# Abort with an explanatory message when the given file name contains
# more than one '.', i.e. a dot inside the file-name prefix.
sub check_for_dots {
    my ($name) = @_;
    die "\n\n\n *** Fatal Error --- but easy to fix ***\n"
        . "\nCannot have '.' in file-name prefix, else dvips fails on images"
        . "\nChange the name from $name and try again.\n\n"
        if ($name =~ /\.[^.]*\./);
}
# Process each file ...
#
# Main loop of the translator.  After the global initialisation, every
# file named in @ARGV is handled in turn:
#   - the file name is checked and completed (default extension "tex"),
#   - the destination directory $DESTDIR is derived from $FIXEDDIR,
#     $ROOTED, $NO_SUBDIR and the location of the source file,
#   - the destination and temporary directories are created and cleaned,
#   - texexpand is run on the source,
#   - the DBM database is opened and either the images are regenerated
#     ($IMAGES_ONLY) or the document is read and translated,
#   - style sheet, warnings, the index link and optional validation
#     follow, and the original working directory is restored.
# Finally unknown commands and, if requested, timing are reported.
# The value of $_ is returned.
sub driver {
    local($FILE, $orig_cwd, %unknown_commands, %dependent, %depends_on
        , %styleID, %env_style, $bbl_cnt, $dbg, %numbered_section);
    # MRO: $texfilepath has to be global!
    local(%styles_loaded);
    $orig_cwd = L2hos->Cwd();
    print "\n *** initialise *** " if ($VERBOSITY > 1);
    &initialise; # Initialise some global variables
    print "\n *** check modes *** " if ($VERBOSITY > 1);
    &ascii_mode if $ASCII_MODE; # Must come after initialization
    &titles_language($TITLES_LANGUAGE);
    &make_numbered_footnotes if ($NUMBERED_FOOTNOTES);
    $dbg = $DEBUG ? "-debug" : "";
    $dbg .= (($VERBOSITY>2) ? " -verbose" : "");
    #use the same hashes for all files in a batch
    local(%cached_env_img, %id_map, %symbolic_labels, %latex_labels)
        if ($FIXEDDIR && $NO_SUBDIR);
    local($MULTIPLE_FILES,$THIS_FILE);
    $MULTIPLE_FILES = 1+$#ARGV if $ROOTED;
    print "\n *** $MULTIPLE_FILES file".($MULTIPLE_FILES ? 's: ' : ': ')
        . join(',',@ARGV) . " *** " if ($VERBOSITY > 1);
    local(%section_info, %toc_section_info, %cite_info, %ref_files);
    foreach $FILE (@ARGV) {
        &check_for_dots($FILE) unless $DVIPS_SAFE;
        ++$THIS_FILE if $MULTIPLE_FILES;
        # per-file reset, unless the files form one rooted batch
        do {
            %section_info = ();
            %toc_section_info = ();
            %cite_info = ();
            %ref_files = ();
        } unless $MULTIPLE_FILES;
        local($bbl_nr) = 1;
        # The number of reused images and those in images.tex
        local($global_page_num) = (0) unless($FIXEDDIR && $NO_SUBDIR);
        # The number of images in images.tex
        local($new_page_num) = (0); # unless($FIXEDDIR && $NO_SUBDIR);
        local($pid, $sections_rx,
            , $outermost_level, %latex_body, $latex_body
            , %encoded_section_number
            , %verbatim, %new_command, %new_environment
            , %provide_command, %renew_command, %new_theorem
            , $preamble, $aux_preamble, $prelatex, @preamble);
        # must retain these when all files are in the same directory
        # else the images.pl and labels.pl files get clobbered
        # NOTE(review): a local() inside this block is undone again at
        # the closing brace -- confirm that this is the intended reset.
        unless ($FIXEDDIR && $NO_SUBDIR) {
            print "\nResetting image-cache" if ($#ARGV);
            local(%cached_env_img, %id_map, %symbolic_labels, %latex_labels)
        }
        ## AYS: Allow extension other than .tex and make it optional
        ($EXT = $FILE) =~ s/.*\.([^\.]*)$/$1/;
        if ( $EXT eq $FILE ) {
            $EXT = "tex";
            $FILE =~ s/$/.tex/;
        }
        #RRM: allow user-customisation, dependent on file-name
        # e.g. add directories to $TEXINPUTS named for the file
        # --- idea due to Fred Drake
        &custom_driver_hook($FILE) if (defined &custom_driver_hook);
        # JCL(jcl-dir)
        # We need absolute paths for TEXINPUTS here, because
        # we change the directory
        # NOTE(review): $texfilepath is only assigned for the current
        # file a few lines further down, so this comparison sees the
        # value left by a previous iteration (or none) -- verify.
        if ($orig_cwd eq $texfilepath) {
            &deal_with_texinputs($orig_cwd);
        } else {
            &deal_with_texinputs($orig_cwd, $texfilepath);
        }
        ($texfilepath, $FILE) = &get_full_path($FILE);
        $texfilepath = '.' unless($texfilepath);
        die "Cannot read $texfilepath$dd$FILE \n"
            unless (-f "$texfilepath$dd$FILE");
        # Tell texexpand which files we *don't* want to look at.
        $ENV{'TEXE_DONT_INCLUDE'} = $DONT_INCLUDE if $DONT_INCLUDE;
        # Tell texexpand which files we *do* want to look at, e.g.
        # home-brew style files
        $ENV{'TEXE_DO_INCLUDE'} = $DO_INCLUDE if $DO_INCLUDE;
        $FILE =~ s/\.[^\.]*$//; ## AYS
        $DESTDIR = ''; # start at empty
        if ($FIXEDDIR) {
            $DESTDIR = $FIXEDDIR unless ($FIXEDDIR eq '.');
            if (($ROOTED)&&!($texfilepath eq $orig_cwd)) {
                $DESTDIR .= $dd . $FILE unless $NO_SUBDIR;
            };
        } elsif ($texfilepath eq $orig_cwd) {
            $DESTDIR = ($NO_SUBDIR ? '.' : $FILE);
        } else {
            $DESTDIR = $ROOTED ? '.' : $texfilepath;
            $DESTDIR .= $dd . $FILE unless $NO_SUBDIR;
        }
        $PREFIX = "$FILE-" if $AUTO_PREFIX;
        print "\nOPENING $texfilepath$dd$FILE.$EXT \n"; ## AYS
        next unless (&new_dir($DESTDIR,''));
        # establish absolute path to $DESTDIR
        $DESTDIR = L2hos->Make_directory_absolute($DESTDIR);
        &make_tmp_dir;
        print "\nNote: Working directory is $DESTDIR\n";
        print "Note: Images will be generated in $TMPDIR\n\n";
        # Need to clean up a bit in case there's garbage left
        # from former runs.
        if ($DESTDIR) { chdir($DESTDIR) || die "$!\n"; }
        if (opendir (TMP,$TMP_)) {
            foreach (readdir TMP) {
                # the entries live inside the directory named by $TMP_
                L2hos->Unlink("$TMP_$dd$_") unless (/^\.\.?$/);
            }
            closedir TMP;
        }
        &cleanup(1);
        unless(-d $TMP_) {
            mkdir($TMP_, 0755) ||
                die "Cannot create directory '$TMP_': $!\n";
        }
        chdir($orig_cwd);
        # RRM 14/5/98 moved this to occur earlier
        ## JCL(jcl-dir)
        ## We need absolute paths for TEXINPUTS here, because
        ## we change the directory
        # if ($orig_cwd eq $texfilepath) {
        # &deal_with_texinputs($orig_cwd);
        # } else {
        # &deal_with_texinputs($orig_cwd, $texfilepath);
        # }
        # This needs $DESTDIR to have been created ...
        print " *** calling `texexpand' ***" if ($VERBOSITY > 1);
        local($unseg) = ($UNSEGMENT ? "-unsegment " : "");
        # does DOS need to check these here ?
        # die "File $TEXEXPAND does not exist or is not executable\n"
        # unless (-x $TEXEXPAND);
        L2hos->syswait("$TEXEXPAND $dbg -auto_exclude $unseg"
            . "-save_styles $DESTDIR$dd$TMP_${dd}styles "
            . ($TEXINPUTS ? "-texinputs $TEXINPUTS " : '' )
            . (($VERBOSITY >2) ? "-verbose " : '' )
            . "-out $DESTDIR$dd$TMP_$dd$FILE "
            . "$texfilepath$dd$FILE.$EXT")
            && die " texexpand failed: $!\n";
        print STDOUT "\n *** `texexpand' done ***\n" if ($VERBOSITY > 1);
        chdir($DESTDIR) if $DESTDIR;
        $SIG{'INT'} = 'handler';
        &open_dbm_database;
        &initialise_sections;
        print STDOUT "\n *** database open ***\n" if ($VERBOSITY > 1);
        if ($IMAGES_ONLY) {
            &make_off_line_images;
        } else {
            &rename_image_files;
            &load_style_file_translations;
            &make_language_rx;
            &make_raw_arg_cmd_rx;
            # &make_isolatin1_rx unless ($NO_ISOLATIN);
            &translate_titles;
            &make_sections_rx;
            print "\nReading ...";
            if ($SHORT_FILENAME) {
                L2hos->Rename ("$TMP_$dd$FILE" ,"$TMP_$dd$SHORT_FILENAME" );
                &slurp_input_and_partition_and_pre_process(
                    "$TMP_$dd$SHORT_FILENAME");
            } else {
                &slurp_input_and_partition_and_pre_process("$TMP_$dd$FILE");
            }
            &add_preamble_head;
            # Create a regular expressions
            &set_depth_levels;
            &make_sections_rx;
            &make_order_sensitive_rx;
            &add_document_info_page if ($INFO && !(/\\htmlinfo/));
            &add_bbl_and_idx_dummy_commands;
            &translate; # Destructive!
        }
        &style_sheet;
        &close_dbm_database;
        &cleanup();
        #JCL: read warnings from file to $warnings
        local($warnings) = &get_warnings;
        print "\n\n*********** WARNINGS *********** \n$warnings"
            if ($warnings || $NO_IMAGES || $IMAGES_ONLY);
        &image_cache_message if ($NO_IMAGES || $IMAGES_ONLY);
        &image_message if ($warnings =~ /Failed to convert/io);
        undef $warnings;
        # JCL - generate directory index entry.
        # Yet, a hard link, cause Perl lacks symlink() on some systems.
        do {
            local($EXTN) = $EXTN;
            $EXTN =~ s/_\w+(\.html?)/$1/ if ($frame_main_name);
            local($from,$to) = (eval($LINKPOINT),eval($LINKNAME));
            if (length($from) && length($to) && ($from ne $to)) {
                #frames may have altered $EXTN
                $from =~ s/$frame_main_name(\.html?)/$1/ if ($frame_main_name);
                $to =~ s/$frame_main_name(\.html?)/$1/ if ($frame_main_name);
                L2hos->Unlink($to);
                L2hos->Link($from,$to);
            }
        } unless ($NO_AUTO_LINK || !($LINKPOINT) || !($LINKNAME));
        &html_validate if ($HTML_VALIDATE && $HTML_VALIDATOR);
        # Go back to the source directory
        chdir($orig_cwd);
        $TEST_MODE = $DESTDIR if($TEST_MODE); # save path
        $DESTDIR = '';
        $OUT_NODE = 0 unless $FIXEDDIR;
        $STYLESHEET = '' if ($STYLESHEET =~ /^\Q$FILE./);
    }
    print "\nUnknown commands: ". join(" ",keys %unknown_commands)
        if %unknown_commands;
    ###MEH -- math support
    print "\nMath commands outside math: " .
        join(" ",keys %commands_outside_math) .
        "\n Output may look weird or may be faulty!\n"
        if %commands_outside_math;
    print "\nDone.\n";
    if($TEST_MODE) {
        $TEST_MODE =~ s:[$dd$dd]+$::;
        print "\nTo view the results, point your browser at:\n",
            L2hos->path2URL(L2hos->Make_directory_absolute($TEST_MODE).$dd.
                "index$EXTN"),"\n";
    }
    $end_time = time;
    $total_time = $end_time - $start_time;
    print STDOUT join(' ',"Timing:",$total_time,"seconds\n")
        if ($TIMING||$DEBUG||($VERBOSITY > 2));
    $_;
}
# Open the DBM-backed hashes kept in the $TMP_ directory and reload the
# data that is stored in plain files next to them.  These hashes are the
# communication channel between the main process and forked children.
# Ends by calling restore_critical_variables.
sub open_dbm_database {
    print STDOUT "\n"; # this mysteriously prevents a core dump !

    dbmopen(%verb,       $TMP_ . $dd . 'verb',       0755);
    # dbmopen(%verbatim, "$TMP_${dd}verbatim",0755);
    dbmopen(%verb_delim, $TMP_ . $dd . 'verb_delim', 0755);
    dbmopen(%expanded,   $TMP_ . $dd . 'expanded',   0755);
    # %global holds max_id, verb_counter, verbatim_counter, eqn_number
    dbmopen(%global,     $TMP_ . $dd . 'global',     0755);
    # style sheet information
    dbmopen(%env_style,  $TMP_ . $dd . 'envstyles',  0755);
    dbmopen(%txt_style,  $TMP_ . $dd . 'txtstyles',  0755);
    dbmopen(%styleID,    $TMP_ . $dd . 'styleIDs',   0755);
    # The next ones are used during off-line image conversion:
    # %new_id_map maps image ids to page numbers of the images in images.tex,
    # %img_params maps image ids to conversion parameters for that image.
    dbmopen(%new_id_map,    $TMP_ . $dd . 'ID_MAP',     0755);
    dbmopen(%img_params,    $TMP_ . $dd . 'IMG_PARAMS', 0755);
    dbmopen(%orig_name_map, $TMP_ . $dd . 'ORIG_MAP',   0755);

    # make sure the counters hold a number
    $global{'max_id'} |= 0;
    &read_mydb(\%verbatim, "verbatim");
    $global{'verb_counter'} |= 0;
    $global{'verbatim_counter'} |= 0;

    # tables that are kept in plain files rather than in DBM
    my $table;
    foreach $table ([\%new_command,     "new_command"],
                    [\%renew_command,   "renew_command"],
                    [\%provide_command, "provide_command"],
                    [\%new_theorem,     "new_theorem"],
                    [\%new_environment, "new_environment"],
                    [\%dependent,       "dependent"]) {
        &read_mydb($table->[0], $table->[1]);
    }
    # &read_mydb(\%env_style, "env_style");
    # &read_mydb(\%styleID, "styleID");

    # MRO: Why should we use read_mydb instead of catfile?
    $preamble     = &catfile(&_dbname('preamble'), 1)     || '';
    $prelatex     = &catfile(&_dbname('prelatex'), 1)     || '';
    $aux_preamble = &catfile(&_dbname('aux_preamble'), 1) || '';

    &restore_critical_variables;
}
# Save the critical variables into %global, then close and empty every
# DBM-backed hash.  Yields undef.
sub close_dbm_database {
    &save_critical_variables;

    # Same order as before.  %verbatim is deliberately not in the list.
    # %style_id is closed as well, although open_dbm_database only opens
    # %styleID.
    my $db;
    foreach $db (\%verb, \%verb_delim, \%expanded, \%global,
                 \%env_style, \%style_id, \%new_id_map, \%img_params,
                 \%orig_name_map, \%txt_style, \%styleID) {
        dbmclose(%$db);
        undef %$db;
    }
    # explicit undef result; the visible callers use void context
    return undef;
}
# Close, empty and re-open the three DBM hashes used for images:
# %new_id_map (used by the off-line image conversion process),
# %img_params and %orig_name_map.
sub clear_images_dbm_database {
    dbmclose(%new_id_map);
    undef %new_id_map;

    dbmclose(%img_params);
    undef %img_params;

    dbmclose(%orig_name_map);
    undef %orig_name_map;

    dbmopen(%new_id_map,    $TMP_ . $dd . 'ID_MAP',     0755);
    dbmopen(%img_params,    $TMP_ . $dd . 'IMG_PARAMS', 0755);
    dbmopen(%orig_name_map, $TMP_ . $dd . 'ORIG_MAP',   0755);
}
# Copy every entry of %numbered_section into %global.
sub initialise_sections {
    # keys and values of an unmodified hash come in matching order
    @global{ keys %numbered_section } = values %numbered_section;
}
# Record in %global the settings that restore_critical_variables reads
# back: math markup flag, character set and encoding, language, the
# isolatin/unicode flags and the state of unfinished environments,
# comments and unmatched openings.
sub save_critical_variables {
    # always-saved values, stored in this order
    @global{'math_markup', 'charset', 'charenc',
            'language', 'isolatin', 'unicode'}
        = ($NO_MATH_MARKUP, $CHARSET, $charset,
           $default_language, $ISOLATIN_CHARS, $UNICODE_CHARS);

    # the environment pair is only written while one is unfinished
    if ($UNFINISHED_ENV) {
        @global{'unfinished_env', 'replace_end_env'}
            = ($UNFINISHED_ENV, $REPLACE_END_ENV);
    }
    $global{'unfinished_comment'} = $UNFINISHED_COMMENT;
    if (@UNMATCHED_OPENING) {
        $global{'unmatched'} = join(',',@UNMATCHED_OPENING);
    }
}
# Read back the settings stored in %global by save_critical_variables.
# A true value saved in %global wins; otherwise the current value of the
# variable (or the stated default when it is undefined) is kept.
# Afterwards every do_cmd_<name> routine whose <name> is a key of
# %renew_command is undefined, so that the new definitions are read from
# %new_command.
sub restore_critical_variables {
    # Logical "||" is required here: bitwise "|" applied to two strings
    # ORs them character by character and garbles differing values.
    $NO_MATH_MARKUP = ($global{'math_markup'} ||
        (defined $NO_MATH_MARKUP ? $NO_MATH_MARKUP : 1));
    $CHARSET = ($global{'charset'} || $CHARSET);
    $charset = ($global{'charenc'} || $charset);
    $default_language = ($global{'language'} ||
        (defined $default_language ? $default_language : 'english'));
    $ISOLATIN_CHARS = ($global{'isolatin'} ||
        (defined $ISOLATIN_CHARS ? $ISOLATIN_CHARS : 0));
    $UNICODE_CHARS = ($global{'unicode'} ||
        (defined $UNICODE_CHARS ? $UNICODE_CHARS : 0));
    if ($global{'unfinished_env'}) {
        $UNFINISHED_ENV = $global{'unfinished_env'};
        $REPLACE_END_ENV = $global{'replace_end_env'};
    }
    $UNFINISHED_COMMENT = $global{'unfinished_comment'};
    if ($global{'unmatched'}) {
        @UNMATCHED_OPENING = split(',',$global{'unmatched'});
    }
    # undef any renewed-commands...
    # so the new defs are read from %new_command
    local($cmd,$key,$code);
    foreach $key (keys %renew_command) {
        $cmd = "do_cmd_$key";
        next unless (defined &$cmd);
        $code = "undef \&$cmd";
        eval($code);
        # $@ is inspected only right after the eval that may have set it;
        # $cmd already carries the "do_cmd_" prefix.
        if ($@) { print "\nundef \&$cmd failed" }
    }
}
#JCL: The warnings should have been handled within the DBM database.
# Unfortunately if the contents of an array are more than ~900 (system
# dependent) chars long then dbm cannot handle it and gives error messages.
# Append a warning to the file 'WARNINGS' in the current directory,
# unless that file already contains the same text.
# The message is terminated with a newline if it lacks one, and is also
# echoed to STDOUT when $VERBOSITY > 1.
sub write_warnings { #clean
    my ($str) = @_;
    if ($str !~ /\n$/) { $str = $str . "\n"; }
    if ($VERBOSITY > 1) {
        print STDOUT "\n *** Warning: $str";
    }
    # what has been logged so far (empty when there is no file yet)
    my $logged = (-f 'WARNINGS') ? (&catfile('WARNINGS') || '') : '';
    # do not record the same warning twice
    return () if ($logged =~ /\Q$str\E/);
    if (!open(OUT,">>WARNINGS")) {
        print "\nError: Cannot append to 'WARNINGS': $!\n";
    } else {
        print OUT $str;
        close OUT;
    }
}
# Return the contents of the 'WARNINGS' file, or the empty string when
# it is missing or empty.  No error is printed for a missing file.
sub get_warnings {
    my $text = &catfile('WARNINGS',1);
    return ($text ? $text : '');
}
# MRO: Standardizing
# Read a whole file and return its contents as one string.
#   $file   - name of the file to read
#   $ignore - when true, no error message is printed if the file
#             cannot be opened
# Returns undef when the file cannot be opened.
sub catfile {
    my ($file,$ignore) = @_;
    unless(open(CATFILE,"<$file")) {
        print "\nError: Cannot read '$file': $!\n"
            unless($ignore);
        return undef;
    }
    local($/) = undef; # slurp in whole file
    my $contents = <CATFILE>;
    close(CATFILE);
    $contents;
}
# Run the program named in $HTML_VALIDATOR on every file in the current
# directory whose name ends with the HTML extension.
# When $EXTN is not plain '.htm'/'.html', anything in front of a
# trailing '.htm'/'.html' is stripped from it first.
sub html_validate {
    my $extn = $EXTN;
    $extn =~ s/^[^\.]*(\.html?)$/$1/ unless ($EXTN =~ /^\.html?$/i);
    print "\n *** Validating ***\n";
    my $file;
    foreach $file (glob("*$extn")) {
        system("$HTML_VALIDATOR $file");
    }
}
# Record a warning that the named command did not get a complete argument.
sub lost_argument {
    my ($cmd) = @_;
    my $note = "\nincomplete argument to command: \\$cmd";
    &write_warnings($note);
}
# These subroutines should have been handled within the DBM database.
# Unfortunately if the contents of an array are more than ~900 (system
# dependent) chars long then dbm cannot handle it and gives error messages.
# So here we save and then read the contents explicitly.
# Append one key/value record to the named DB file.
# A record is a newline, $mydb_mark, '#', the key, '#' and the value;
# &read_mydb splits the file on that header again.
sub write_mydb {
    my ($db, $key, $str) = @_;
    my $record = "\n$mydb_mark#$key#$str";
    &write_mydb_simple($db, $record);
}
# generate the DB file name from the DB name
# Build the DB file name: temporary directory, directory separator
# and the DB name given as first argument.
sub _dbname {
    my ($db) = @_;
    join('', $TMP_, $dd, $db);
}
# Append a string, as is, to the file belonging to the named DB.
# A failure to open the file is reported on standard output.
sub write_mydb_simple {
    my ($db, $str) = @_;
    my $file = &_dbname($db);
    if (!open(DB,">>$file")) {
        print "\nError: Cannot append to '$file': $!\n";
    } else {
        print DB $str;
        close DB;
    }
}
# Truncate the file belonging to the named DB (creating it if needed).
# A failure to open the file is reported on standard output.
sub clear_mydb {
    my ($db) = @_;
    my $file = &_dbname($db);
    if (!open(DB,">$file")) {
        print "\nError: Cannot clear '$file': $!\n";
    } else {
        close DB;
    }
}
# Assumes the existence of a DB file which contains
# sequences of e.g. verbatim counters and verbatim contents.
# Load the records of the named DB file into the hash referenced by
# the first argument.
#   $dbref - reference to the hash that receives the key/value pairs
#   $name  - DB name, mapped to a file name by &_dbname
# Returns the raw file contents, or '' when the file cannot be read.
sub read_mydb {
    my ($dbref,$name) = @_;
    my $contents = &catfile(&_dbname($name),1);
    return '' unless(defined $contents);
    # splitting on the record header yields: junk, key, value, key, value...
    my @fields = split(/\n$mydb_mark#([^#]*)#/, $contents);
    print "\nDBM: $name open..." if ($VERBOSITY > 2);
    my $i;
    # start at 1: whatever precedes the first header is ignored
    for ($i = 1; $i < scalar(@fields); $i += 2) {
        my $value = $fields[$i + 1];
        $dbref->{$fields[$i]} = (defined $value ? $value : '');
    }
    $contents;
}
# Reads in a latex generated file (e.g. .bbl or .aux)
# It returns success or failure
# ****** and binds $_ in the caller as a side-effect ******
sub process_ext_file {
# Locates and translates the LaTeX-generated file "<base>.$ext", where
# <base> is $EXTERNAL_FILE or, failing that, $FILE.
#   $ext - file extension without the dot (e.g. 'aux', 'bbl')
# The translated text is left in $_ ; the return value is true when a
# file was found and false otherwise.
local($ext) = @_;
local($found, $extfile,$dum,$texpath);
$extfile = $EXTERNAL_FILE||$FILE;
local($file) = &fulltexpath("$extfile.$ext");
$found = 0;
# warn when the source file is newer than the generated file
&write_warnings(
"\n$extfile.$EXT is newer than $extfile.$ext: Please rerun latex" . ## AYS
(($ext =~ /bbl/) ? " and bibtex.\n" : ".\n"))
if ( ($found = (-f $file)) &&
&newer(&fulltexpath("$extfile.$EXT"), $file)); ## AYS
# second attempt: the base name itself already ends with .$EXT
# NOTE(review): below, $file is compared with itself and the message
# names $extfile twice -- confirm what was intended.
if ((!$found)&&($extfile =~ /\.$EXT$/)) {
$file = &fulltexpath("$extfile");
&write_warnings(
"\n$extfile is newer than $extfile: Please rerun latex" . ## AYS
(($ext =~ /bbl/) ? " and bibtex.\n" : ".\n"))
if ( ($found = (-f $file)) &&
&newer(&fulltexpath("$extfile"), $file)); ## AYS
}
# check in other directories on the $TEXINPUTS paths
if (!$found) {
foreach $texpath (split /$envkey/, $TEXINPUTS ) {
$file = "$texpath$dd$extfile.$ext";
last if ($found = (-f $file));
}
}
if ( $found ) {
print "\nReading $ext file: $file ...";
# must allow @ within control-sequence names
$dum = &do_cmd_makeatletter();
# read the file into $_ and run the usual preprocessing on it
&slurp_input($file);
if ($ext =~ /bbl/) {
# remove the \newcommand{\etalchar}{...} since not needed
s/^\\newcommand{\\etalchar}[^\n\r]*[\n\r]+//s;
}
&pre_process;
&substitute_meta_cmds if (%new_command || %new_environment);
if ($ext eq "aux") {
# have LaTeX read the same .aux file when images are generated
my $latex_pathname = L2hos->path2latex($file);
$aux_preamble .=
"\\AtBeginDocument{\\makeatletter\n\\input $latex_pathname\n\\makeatother\n}\n";
# the .aux file is translated line by line
local(@extlines) = split ("\n", $_);
print " translating ".(0+@extlines). " lines " if ($VERBOSITY >1);
# NOTE(review): $_ is not emptied here (the reset is commented out),
# so the translated lines are appended to the text already in $_.
local($eline,$skip_to); #$_ = '';
foreach $eline (@extlines) {
# after \@setckpt: skip lines until the closing brace with that id
if ($skip_to) { next unless ($eline =~ s/$O$skip_to$C//) }
$skip_to = '';
# skip lines added for pdfTeX/hyperref compatibility
next if ($eline =~ /^\\(ifx|else|fi|global \\let|gdef|AtEndDocument|let )/);
# remove \index and \label commands, else invalid links may result
$eline =~ s/\\(index|label)\s*($O\d+$C).*\2//g;
if ($eline =~ /\\(old)?contentsline/) {
# translate environments in the line with $AUX_FILE switched off
do { local($_,$save_AUX) = ($eline,$AUX_FILE);
$AUX_FILE = 0;
&wrap_shorthand_environments;
#footnote markers upset the numbering
s/\\footnote(mark|text)?//g;
$eline = &translate_environments($_);
$AUX_FILE = $save_AUX;
undef $_ };
} elsif ($eline =~ s/^\\\@input//) {
# a nested \@input is passed to its own handler
&do_cmd__at_input($eline);
$eline = '';
} elsif ($eline =~ s/^\\\@setckpt$O(\d+)$C//) {
# remember the brace id that ends the checkpoint block
$skip_to = $1; next;
}
# $eline =~ s/$image_mark#([^#]+)#/print "\nIMAGE:",$img_params{$1},"\n";''/e;
# $_ .= &translate_commands(&translate_environments($eline));
$_ .= &translate_commands($eline) if $eline;
}
undef @extlines;
} elsif ($ext =~ /$caption_suffixes/) {
# caption-list files: rebuild $_ from the translated lines
local(@extlines) = split ("\n", $_);
print " translating ".(0+@extlines). " lines "if ($VERBOSITY >1);
local($eline); $_ = '';
foreach $eline (@extlines) {
# remove \index and \label commands, else invalid links may result
$eline =~ s/\\(index|label)\s*($O\d+$C).*\2//gso;
if ($eline =~ /\\(old)?contentsline/) {
# translate environments in the line with $PREAMBLE switched off
do { local($_,$save_PREAMBLE) = ($eline,$PREAMBLE);
$PREAMBLE = 0;
&wrap_shorthand_environments;
$eline = &translate_environments($_);
$PREAMBLE = $save_PREAMBLE;
undef $_ };
}
$_ .= &translate_commands($eline);
}
undef @extlines;
} else {
# any other file is translated in one piece
print " wrapping " if ($VERBOSITY >1);
&wrap_shorthand_environments;
$_ = &translate_commands(&translate_environments($_));
print " translating " if ($VERBOSITY >1);
}
print "\n processed size: ".length($_)."\n" if($VERBOSITY>1);
$dum = &do_cmd_makeatother();
} else {
print "\n*** Could not find file: $file ***\n" if ($DEBUG)
};
$found;
}
# Set up the search path for TeX input files.
# $TEXINPUTS is completed and made absolute, then $ENV{'TEXINPUTS'} is
# rebuilt as: the dot, the directories given as arguments, $TEXINPUTS
# and the previous value of the environment variable.
# The dot precedes all, so that local files always override.
sub deal_with_texinputs {
    if ($TEXINPUTS) {
        # a leading separator stands for an empty first entry
        $TEXINPUTS = '.'.$TEXINPUTS if ($TEXINPUTS =~ /^$envkey/);
    } else {
        $TEXINPUTS = '.';
    }
    $TEXINPUTS .= "$envkey$FIXEDDIR" if ($ROOTED);
    $TEXINPUTS = &absolutize_path($TEXINPUTS);
    my @search = (".", @_, $TEXINPUTS, $ENV{'TEXINPUTS'});
    $ENV{'TEXINPUTS'} = join($envkey, @search);
}
# provided by Fred Drake
# Convert every directory of a search path to the form returned by
# L2hos->Make_directory_absolute.
#   $path - directories separated by $envkey
# Returns the converted directories, joined by $envkey again.
sub absolutize_path {
    my ($path) = @_;
    my @absolute = ();
    foreach $dir (split /$envkey/o, $path) {
        push(@absolute, scalar(L2hos->Make_directory_absolute($dir)));
    }
    join($envkey, @absolute);
}
sub add_document_info_page {
# Appends to $_ a starred sectioning command of the outermost level,
# titled \infopagename, followed by \textohtmlinfopage.
# Uses $outermost_level
# Nasty race conditions if the next two are done in parallel
# (two fresh bracket ids are taken from the shared counter)
local($X) = ++$global{'max_id'};
local($Y) = ++$global{'max_id'};
###MEH -- changed for math support: no underscores in commandnames
# The conditional piece is inserted only when $MAX_SPLIT_DEPTH does not
# exceed the level number of $outermost_level.
# NOTE(review): that literal holds nothing but newlines -- it looks as
# if some markup was lost here; confirm against a pristine copy.
$_ = join('', $_
, (($MAX_SPLIT_DEPTH <= $section_commands{$outermost_level})?
"\n
\n" : '')
, "\\$outermost_level", "*"
, "$O$X$C$O$Y$C\\infopagename$O$Y$C$O$X$C\n",
, " \\textohtmlinfopage");
}
# For each style file name in TMP_styles (generated by texexpand) look for a
# perl file in $LATEX2HTMLDIR/styles and load it.
# Load the Perl translations for the style files used by the document.
# First texdefs.perl is loaded from the first directory of
# $LATEX2HTMLSTYLES that has it (only when $TEXDEFS is set).  Then each
# line of the 'styles' file in the temporary directory is read as
# "<style> <options>": the package is loaded via &do_require_package,
# a warning is recorded when no implementation is known for it, and
# any options are passed to &do_package_options.
sub load_style_file_translations {
    local($_, $style, $options, $dir);
    print "\n";
    if ($TEXDEFS) {
        foreach $dir (split(/$envkey/,$LATEX2HTMLSTYLES)) {
            if (-f ($_ = "$dir${dd}texdefs.perl")) {
                print "\nLoading $_...";
                require ($_);
                $styles_loaded{'texdefs'} = 1;
                last;
            }
        }
    }
    # packages automatically implemented
    local($auto_styles) = $AUTO_STYLES;
    $auto_styles .= 'array|' if ($HTML_VERSION > 3.1);
    $auto_styles .= 'tabularx|' if ($HTML_VERSION > 3.1);
    $auto_styles .= 'theorem|';
    # these are not packages, but can appear as if class-options
    $auto_styles .= 'psamsfonts|';
    $auto_styles .= 'noamsfonts|';
    $auto_styles =~ s/\|$//;
    if(open(STYLES, "<$TMP_${dd}styles")) {
        # read the list line by line; an empty condition here would
        # never read the file and never terminate
        while(<STYLES>) {
            if(s/^\s*(\S+)\s*(.*)$/$style = $1; $options = $2;/eo) {
                &do_require_package($style);
                # $DONT_INCLUDE is colon-separated; turn it into an
                # alternation for the match below
                $_ = $DONT_INCLUDE;
                s/:/|/g;
                &write_warnings("No implementation found for style \`$style\'\n")
                    unless ($styles_loaded{$style} || $style =~ /^($_)$/
                        || $style =~ /$auto_styles/);
                # MRO: Process options for packages
                &do_package_options($style,$options) if($options);
            }
        }
        close(STYLES);
    } else {
        print "\nError: Cannot read '$TMP_${dd}styles': $!\n";
    }
}
################## Weird Special case ##################
# The new texexpand can be told to leave in \input and \include
# commands which contain code that the translator should simply pass
# to latex, such as the psfig stuff. These should still be seen by
# TeX, so we add them to the preamble ...
# Remove, one at a time, every piece of $_ that matches
# $include_line_rx and add its raw TeX form to the preamble
# under the type 'include'.
sub do_include_lines {
    local($include_line);
    while (s/$include_line_rx//o) {
        $include_line = &revert_to_raw_tex($&);
        &add_to_preamble ('include', $include_line);
    }
}
########################## Preprocessing ############################
# JCL(jcl-verb)
# The \verb declaration and the verbatim environment contain simulated
# typed text and should not be processed. Characters such as $,\,{,and }
# lose their special meanings and should not be considered when marking
# brackets etc. To achieve this \verb declarations and the contents of
# verbatim environments are replaced by markers. At the end the original
# text is put back into the document.
# The markers for verb and verbatim are different so that these commands
# can be restored to what the raw input was just in case they need to
# be passed to latex.
sub pre_process {
# Prepares the document text in $_ for translation.
# Modifies $_;
# In order, it
#  - protects HTML special characters and drops %begin{latexonly} chunks,
#  - moves LaTeX comments into %comments and the "verbatim" DB file,
#  - strips the htmlonly wrappers and removes latexonly/comment chunks,
#  - replaces verbatim-like environments and \verb declarations by markers,
#  - numbers the brace pairs (&mark_string) and unwraps \latex, \html
#    and \latexhtml.
#JKR: We need support for some special environments.
# This has to be here, because they might contain
# structuring commands like \section etc.
local(%comments);
&pre_pre_process if (defined &pre_pre_process);
s/\\\\/\\\\ /go; # Makes it unnecessary to look for escaped cmds
&replace_html_special_chars;
# Remove fake environment which should be invisible to LaTeX2HTML.
# (\001 serves as a temporary end-of-chunk marker)
s/\001//m;
s/[%]end\s*{latexonly}/\001/gom;
s/[%]begin\s*{latexonly}([^\001]*)\001/%/gos;
s/\001//m;
&preprocess_alltt if defined(&preprocess_alltt);
# file markers are always kept: the flag is forced on just before the test
$KEEP_FILE_MARKERS = 1;
if ($KEEP_FILE_MARKERS) {
# if (s/%%% TEXEXPAND: \w+ FILE( MARKER)? (\S*).*/
# ''.qq|#$2#|."\n"/em) {
# $_ = "#$2#\n". $_ };
#RRM: ignore \n at end of included file, else \par may result
if (s/(\n{1,2})?%%% TEXEXPAND: \w+ FILE( MARKER)? (\S*).*\n?/
($2?$1:"\n").''.qq|#$3#|."\n"/em) {
$_ = "#$3#\n". $_ };
} else {
s/%%% TEXEXPAND[^\n]*\n//gm;
}
# Move all LaTeX comments into a local list
# (each one is also written to the "verbatim" DB file and replaced by
# $comment_mark followed by its counter value)
s/([ \t]*(^|\G|[^\\]))(%.*(\n[ \t]*|$))/print "%";
$comments{++$global{'verbatim_counter'}} = "$3";
&write_mydb("verbatim", $global{'verbatim_counter'}, $3);
"$1$comment_mark".$global{'verbatim_counter'}."\n"/mge;
# Remove the htmlonly-environment
s/\\begin\s*{htmlonly}\s*\n?//gom;
s/\\end\s*{htmlonly}\s*\n?//gom;
# Remove environments which should be invisible to LaTeX2HTML.
s/\n[^%\n]*\\end\s*{latexonly}\s*\n?/\001/gom;
s/((^|\n)[^%\n]*)\\begin\s*{latexonly}([^\001]*)\001/$1/gom;
s/\\end\s*{comment}\s*\n?/\001/gom;
s/\\begin\s*{comment}([^\001]*)\001//gom;
# this used to be earlier, but that can create problems with comments
&wrap_other_environments if (%other_environments);
# s/\\\\/\\\\ /go; # Makes it unnecessary to look for escaped cmds
local($next, $esc_del);
&normalize_language_changes;
# Patches by #JKR, #EI#, #JCL(jcl-verb)
#protect \verb|\begin/end....| parts, for LaTeX documentation
s/(\\verb\*?(.))\\(begin|end)/$1\003$3/g;
# Text that is already dealt with is collected in @processedV, so that
# the loop only has to search the remainder of $_ .
local(@processedV);
local($opt, $style_info,$before, $contents, $after, $env);
# One pass per verbatim-like environment, or per environment left
# unfinished by an earlier call ($UNFINISHED_COMMENT).
while (($UNFINISHED_COMMENT)||
(/\\begin\s*($opt_arg_rx)?\s*\{($verbatim_env_rx|$keepcomments_rx)\}/o)) {
($opt, $style_info) = ($1,$2);
$before=$contents=$after=$env='';
if ($UNFINISHED_COMMENT) {
$UNFINISHED_COMMENT =~ s/([^:]*)::(\d+)/$env=$1;$after=$_;
$before = join("",$unfinished_mark,$env,$2,"#");''/e;
print "\nfound the lost \\end{$env}\n";
}
#RRM: can we avoid copying long strings here ?
# maybe this loop can be an s/.../../s with (.*?)
#
($before, $after, $env) = ($`, $', $3) unless ($env);
# text in front of the environment is finished, unless it contains
# a further verbatim-like \begin itself
if (!($before =~
/\\begin(\s*\[[^\]]*\]\s*)?\{($verbatim_env_rx|$keepcomments_rx)\}/)) {
push(@processedV,$before);
print "'";$before = '';
}
if ($after =~ /\s*\\end{$env[*]?}/) { # Must NOT use the s///o option!!!
($contents, $after) = ($`, $');
$contents =~ s/^\n+/\n/s;
# $contents =~ s/\n+$//s;
# re-insert comments
$contents =~ s/$comment_mark(\d+)\n?/$comments{$1}/g;
# $contents =~ s/$comment_mark(\d+)/$verbatim{$1}/g;
# revert '\\ ' -> '\\' only once
if ($env =~ /rawhtml|$keepcomments_rx/i) {
$contents = &revert_to_raw_tex($contents);
} else {
$contents =~ s/([^\\](?:\\\\)*\\)([$html_escape_chars])/$1.&special($2)/geos;
$contents =~ s/\\\\ /\\\\/go;
}
# contents of comment-keeping environments stay in %verbatim,
# all others go to the "verbatim" DB file
if ($env =~/$keepcomments_rx/) {
$verbatim{++$global{'verbatim_counter'}} = "$contents";
} else {
&write_mydb("verbatim", ++$global{'verbatim_counter'}, $contents);
}
# $verbatim{$global{'verbatim_counter'}} = "$contents" if ($env =~/$keepcomments_rx/);
# $verbatim{$global{'verbatim_counter'}} = "$contents";
# Put a marker in place of the environment; it goes in front of
# $after while $before is still pending, else onto @processedV.
if ($env =~ /rawhtml|$keepcomments_rx/i) {
if ($before) {
$after = join("",$verbatim_mark,$env
,$global{'verbatim_counter'},"#",$after);
} else {
push (@processedV, join("",$verbatim_mark,$env
,$global{'verbatim_counter'},"#"));
}
} elsif ($env =~ /tex2html_code/) {
if ($before) {
$after = join("","\\begin", $opt, "\{verbatim_code\}"
, $verbatim_mark,$env
, $global{'verbatim_counter'},"#"
, "\\end\{verbatim_code\}",$after);
} else {
push (@processedV
, join("","\\begin", $opt, "\{verbatim_code\}"
, $verbatim_mark,$env
, $global{'verbatim_counter'},"#"
, "\\end\{verbatim_code\}"));
}
} else {
if ($before) {
$after = join("","\\begin", $opt, "\{tex2html_preform\}"
, $verbatim_mark,$env
, $global{'verbatim_counter'},"#"
, "\\end\{tex2html_preform\}",$after);
} else {
push (@processedV
, join("","\\begin", $opt, "\{tex2html_preform\}"
, $verbatim_mark,$env
, $global{'verbatim_counter'},"#"
, "\\end\{tex2html_preform\}" ));
}
}
} else {
# no \end found in this text: store the rest and leave an
# "unfinished" marker behind
print "Cannot find \\end{$env}\n";
$after =~ s/$comment_mark(\d+)\n?/$comments{$1}/g;
# $after =~ s/$comment_mark(\d+)/$verbatim{$1}/g;
# NOTE(review): $contents was reset to '' at the top of this pass,
# so the next assignment empties $after -- was $after meant?
if ($env =~ /rawhtml|$keepcomments_rx/i) {
$after = &revert_to_raw_tex($contents);
} else {
$after =~ s/([^\\](?:\\\\)*\\)([$html_escape_chars])/$1.&special($2)/geos;
$after =~ s/\\\\ /\\\\/go;
}
if ($env =~/$keepcomments_rx/) {
$verbatim{++$global{'verbatim_counter'}} = "$after";
} else {
&write_mydb("verbatim", ++$global{'verbatim_counter'}, $after );
}
$after = join("",$unfinished_mark,$env
,$global{'verbatim_counter'},"#");
}
$_ = join("",$before,$after);
}
print STDOUT "\nsensitive environments found: ".(int(0+@processedV/2))." "
if((@processedV)&&($VERBOSITY > 1));
$_ = join('',@processedV, $_); undef @processedV;
#restore \verb|\begin/end....| parts, for LaTeX documentation
# $_ =~ s/(\\verb\W*?)\003(begin|end)/$1\\$2/g;
$_ =~ s/(\\verb(;SPM\w+;|\W*?))\003(begin|end)/$1\\$3/g;
# Now do the \verb declarations
# Patches by: #JKR, #EI#, #JCL(jcl-verb)
# Tag \verb command and legal opening delimiter with unique number.
# Replace tagged ones and its contents with $verb_mark & id number if the
# closing delimiter can be found. After no more \verb's are to tag, revert
# tagged one's to the original pattern.
local($del,$contents,$verb_rerun);
local($id) = $global{'verb_counter'};
# must tag only one alternation per loop
##RRM: can this be speeded up using a list ??
# NOTE(review): $vbmark is not used below (the loop sets $vb_mark).
# NOTE(review): the replacement texts and patterns of this part do not
# show the tagging described above and $id is never incremented; they
# look truncated -- confirm against a pristine copy.
my $vbmark = $verb_mark;
while (s/\\verb(\t*\*\t*)(\S)/"$2"/e ||
s/\\verb()(\;SPM\w+\;|[^a-zA-Z*\s])/"$2"/e ||
s/\\verb(\t\t*)([^*\s])/"$2"/e) {
$del = $2;
#RRM: retain knowledge of whether \verb* or \verb
$vb_mark = ($1 =~/^\s*\*/? $verbstar_mark : $verb_mark);
$esc_del = &escape_rx_chars($del);
$esc_del = '' if (length($del) > 2);
# try to find closing delimiter and substitute the complete
# statement with $verb_mark or $verbstar_mark
# s/(]*$id>[\Q$del\E])([^$esc_del\n]*)([\Q$del\E]|$comment_mark(\d+)\n?)/
s/(]*$id>\Q$del\E)([^$esc_del\n]*?)(\Q$del\E|$comment_mark(\d+)\n?)/
$contents=$2;
if ($4) { $verb_rerun = 1;
join('', "\\verb$del", $contents, $comments{$4})
} else {
$contents =~ s|\\\\ |\\\\|g;
$contents =~ s|\n| |g;
$verb{$id}=$contents;
$verb_delim{$id}=$del;
join('',$vb_mark,$id,$verb_mark)
}
/e;
}
$global{'verb_counter'} = $id;
# revert changes to fake verb statements
s/]*)\d+>/\\verb$1/g;
#JKR: the comments include the linebreak and the following whitespace
# s/([^\\]|^)(%.*\n[ \t]*)+/$1/gom; # Remove Comments but not % which may be meaningful
s/((^|\n)$comment_mark(\d+))+//gom; # Remove comment markers on new lines, but *not* the trailing \n
s/(\\\w+|(\W?))($comment_mark\d*\n?)/($2)? $2.$3:($1? $1.' ':'')/egm; # Remove comment markers, not after braces
# s/(\W?)($comment_mark\d*\n?)/($1)? $1.$2:''/egm; # Remove comment markers, not after braces
# Remove comment markers, but *not* the trailing \n
# HWS: Correctly remove multiple %%'s.
#
# (\002 temporarily stands for an escaped percent sign)
s/\\%/\002/gm;
# s/(%.*\n[ \t]*)//gm;
s/(%[^\n]*\n)[ \t]*/$comment_mark\n/gm;
s/\002/\\%/gm;
# Put the comment-keeping environments back as raw TeX.
local($tmp1,$tmp2);
s/^$unfinished_mark$keepcomments_rx(\d+)#\n?$verbatim_mark$keepcomments_rx(\d+)#/
$verbatim{$4}."\n\\end{$1}"/egm; # Raw TeX
s/$verbatim_mark$keepcomments_rx(\d+)#/
$tmp1 = $1;
$tmp2 = &protect_after_comments($verbatim{$2});
$tmp2 =~ s!\n$!!s;
join ('', "\\begin{$tmp1}"
, $tmp2
, "\n\\end{$tmp1}"
)/egm; # Raw TeX
s/$unfinished_mark$keepcomments_rx(\d+)#/$UNFINISHED_COMMENT="$1::$2";
"\\begin{$1}\n".$verbatim{$2}/egm; # Raw TeX
# as above, the flag is forced on just before it is tested
$KEEP_FILE_MARKERS = 1;
if ($KEEP_FILE_MARKERS) {
s/%%% TEXEXPAND: \w+ FILE( MARKER) (\S*).*\n/
''.qq|#.$2#\n|/gem;
} else {
s/%%% TEXEXPAND[^\n]*\n//gm;
}
# number the matching braces of the whole text
&mark_string($_);
# attempt to remove the \html \latex and \latexhtml commands
s/\\latex\s*($O\d+$C)(.*)\1//gm;
s/\\latexhtml\s*($O\d+$C)(.*)\1\s*($O\d+$C)(.*)\3/$4/sg;
s/\\html\s*($O\d+$C)(.*)\1/$2/sg;
s/\\html\s*($O\d+$C)//gm;
# &make_unique($_);
}
# RRM: When comments are retained, then ensure that they are benign
# by removing \s and escaping braces,
# so that environments/bracing cannot become unbalanced.
# Make the comments in retained text benign: on every line, the text
# that follows an unescaped percent sign is passed through
# &protect_helper.  Returns the modified text.
sub protect_after_comments {
    my ($verb_text) = @_;
# $verb_text =~ s/\%(.*)/'%'.&protect_helper($1)/eg;
    $verb_text =~ s/(^|[^\\])(\\\\)*\%(.*)/
        my ($lead, $slashes, $comment) = ($1, $2, $3);
        join('', $lead, $slashes, '%', &protect_helper($comment));
    /emg;
    return $verb_text;
}
# Replace each backslash by a space and put a backslash in front of
# each brace, so that the text cannot unbalance environments or bracing.
sub protect_helper {
    my ($text) = @_;
    foreach ($text) {
        s/\\/ /g;
        s/(\{|\})/\\$1/g;
    }
    return $text;
}
# Turn a piece of text into an HTML comment and store it as a
# verbatim chunk.
#   $type - label placed at the start of the comment
#   $_    - the text (second argument)
# \index and \label commands are removed, the text is reverted to raw
# TeX, and any '--' is broken up.  The result is stored in %verbatim
# and in the 'verbatim' DB file.
# Returns the verbatim marker that stands for the stored comment.
sub make_comment {
    local($type,$_) = @_;
    $_ =~ s/\\(index|label)\s*(($O|$OP)\d+($C|$CP)).*\2//sg;
    $_ = &revert_to_raw_tex($_); s/^\n+//m;
    $_ =~ s/\\(index|label)\s*\{.*\}//sg;
    s/\-\-/- -/g; s/\-\-/- -/g; # cannot have -- inside a comment
    # wrap the text in comment delimiters, labelled with its type
    $_ = join('', '<!-- ', $type , "\n ", $_ , "\n -->" );
    $verbatim{++$global{'verbatim_counter'}} = $_;
    &write_mydb('verbatim', $global{'verbatim_counter'}, $_ );
    join('', $verbatim_mark, 'verbatim' , $global{'verbatim_counter'},'#')
}
sub wrap_other_environments {
# Wraps the chunks of $_ that are delimited as described by
# %other_environments into \begin{...} ... \end{...} pairs.
# Each key of that hash has the form "<start>:<end>" and its value names
# the environment.  Modifies $_ and also returns it.
local($key, $env, $start, $end, $opt_env, $opt_start);
foreach $key (keys %other_environments) {
# skip bogus entries
next unless ($env = $other_environments{$key});
# split the key at its first colon into $start and $end
$key =~ s/:/($start,$end)=($`,$');':'/e;
if (($end =~ /^\#$/m) && ($start =~ /^\#/m)) {
# catch Indica pre-processor language switches
# ($opt_start is whatever follows the leading '#' of $start)
$opt_start = $';
if ($env =~ s/\[(\w*)\]//o) {
$opt_env = join('','[', ($1 ? $1 : $opt_start ), ']');
}
local($next);
# pieces that are finished are collected in @pre_wrapped
while ($_ =~ /$start\b/) {
push(@pre_wrapped, $`, "\\begin\{pre_$env\}", $opt_env );
$_=$';
if (/(\n*)$end/) {
push(@pre_wrapped, $`.$1,"\\end\{pre_$env\}$1");
$_ = $';
# a following N or NIL is dropped; otherwise '#' is put back
if (!(s/^N(IL)?//o)) {$_ = '#'.$_ }
} else {
print "\n *** unclosed $start...$end chunk ***\n";
last;
}
}
$_ = join('', @pre_wrapped, $_);
undef @pre_wrapped;
} elsif (($end=~/^\n$/) && ($start =~ /^\#/)) {
# catch ITRANS pre-processor language info; $env = 'nowrap';
local($ilang) = $start; $ilang =~ s/^\#//m;
s/$start\s*\=([^<\n%]*)\s*($comment_mark\d*|\n|%)/\\begin\{tex2html_$env\}\\ITRANSinfo\{$ilang\}\{$1\}\n\\end\{tex2html_$env\}$2/g;
} elsif (!$end &&($start =~ /^\#/m)) {
# catch Indica pre-processor input-mode switches
s/$start(.*)\n/\\begin\{tex2html_$env\}$&\\end\{tex2html_$env\}\n/g;
} elsif (($start eq $end)&&(length($start) == 1)) {
# one and the same single character opens and closes the chunk
$start =~ s/(\W)/\\$1/; $end = $start;
s/([^$end])$start([^$end]+)$end/$1\\begin\{pre_$env\}$2\\end\{pre_$env\}/mg;
} elsif ($start eq $end) {
# one and the same longer string opens and closes the chunk;
# it is escaped for use as a pattern unless it contains '##'
if (!($start =~ /\#\#/)) {
$start =~ s/(\W)/\\$1/g; $end = $start; }
local (@pre_wrapped);
local($opt); $opt = '[indian]' if ($start =~ /^\#\#$/m);
while ($_ =~ /$start/s) {
push(@pre_wrapped, $` , "\\begin\{pre_$env\}$opt");
$_=$';
if (/$end/s) {
push(@pre_wrapped, $`, "\\end\{pre_$env\}");
$_ = $';
} else {
print "\n *** unclosed $start...$end chunk ***\n";
last;
}
}
$_ = join('', @pre_wrapped, $_);
undef @pre_wrapped;
} elsif ($start && ($env =~ /itrans/)) {
# ITRANS is of this form
local($indic); if($start =~ /\#(\w+)$/m) {$indic = $1}
#include the language-name as an optional parameter
s/$start\b/\\begin\{pre_$env\}\[$indic\]/sg;
s/$end\b/\\end\{pre_$env\}/sg;
} elsif (($start)&&($end)) {
# general case: distinct start and end delimiters
s/$start\b/\\begin\{pre_$env\}/sg;
s/$end\b/\\end\{pre_$env\}/sg;
}
}
$_;
}
#################### Marking Matching Brackets ######################
# Reads the entire input file and performs pre_processing operations
# on it before returning it as a single string. The pre_processing is
# done on separate chunks of the input file by separate Unix processes
# as determined by LaTeX \input commands, in order to reduce the memory
# requirements of LaTeX2HTML.
# Read the given file and pre-process it in partitions.
# The input is cut at every "TEXEXPAND: INCLUDED FILE MARKER" line; each
# piece is handed to &write_string_out, which pre-processes it and
# writes it to a partition file in the temporary directory.  Afterwards
# all partition files are appended, in sorted order, to $_ .
# Dies when the file cannot be read or when no partition can be read back.
sub slurp_input_and_partition_and_pre_process {
    local($file) = @_;
    # %string is read by &do_write_string_out through dynamic scoping
    local(%string, @files, $pos);
    local ($count) = 1;
    unless(open(SINPUT,"<$file")) {
        die "\nError: Cannot read '$file': $!\n";
    }
    local(@file_string);
    print STDOUT "$file" if ($VERBOSITY >1);
    # read line by line; an empty condition here would never read the
    # file and never terminate
    while (<SINPUT>) {
        if (/TEXEXPAND: INCLUDED FILE MARKER (\S*)/) {
            # Forking seems to screw up the rest of the input stream
            # We save the current position ...
            $pos = tell SINPUT;
            print STDOUT " fork at offset $pos " if ($VERBOSITY >1);
            $string{'STRING'} = join('',@file_string); @file_string = ();
            &write_string_out($count);
            delete $string{'STRING'};
            # ... so that we can return to it
            seek(SINPUT, $pos, 0);
            print STDOUT "\nDoing $1 ";
            ++$count}
        else {
            # $string{'STRING'} .= $_
            push(@file_string,$_);
        }
    }
    # the last (or only) partition
    $string{'STRING'} = join('',@file_string); @file_string = ();
    &write_string_out($count);
    delete $string{'STRING'};
    close SINPUT;
    # collect the partition files that were written
    @files = ();
    if(opendir(DIR, $TMP_)) {
        @files = sort grep(/^\Q$PARTITION_PREFIX\E\d+/, readdir(DIR));
        closedir(DIR);
    }
    unless(@files) {
        die "\nFailed to read in document parts.\n".
            "Look up section Globbing in the troubleshooting manual.\n";
    }
    $count = 0;
    foreach $file (@files) {
        print STDOUT "\nappending file: $TMP_$dd$file " if ($VERBOSITY > 1);
        $_ .= (&catfile("$TMP_$dd$file") || '');
        print STDOUT "\ntotal length: ".length($_)." characters\n" if ($VERBOSITY > 1);
    }
    die "\nFailed to read in document parts (out of memory?).\n"
        unless length($_);
    print STDOUT "\ntotal length: ".length($_)." characters\n" if ($VERBOSITY > 1);
}
# Pre-process one partition and write it out, in a child process when
# $CAN_FORK is set and in this process otherwise.
#   $count - number of the partition; padded with zeros to three digits
#            and read by &do_write_string_out through dynamic scoping
# The DBM database is closed around the work and opened again afterwards.
sub write_string_out {
    local($count) = @_;
    if ($count < 10) {$count = '00'.$count}
    elsif ($count < 100) {$count = '0'.$count}
    local($pid);
    # All open unflushed streams are inherited by the child. If this is
    # not set then the parent will *not* wait
    $| = 1;
    &write_mydb_simple("prelatex", $prelatex);
    &close_dbm_database;
    if (!$CAN_FORK) {
        &do_write_string_out;
    } else {
        # fork returns 0 to the child, the PID to the parent
        # and undef when no process could be created
        $pid = fork;
        if (!defined $pid) {
            # Must not take the child's path here: its 'exit' would end
            # the whole translation.  Do the work in this process, with
            # $CAN_FORK off so that the input stream is left open.
            print "\nError: Cannot fork: $!\n";
            local($CAN_FORK) = 0;
            &do_write_string_out;
        } elsif ($pid == 0) {
            &do_write_string_out;
            exit 0;
        } else {
            waitpid($pid,0);
        }
    }
    &open_dbm_database;
}
# Pre-process the text held in $string{'STRING'} and write the result
# to the partition file numbered $count (both come from the callers).
# The collected preamble and aux_preamble are appended to their DB files.
sub do_write_string_out {
    local($_);
    if ($CAN_FORK) { close (SINPUT); }
    &open_dbm_database;
    $_ = delete $string{'STRING'};
    # Replace verbatim environments etc.
    &pre_process;
    # locate the blank lines for \par s
    &substitute_pars;
    # Handle newcommand, newenvironment, newcounter ...
    &substitute_meta_cmds;
    &wrap_shorthand_environments;
    if ($VERBOSITY > 1) {
        print STDOUT "\n *** End-of-partition ***";
    }
    # store the pre-processed partition
    unless (open(OUT, ">$TMP_$dd$PARTITION_PREFIX$count")) {
        print "\nError: Cannot write '$TMP_$dd$PARTITION_PREFIX$count': $!\n";
    } else {
        print OUT $_;
        close(OUT);
    }
    if ($VERBOSITY > 9) {
        print STDOUT $_;
    }
    $preamble = join("\n",$preamble,@preamble); # undef @preamble;
    &write_mydb_simple("preamble", $preamble);
    # "prelatex" was written earlier; it should not be repeated
    #&write_mydb_simple("prelatex", $prelatex);
    &write_mydb_simple("aux_preamble", $aux_preamble);
    &close_dbm_database;
}
# Reads the entire input file into a
# single string.
# Read the entire input file into a single string.
# The string is stored in $_ (the caller's, as $_ is not localized
# here) and is also the return value.  When the file cannot be opened
# an error is printed and $_ is left undefined.
sub slurp_input {
    local($file) = @_;
    my $contents;
    if(open(INPUT,"<$file")) {
        # actually read from the handle; an empty loop condition here
        # would never read anything and never terminate
        $contents = join('', <INPUT>);
        close INPUT;
    } else {
        print "\nError: Cannot read '$file': $!\n";
    }
    $_ = $contents;
}
# MRO: make them more efficient
# The four lookups below share one contract: return the (true) entry
# that the table holds for the argument, else the argument itself.

# lookup in %html_specials
sub special {
    my $mapped = $html_specials{$_[0]};
    $mapped ? $mapped : $_[0];
}
# lookup in %html_specials_inv
sub special_inv {
    my $mapped = $html_specials_inv{$_[0]};
    $mapped ? $mapped : $_[0];
}
# lookup in %html_special_entities
sub special_html {
    my $mapped = $html_special_entities{$_[0]};
    $mapped ? $mapped : $_[0];
}
# lookup in %html_spec_entities_inv
sub special_html_inv {
    my $mapped = $html_spec_entities_inv{$_[0]};
    $mapped ? $mapped : $_[0];
}
# Mark each matching opening and closing bracket with a unique id.
sub mark_string {
# Replaces, in the string passed as first argument, each matching pair
# of braces by a pair of $O<id>$C markers carrying the same id.
# The string is modified in place (through $_[0]).  Ids continue from
# $global{'max_id'}, which is updated at the end.  Opening braces left
# without a partner are remembered in @UNMATCHED_OPENING, so that a
# later call can still match them; closing braces without a partner
# are reported and removed.
# local (*_) = @_; # Modifies $_ in the caller;
# -> MRO: changed to $_[0] (same effect)
# MRO: removed deprecated $*, replaced by option /m
# hide escaped braces behind placeholder words while marking
$_[0] =~ s/(^|[^\\])\\{/$1tex2html_escaped_opening_bracket/gom;
$_[0] =~ s/(^|[^\\])\\{/$1tex2html_escaped_opening_bracket/gom; # repeat this
$_[0] =~ s/(^|[^\\])\\}/$1tex2html_escaped_closing_bracket/gom;
$_[0] =~ s/(^|[^\\])\\}/$1tex2html_escaped_closing_bracket/gom; # repeat this
my $id = $global{'max_id'};
my $prev_id = $id;
# mark all balanced braces
# MRO: This should in fact mark all of them as the hierarchy is
# processed inside-out.
1 while($_[0] =~ s/{([^{}]*)}/join("",$O,++$id,$C,$1,$O,$id,$C)/geo);
# What follows seems esoteric...
# (it deals with the braces that the loop above could not pair up;
# text that is finished is moved to @processedB)
my @processedB = ();
# Take one opening brace at a time
while ($_[0] =~ /\{/) {
my ($before,$after) = ($`,$');
my $change = 0;
# closing braces in front of it belong to opening braces
# remembered from earlier text
while (@UNMATCHED_OPENING && $before =~ /\}/) {
my $this = pop(@UNMATCHED_OPENING);
print "\n *** matching brace \#$this found ***\n";
$before =~ s/\}/join("",$O,$this,$C)/eo;
$change = 1;
}
$_[0] = join('',$before,"\{",$after) if($change);
# MRO: mark one opening brace
if($_[0] =~ s/^([^{]*){/push(@processedB,$1);join('',$O,++$id,$C)/eos) {
$before=''; $after=$';
}
if ($after =~ /\}/) {
# the first closing brace that follows gets the same id
$after =~ s/\}/join("",$O,$id,$C)/eo;
$_[0] = join('',$before,$O,$id,$C,$after);
} else {
# no partner in this text: remember the id
print "\n *** opening brace \#$id is unmatched ***\n";
$after =~ /^(.+\n)(.+\n)?/;
print " preceding: $after \n";
push (@UNMATCHED_OPENING,$id);
}
}
$_[0] = join('',@processedB,$_[0]); undef(@processedB);
print STDOUT "\nInfo: bracketings found: ", $id - $prev_id,"\n"
if ($VERBOSITY > 1);
# process remaining closing braces
while (@UNMATCHED_OPENING && $_[0] =~ /\}/) {
my $this = pop(@UNMATCHED_OPENING);
print "\n *** matching brace \#$this found ***\n";
$_[0] =~ s/\}/join("",$O,$this,$C)/eo;
}
# any closing brace still left has no partner: report where it was
# found and remove it
while ($_[0] =~ /\}/) {
print "\n *** there was an unmatched closing \} ";
my ($beforeline,$prevline,$afterline) = ($`, $`.$& , $');
$prevline =~ /\n([^\n]+)\}$/m;
if ($1) {
print "at the end of:\n" . $1 . "\}\n\n";
} else {
$afterline =~ /^([^\n]+)\n/m;
if ($1) {
print "at the start of:\n\}" . $1 ."\n\n";
} else {
$prevline =~ /\n([^\n]+)\n\}$/m;
print "on a line by itself after:\n" . $1 . "\n\}\n\n";
}
}
$_[0] = $beforeline . $afterline;
}
$global{'max_id'} = $id;
# restore escaped braces
$_[0] =~ s/tex2html_escaped_opening_bracket/\\{/go;
$_[0] =~ s/tex2html_escaped_closing_bracket/\\}/go;
}
# Replace the HTML special characters in $_ by what &special returns
# for them, unless they are preceded by a backslash.
sub replace_html_special_chars {
    my $pass;
    # two passes, JUST IN CASE THERE ARE CONSECUTIVE HTML SPECIALS:
    # the character in front of a special is consumed by the match
    foreach $pass (1, 2) {
        s/([^\\])(<|>|&|\"|``|'')/&special($1).&special($2)/geom;
    }
    # a special at the very start of a line has nothing in front of it
    s/^(<|>|&|\"|``|'')/&special($1)/geom;
}
# used in \verbatiminput only: $html_escape_chars = '<>&';
# Replace every character of $_ that is listed in $html_escape_chars
# by what &special returns for it.
sub replace_all_html_special_chars {
    $_ =~ s/([$html_escape_chars])/&special($1)/geom;
}
# The bibliography and the index should be treated as separate sections
# in their own HTML files. The \bibliography{} command acts as a sectioning command
# that has the desired effect. But when the bibliography is constructed
# manually using the thebibliography environment, or when using the
# theindex environment it is not possible to use the normal sectioning
# mechanism. This subroutine inserts a \bibliography{} or a dummy
# \textohtmlindex command just before the appropriate environments
# to force sectioning.
sub add_bbl_and_idx_dummy_commands {
# Inserts the dummy sectioning commands into $_ : a \bibliography with
# a fresh, empty argument in front of every thebibliography
# environment, and \textohtmlindex in front of the first theindex
# environment and in place of the first \printindex.
local($id) = $global{'max_id'};
# count the thebibliography environments (the text is left unchanged)
s/([\\]begin\s*$O\d+$C\s*thebibliography)/$bbl_cnt++; $1/eg;
## if ($bbl_cnt == 1) {
s/([\\]begin\s*$O\d+$C\s*thebibliography)/$id++; "\\bibliography$O$id$C$O$id$C $1"/geo;
#}
$global{'max_id'} = $id;
s/([\\]begin\s*$O\d+$C\s*theindex)/\\textohtmlindex $1/o;
s/[\\]printindex/\\textohtmlindex /o;
# optional hook, called only when it is defined
&lib_add_bbl_and_idx_dummy_commands() if defined(&lib_add_bbl_and_idx_dummy_commands);
}
# Uses and modifies $default_language
# This would be straight-forward except when there are
# \MakeUppercase, \MakeLowercase or \uppercase , \lowercase commands
# present in the source. The cases have to be adjusted before the
# ISO-character code is set; e.g. with "z --> "Z in german.perl
#
# Convert the text given as argument with the translation function of
# the language in force and return the result.
# First every case-changing command found by $case_change_rx is dealt
# with.  Then, if the text contains a language switch ($language_rx),
# the parts before and after it are converted separately, the latter
# with the new language in force; otherwise the function named in
# %language_translations for $default_language is applied, followed by
# &convert_to_unicode when Unicode output is wanted.
sub convert_iso_latin_chars {
    local($_) = @_;
    local($next_language, $pattern);
    local($xafter, $before, $after, $funct, $level, $delim);
    # text that is finished is collected here and joined again below
    local(@case_processed);
    while (/$case_change_rx/) {
        $xafter = $2;
#       $before .= $`;
        push(@case_processed, $`);
        $funct = $3;
        $after = '';
        $_ = $';
        # NOTE(review): $before is no longer joined back into the text
        # (see the commented-out join below) -- confirm that dropping
        # the command after \noexpand is intended.
        if ($xafter =~ /noexpand/) { $before .= "\\$funct"; next; }
        # the first character after the command tells how its
        # argument is delimited
        s/^[\s%]*(.)/$delim=$1;''/eo;
        if ($delim =~ /{/ ) {
            # brackets not yet numbered...
#           $before .= $funct . $delim;
            push(@case_processed, $funct . $delim);
            # copy up to the closing brace at the same nesting level
            $level = 1;
            $after = $delim;
            while (($level)&&($_)&&(/[\{\}]/)) {
                $after .= $` . $&;
                $_ = $';
                if ( "$&" eq "\{" ) {$level++}
                elsif ( "$&" eq "\}" ) { $level-- }
                else { print $_ }
                print "$level";
            }
#           $before .= $after;
            push(@case_processed, $after);
        } elsif ($delim eq "<") {
            # brackets numbered, but maybe not processed...
            s/((<|#)(\d+)(>|#)>).*\1//;
            $after .= $delim . $&;
            $_ = $';
            print STDOUT "\n<$2$funct$4>" if ($VERBOSITY > 2);
            $funct =~ s/^\\//o;
            local($cmd) = "do_cmd_$funct";
            $after = &$cmd($after);
#           $before .= $after;
            push(@case_processed, $after);
        } elsif (($xafter)&&($delim eq "\\")) {
            # preceded by \expandafter ...
            # ...so expand the following macro first
            $funct =~ s/^\\//o;
            local($case_change) = $funct;
            s/^(\w+|\W)/$funct=$1;''/eo;
            local($cmd) = $funct;
            local($thiscmd) = "do_cmd_$funct";
            if (defined &$thiscmd) { $_ = &$thiscmd($_) }
            elsif ($new_command{$funct}) {
                local($argn, $body, $opt) = split(/:!:/, $new_command{$funct});
                do { ### local($_) = $body;
                    &make_unique($body);
                } if ($body =~ /$O/);
                if ($argn) {
                    do {
                        local($before) = '';
                        local($after) = "\\$funct ".$_;
                        $after = &substitute_newcmd; # may change $after
                        $after =~ s/\\\@#\@\@/\\/o ;
                    }
                } else { $_ = $body . $_; }
            } else { print "\nUNKNOWN COMMAND: $cmd "; }
            # now apply the case change itself
            $cmd = $case_change;
            $case_change = "do_cmd_$cmd";
            if (defined &$case_change) { $_ = &$case_change($_) }
        } else {
            # this should not happen, but just in case...
            $funct =~ s/^\\//o;
            local($cmd) = "do_cmd_$funct";
            print STDOUT "\n\n<$delim$funct>" if ($VERBOSITY > 2);
            $_ = join('', $delim , $_ );
            if (defined &$cmd) { $_ = &$cmd($_) }
        }
    }
#   $_ = join('', $before, $_) if ($before);
    $_ = join('', @case_processed, $_) if (@case_processed);
    # ...now do the conversions
    ($before, $after, $funct) = ('','','');
    @case_processed = ();
    if (/$language_rx/o) {
        ($next_language, $pattern, $before, $after) = (($2||$1), $&, $`, $');
        $before = &convert_iso_latin_chars($before) if ($before);
#       push(@case_processed, $pattern, $before);
        # the switch itself becomes a \selectlanguage command
        local($br_id) = ++$global{'max_id'};
        $pattern = join('' , '\selectlanguage', $O.$br_id.$C
            , (($pattern =~ /original/) ? $TITLES_LANGUAGE : $next_language )
            , $O.$br_id.$C );
        push(@case_processed, $before, $pattern);
        # convert the rest with the new language in force
        push(@language_stack, $default_language);
        $default_language = $next_language;
        $_ = &convert_iso_latin_chars($after);
        $default_language = pop @language_stack;
    } else {
        $funct = $language_translations{$default_language};
        (defined(&$funct) ? $_ = &$funct($_) :
            do { &write_warnings(
                "\nCould not find translation function for $default_language.\n\n")
            }
        );
        # Test the hash itself: 'defined %hash' is rejected by current
        # perls, and an empty table fails the length test anyway.
        if ($USE_UTF ||(!$NO_UTF &&(%unicode_table)&&length(%unicode_table)>2)) {
            &convert_to_unicode($_)};
    }
    $_ = join('', @case_processed, $_); undef(@case_processed);
    $_;
}
# May need to add something here later
sub english_translation {
    # Identity translation: the text passed as the first argument is
    # handed back exactly as it was received.
    my ($text) = @_;
    return $text;
}
# This replaces \setlanguage{\language} with \languageTeX
# This makes the identification of language chunks easier.
sub normalize_language_changes {
    # Works in place on the caller's $_ : every match of $setlanguage_rx is
    # replaced by a backslash, the text of capture group 2 and the literal
    # suffix "TeX".  The value of the substitution is the sub's result.
    s/$setlanguage_rx/join('', "\\", $2, 'TeX')/egs;
}
sub get_current_language {
    # Builds the attribute text ` LANG="<code>"` (optionally followed by
    # ` CLASS="<style>"`) for $default_language.
    # Returns an empty list when $default_language is $TITLES_LANGUAGE, and
    # an empty string when %iso_languages has no code for the language.
    # Side effect: the global $lang_code is set to the looked-up code.
    if ($default_language eq $TITLES_LANGUAGE) { return (); }

    local($lang, $lstyle) = (' LANG="');
    $lang_code = $iso_languages{$default_language};

    if (%styled_languages) {
        $lstyle = $styled_languages{$default_language};
        if ($lstyle) { $lstyle = join('', '" CLASS="', $lstyle); }
    }

    return '' unless $lang_code;
    join('', $lang, $lang_code, $lstyle, '"');
}
# Language name => style class.  Entries are added by
# &do_cmd_htmllanguagestyle and read by &get_current_language when it
# builds the CLASS attribute.  Starts out empty.
%styled_languages = ();
sub do_cmd_htmllanguagestyle {
    # Handler for the \htmllanguagestyle command (do_cmd_<name> convention).
    # Argument: the text following the command.  An optional argument is
    # read as the style class and the next braced pair as the language name.
    # When no class is given, the language's entry in %iso_languages is used.
    # With $USING_STYLES set and a class available, the pair is recorded in
    # %styled_languages.  Returns the remaining text.
    local($_) = @_;
    local($class) = &get_next_optional_argument;

    # $lang must be localised unconditionally: the assignment inside the
    # s///e code runs while the condition is evaluated, i.e. before a
    # conditional `local(...) = ... unless ...' could take effect, and would
    # otherwise overwrite whatever $lang the caller currently has in scope.
    local($lang);
    $lang = &missing_braces unless (
        (s/$next_pair_pr_rx/$lang=$2;''/e)
        ||(s/$next_pair_rx/$lang=$2;''/e));
    return ($_) unless $lang;

    # $class is already local to this call; just fill in the default.
    $class = $iso_languages{$lang} unless $class;
    if ($USING_STYLES && $class) {
        print "\nStyling language: $lang = \"$class\" ";
        $styled_languages{"$lang"} = $class;
    }
    $_;
}
# General translation mechanism:
#
#
# The main program latex2html calls texexpand with the document name
# in order to expand some of its \input and \include statements, here
# also called 'merging', and to write a list of sensitized style, class,
# input, or include file names.
# When texexpand has finished, all is contained in one file, TMP_foo.
# (assumed foo.tex is the name of the document to translate).
#
# In this version, texexpand takes care of the following environments
# that may span include files / section boundaries:
# (For a more technical description, see texexpand.)
# a) \begin{comment}
# b) %begin{comment}
# c) \begin{any} introduced with \excludecomment
# d) %begin{any}
# e) \begin{verbatim}
# f) \begin{latexonly}
# g) %begin{latexonly}
#
# a)-d) cause texexpand to drop its contents, it will not show up in the
# output file. You can use this to 'comment out' a bunch of files, say.
#
# e)-g) prevent texexpand from expanding input files, but the environment
# content goes fully into the output file.
#
# Together with each merging of \input etc. there are so-called %%%texexpand
# markers accompanying the boundary.
#
# When latex2html reads in the output file, it uses these markers to write
# each part to a separate file, and process them further.
#
#
# If you have, for example:
#
# a) preamble
# b) \begin{document}
# c) text
# d) \input{chapter}
# e) more text
# f) \end{document}
#
# you end up with two parts: part 1 is a)-c), part 2 is the rest.
# Regardless of environments spanning input files or sections.
#
#
# What now starts is meta command substitution:
# Therefore, latex2html forks a child process on the first part and waits
# until it finished, then forks another on the next part and so forth
# (see also &slurp_input_and_partition_and_preprocess).
#
# Here's what each child is doing:
# Each child process reads the new commands translated so far by the previous
# child from the TMP_global DBM database.
# After &pre_processing, it substitutes the meta commands (\newcommand, \def,
# and the like) it finds, and adds the freshly retrieved new commands to the
# list so far.
# This is done *only on its part* of the document; this saves a considerable amount of memory.
# Finally, it writes its list of new commands (synopsis and bodies) to the
# DBM database, and exits.
# After the last child finished, latex2html reads in all parts and
# concatenates them.
#
#
# So, at this point in time (start of &translate), it again has the complete
# document, but now preprocessed and with new commands substituted.
# This has several disadvantages: a number of commands are substituted (in
# TeX lingo, expanded) earlier than the rest.
# This causes trouble if commands really must get expanded at the point
# in time they show up.
#
#
# Then, still in &translate, latex2html uses the list of section commands to
# split the complete document into chunks.
# The chunks are not written to files yet. They are retained in the @sections
# list, but each chunk is handled separately.
# latex2html puts the current chunk to $_ and processes it with
# &translate_environments etc., then fetches the next chunk, and so on.
# This prevents environments that span section boundaries from getting
# translated, because \begin and \end cannot find one another, so to
# speak.
#
#
# After the chunk is translated to HTML, it is written to a file.
# When all chunks are done, latex2html rereads each file to get cross
# references right, replace image markers with the image file names, and
# writes index and bibliography.
#
#
sub translate {
    # Top-level translation driver.  Works on the complete document held
    # in $_ :
    #   1. splits it into chunks with $sections_rx (the preamble stays with
    #      the first chunk);
    #   2. for every chunk: opens a new OUTPUT file when the current depth is
    #      below $MAX_SPLIT_DEPTH, translates preamble / environments /
    #      commands, prints the result to OUTPUT and records the chunk in
    #      %section_info and %toc_section_info;
    #   3. writes remaining footnotes, closes OUTPUT, generates the images,
    #      calls &post_process and saves the bookkeeping tables with
    #      &save_array_in_file.
    # All working variables are declared with local(), so the subs called
    # from here see them through dynamic scoping.
    &normalize_sections; # Deal with the *-form of sectioning commands

    # Split the input into sections, keeping the preamble together
    # Due to the regular expression, each split will create 5 more entries.
    # Entry 1 and 2: non-letter/letter sectioning command,
    # entry 4: the delimiter (may be empty)
    # entry 5: the text.
    local($pre_section, @sections);
    if (/\\(startdocument|begin\s*($O\d+$C)\s*document\s*\2)/) {
        $pre_section = $`.$&; $_ = $';
    }
    @sections = split(/$sections_rx/, $_);
    $sections[0] = $pre_section.$sections[0] if ($pre_section);
    undef $pre_section;
    local($sections) = int(scalar(@sections) / 5);

    # Initialises $curr_sec_id to a list of 0's equal to
    # the number of sectioning commands.
    local(@curr_sec_id) = split(' ', &make_first_key);
    local(@segment_sec_id) = @curr_sec_id;
    local($i, $j, $current_depth) = (0,0,0);
    local($curr_sec) = $SHORT_FILENAME||$FILE;
    local($top_sec) = ($SEGMENT ? '' : 'top of ');
    # local(%section_info, %toc_section_info, $CURRENT_FILE, %cite_info, %ref_files);
    local($CURRENT_FILE);
    # These filenames may be set when translating the corresponding commands.
    local($tocfile, $loffile, $lotfile, $footfile, $citefile, $idxfile,
        $figure_captions, $table_captions, $footnotes, $citations, %font_size, %index,
        %done, $t_title, $t_author, $t_date, $t_address, $t_affil, $changed);
    local(@authors,@affils,@addresses,@emails,@authorURLs);
    local(%index_labels, %index_segment, $preindex, %footnotes, %citefiles);
    local($segment_table_captions, $segment_figure_captions);
    local($dir,$nosave) = ('','');
    local($del,$close_all,$open_all,$toc_sec_title,$multiple_toc);
    local($open_tags_R) = [];
    local(@save_open_tags)= ();
    local(@language_stack) = ();
    push (@language_stack, $default_language);
    # $LATEX_FONT_SIZE = '10pt' unless ($LATEX_FONT_SIZE);
    &process_aux_file
        if $SHOW_SECTION_NUMBERS || /\\(caption|(html|hyper)?((eq)?ref|cite))/;
    require ("${PREFIX}internals.pl") if (-f "${PREFIX}internals.pl");
    #JCL(jcl-del)
    &make_single_cmd_rx;
    #
    $tocfile = $EXTERNAL_CONTENTS;
    $idxfile = $EXTERNAL_INDEX;
    $citefile = $EXTERNAL_BIBLIO; $citefile =~ s/#.*$//;
    $citefiles{1} = $citefile if ($citefile);
    print "\nTranslating ...";

    # $i always points at the text entry of the current chunk; the four
    # entries after it describe the sectioning command of the next chunk.
    while ($i <= @sections) {
        undef $_;
        $_ = $sections[$i];
        s/^[\s]*//; # Remove initial blank lines

        # The section command was removed when splitting ...
        s/^/\\$curr_sec$del/ if ($i > 0); # ... so put it back
        if ($current_depth < $MAX_SPLIT_DEPTH) {
            # Footnotes collected so far go to the file that is still open,
            # before OUTPUT is re-opened on the next file.
            if (($footnotes)&&($NO_FOOTNODE)&&( $current_depth < $MAX_SPLIT_DEPTH)) {
                local($thesenotes) = &make_footnotes ;
                print OUTPUT $thesenotes;
            }
            $CURRENT_FILE = &make_name($curr_sec, join('_',@curr_sec_id));
            open(OUTPUT, ">$CURRENT_FILE")
                || die "Cannot write '$CURRENT_FILE': $!\n";
            if ($XBIT_HACK) { # use Apache's XBit hack
                chmod 0744, $CURRENT_FILE;
                &check_htaccess;
            } else {
                chmod 0644, $CURRENT_FILE;
            }
            if ($MULTIPLE_FILES && $ROOTED) {
                if ($DESTDIR =~ /^\Q$FIXEDDIR\E[$dd$dd]?([^$dd$dd]+)/)
                    { $CURRENT_FILE = "$1$dd$CURRENT_FILE" };
            }
        }
        &remove_document_env;
        # &wrap_shorthand_environments; #RRM Is this needed ?
        print STDOUT "\n" if ($VERBOSITY);
        print STDOUT "\n" if ($VERBOSITY > 2);
        print $i/5,"/$sections";
        print ":$top_sec$curr_sec:" if ($VERBOSITY);

        # Must do this early ... It also sets $TITLE
        &process_command($sections_rx, $_) if (/^$sections_rx/);
        # reset tags saved from the previous section
        $open_tags_R = [ @save_open_tags ];
        @save_open_tags = ();
        local($curr_sec_tex);
        if ((! $TITLE) || ($TITLE eq $default_title)) {
            # $default_title is first tried as Perl source; if that fails
            # it is used literally.
            eval '$TITLE = '.$default_title;
            $TITLE = $default_title if $@;
            $curr_sec_tex = ($top_sec ? '' :
                join('', '"', &revert_to_raw_tex($curr_sec), '"'));
            print STDOUT "$curr_sec_tex for $CURRENT_FILE\n" if ($VERBOSITY);
        } else {
            local($tmp) = &purify($TITLE,1);
            $tmp = &revert_to_raw_tex($tmp);
            print STDOUT "\"$tmp\" for $CURRENT_FILE\n" if ($VERBOSITY);
        }
        if (/\\(latextohtmlditchpreceding|startdocument)/m) {
            local($after) = $';
            local($before) = $`.$&;
            $SEGMENT = 1 if ($1 =~ /startdocument/);
            print STDOUT "\n *** translating preamble ***\n" if ($VERBOSITY);
            $_ = &translate_preamble($before);
            # The second pattern is the literal tag <BR>; a bare newline in
            # its place would strip every line end from the preamble result.
            s/\n\n//g; s/<BR>//g; # remove redundant blank lines and breaks
            #
            # &process_aux_file if $AUX_FILE_NEEDED;
            #
            print STDOUT "\n *** preamble done ***\n" if ($VERBOSITY);
            $PREAMBLE = 0;
            $NESTING_LEVEL=0;
            &do_AtBeginDocument;
            $after =~ s/^\s*//m;
            print STDOUT (($VERBOSITY >2)? "\n*** Translating environments ***" : ";");
            $after = &translate_environments($after);
            print STDOUT (($VERBOSITY >2)? "\n*** Translating commands ***" : ";");
            $_ .= &translate_commands($after);
            # $_ = &translate_commands($after);
        } else {
            &do_AtBeginDocument;
            $PREAMBLE = 0;
            $NESTING_LEVEL=0;
            print STDOUT (($VERBOSITY >2)? "\n*** Translating environments ***" : ";");
            $_ = &translate_environments($_);
            print STDOUT (($VERBOSITY >2)? "\n*** Translating commands ***" : ";");
            $_ = &translate_commands($_);
        }

        # close any tags that remain open
        if (@$open_tags_R) {
            ($close_all,$open_all) = &preserve_open_tags();
            $_ .= $close_all;
            @save_open_tags = @$open_tags_R; $open_tags_R = [];
        } else { ($close_all,$open_all) = ('','') }
        print STDOUT (($VERBOSITY >2)? "\n*** Translations done ***" : "\n");
        # if (($footnotes)&&($NO_FOOTNODE)&&( $current_depth < $MAX_SPLIT_DEPTH)) {
        # $_ .= &make_footnotes
        # }
        print OUTPUT $_;

        # Associate each id with the depth, the filename and the title
        ###MEH -- starred sections don't show up in TOC ...
        # RRM: ...unless $TOC_STARS is set
        # $toc_sec_title = &simplify($toc_sec_title);
        $toc_sec_title = &purify($toc_sec_title);# if $SEGMENT;
        $toc_sec_title = &purify($TITLE) unless ($toc_sec_title);
        if ($TOC_STARS) {
            $toc_section_info{join(' ',@curr_sec_id)} =
                "$current_depth$delim$CURRENT_FILE$delim$toc_sec_title"
                # if ($current_depth <= $MAX_SPLIT_DEPTH + $MAX_LINK_DEPTH);
                if ($current_depth <= $TOC_DEPTH);
        } else {
            $toc_section_info{join(' ',@curr_sec_id)} =
                "$current_depth$delim$CURRENT_FILE$delim$toc_sec_title"
                . ($curr_sec =~ /star$/ ? "$delim" : "")
                # if ($current_depth <= $MAX_SPLIT_DEPTH + $MAX_LINK_DEPTH);
                if ($current_depth <= $TOC_DEPTH);
        }

        # include $BODYTEXT in the section_info, when starting a new page
        $section_info{join(' ',@curr_sec_id)} =
            "$current_depth$delim$CURRENT_FILE$delim$TITLE$delim"
            . (($current_depth < $MAX_SPLIT_DEPTH)? $BODYTEXT: "");

        # Get type of section (see also the split above)
        $curr_sec = $sections[$i+1].$sections[$i+2];
        $del = $sections[$i+4];

        # Get the depth of the current section;
        # $curr_sec = $outermost_level unless $curr_sec;
        $current_depth = $section_commands{$curr_sec};
        if ($after_segment) {
            $current_depth = $after_segment;
            $curr_sec_id[$after_segment] += $after_seg_num;
            ($after_segment,$after_seg_num) = ('','');
            for($j=1+$current_depth; $j <= $#curr_sec_id; $j++) {
                $curr_sec_id[$j] = 0;
            }
        }
        if ($SEGMENT||$SEGMENTED) {
            for($j=1; $j <= $#curr_sec_id; $j++) {
                $curr_sec_id[$j] += $segment_sec_id[$j];
                $segment_sec_id[$j] = 0;
            }
        };

        # this may alter the section-keys
        $multiple_toc = 1 if ($MULTIPLE_FILES && $ROOTED && (/$toc_mark/));
        #RRM : Should this be done here, or in \stepcounter ?
        @curr_sec_id = &new_level($current_depth, @curr_sec_id);
        $toc_sec_title = $TITLE = $top_sec = '';
        $i+=5; #skip to next text section
    }

    # Flush what is left (pending footnotes, if any) and finish the last file.
    $open_tags_R = [];
    $open_all = '';
    $_ = undef;
    $_ = &make_footnotes if ($footnotes);
    $CURRENT_FILE = '';
    print OUTPUT;
    close OUTPUT;
    # # this may alter the section-keys
    # &adjust_root_keys if $multiple_toc;
    if ($PREPROCESS_IMAGES) { &preprocess_images }
    else { &make_image_file }
    print STDOUT "\n *** making images ***" if ($VERBOSITY > 1);
    &make_images;

    # Link sections, add head/body/address do cross-refs etc
    print STDOUT "\n *** post-process ***" if ($VERBOSITY > 1);
    &post_process;
    if (defined &document_post_post_process) {
        #BRM: extra document-wide post-processing
        print STDOUT "\n *** post-processing Document ***" if ($VERBOSITY > 1);
        &document_post_post_process();
    }
    print STDOUT "\n *** post-processed ***" if ($VERBOSITY > 1);
    # Call of the sub copy_icons (the `&copy' prefix must not be turned into
    # a copyright sign).
    &copy_icons if $LOCAL_ICONS;
    if ($SEGMENT || $DEBUG || $SEGMENTED) {
        &save_captions_in_file("figure", $figure_captions) if $figure_captions;
        &save_captions_in_file("table", $table_captions) if $table_captions;
        # &save_array_in_file ("captions", "figure_captions", 0, %figure_captions) if %figure_captions;
        # &save_array_in_file ("captions", "table_captions", 0, %table_captions) if %table_captions;
        &save_array_in_file ("index", "index", 0, %index);
        &save_array_in_file ("sections", "section_info", 0, %section_info);
        &save_array_in_file ("contents", "toc_section_info", 0,%toc_section_info);
        &save_array_in_file ("index", "sub_index", 1, %sub_index) if %sub_index;
        &save_array_in_file ("index", "index_labels", 1, %index_labels) if %index_labels;
        &save_array_in_file ("index", "index_segment", 1, %index_segment) if %index_segment;
        &save_array_in_file ("index", "printable_key", 1, %printable_key)
            if (%printable_key || %index_segment);
    }
    elsif ($MULTIPLE_FILES && $ROOTED) {
        &save_array_in_file ("sections", "section_info", 0, %section_info);
        &save_array_in_file ("contents", "toc_section_info", 0, %toc_section_info);
    }
    &save_array_in_file ("internals", "ref_files", 0, %ref_files) if $changed;
    &save_array_in_file ("labels", "external_labels", 0, %ref_files);
    &save_array_in_file ("labels", "external_latex_labels", 1, %latex_labels);
    &save_array_in_file ("images", "cached_env_img", 0, %cached_env_img);
}
# RRM:
sub translate_preamble {
    # Translates the preamble text given as the only argument.
    # Sets $PREAMBLE to 1 and resets $NESTING_LEVEL while doing so.
    # Without \htmlhead the preamble is processed for its side effects only
    # and the textual result is dropped; with \htmlhead the part from
    # \htmlhead onwards is kept.  The text is then passed through
    # &do_AtBeginDocument, and the final result is emptied unless $SEGMENT
    # is set.
    local($_) = @_;
    ($PREAMBLE, $NESTING_LEVEL) = (1, 0); # $NESTING_LEVEL counts TeX group nesting

    # strip constructions that were inserted artificially
    s/\n${tex2html_deferred_rx}\\par\s*${tex2html_deferred_rx2}\n/\n/gm;
    s/\\newedcommand(<<\d+>>)([A-Za-z]+|[^A-Za-z])\1(\[\d+\])?(\[[^]]*\])?(<<\d+>>)[\w\W\n]*\5($comment_mark\d*)?//gm;
    s/\n{2,}/\n/ogm;

    if (!/\\htmlhead/) {
        # only the preamble (metacommands etc.) is translated here;
        # textual results are not expected and are discarded
        &translate_commands(&translate_environments($_));
        if ($VERBOSITY > 4) { print STDOUT "\nPREAMBLE: discarding...\n$_"; }
        $_ = "";
    } else {
        if ($VERBOSITY > 4) { print STDOUT "\nPREAMBLE: discarding...\n$`"; }
        local($after) = join('', $&, $');
        # segment preamble in front of \htmlhead: result is thrown away
        &translate_commands(&translate_environments($`));
        # \htmlhead and everything after it: result is kept
        $_ = &translate_commands(&translate_environments($after));
        if ($VERBOSITY > 4) { print STDOUT "\nPREAMBLE: retaining...\n$_"; }
    }

    $_ = &do_AtBeginDocument($_);
    $_ = '' unless $SEGMENT; # segmented documents have a heading already
    $_;
}
############################ Processing Environments ##########################
sub wrap_shorthand_environments {
    # Puts a dummy environment around math that is not written with the
    # begin-end convention -- \( \), \[ \], $enspair matches, $ and $$ --
    # so that it is handed to LaTeX instead of being translated.
    # Works on $_ ; the wrappers come from &make_any_wrapper.
    # Also (re)defines the globals $double_dol_rx and $single_dol_rx.
    #
    # Retired variants, kept for reference:
    # s/^\$\$|([^\\])\$\$/{$1.&next_wrapper('tex2html_double_dollar')}/ge;
    # s/^\$|([^\\])\$/{$1.&next_wrapper('$')}/ge;
    # s/tex2html_double_dollar/\$\$/go;
    local($wrapper) = "tex2html_wrap_inline"; # \ensuremath wrapper

    # MRO: replaced $* with /m
    if ($VERBOSITY > 3) {
        print STDOUT "\n *** wrapping environments ***\n";
        print STDOUT "\\(";
    }
    s/(^\\[(])|([^\\])(\\[(])/{$2.&make_any_wrapper(1,'',$wrapper).$1.$3}/geom;

    if ($VERBOSITY > 3) { print STDOUT "\\)"; }
    s/(^\\[)]|[^\\]\\[)])/{$1.&make_any_wrapper(0,'',$wrapper)}/geom;

    if ($VERBOSITY > 3) { print STDOUT "\\["; }
    s/(^\\[[])|([^\\])(\\[[])/{$2.&make_any_wrapper(1,1,"displaymath")}/geom;

    if ($VERBOSITY > 3) { print STDOUT "\\]"; }
    s/(^\\[\]])|([^\\])(\\[\]])/{$2.&make_any_wrapper(0,1,"displaymath")}/geom;

    if ($VERBOSITY > 3) { print STDOUT "\$"; }
    s/$enspair/print "\$";
{&make_any_wrapper(1,'',$wrapper).$&.&make_any_wrapper(0,'',$wrapper)}/geom;

    # patterns for an unescaped $$ and an unescaped $
    $double_dol_rx = '(^|[^\\\\])\\$\\$';
    $single_dol_rx = '(^|[^\\\\])\\$';
    if ($VERBOSITY > 3) { print STDOUT "\$"; }

    # flag visible to the subs called below through dynamic scoping
    local($dollars_remain) = 0;
    $_ = &wrap_math_environment;
    $_ = &wrap_raw_arg_cmds;
}
sub wrap_math_environment {
# Wraps $...$ and $$...$$ math found in $_ with the markers produced by
# &make_any_wrapper and returns the resulting text.
# Reads the patterns $single_dol_rx and $double_dol_rx and sets the
# (not locally declared) flag $dollars_remain.
# Dies when an opening dollar has no matching close.
#
# This wraps math-type environments
# The trick here is that the opening brace is the same as the close,
# but they *can* still nest, in cases like this:
#
# $ outer stuff ... \hbox{ ... $ inner stuff $ ... } ... $
#
# Note that the inner pair of $'s is nested within a group. So, to
# handle these cases correctly, we need to make sure that the outer
# brace-level is the same as the inner. --- rst
#tex2html_wrap
# And yet another problem: there is a scungy local idiom to do
# this: $\_$ for a boldfaced underscore. xmosaic can't display the
# resulting itty-bitty bitmap, for some reason; even if it could, it
# would probably come out as an overbar because of the floating-
# baseline problem. So, we have to special case this. --- rst again.
local ($processed_text, @processed_text, $before, $end_rx, $delim, $ifclosed);
local ($underscore_match_rx) = "^\\s*\\\\\\_\\s*\\\$";
local ($wrapper);
print STDOUT "\nwrap math:" if ($VERBOSITY > 3);
#find braced dollars, in tabular-specs
# a lone $ enclosed in a matching bracket pair is replaced by $dol_mark
while (/((($O|$OP)\d+($C|$CP))\s*)\$(\s*\2)/) {
push (@processed_text, $`, $1.$dol_mark.$5);
$_ = $';
}
$_ = join('',@processed_text, $_) if (@processed_text);
undef @processed_text;
$dollars_remain = 0;
# Main loop: each pass consumes the text up to the next unescaped $,
# then looks for the matching close.
while (/$single_dol_rx/) {
$processed_text .= $`.$1;
$_ = $';
$wrapper = "tex2html_wrap_inline";
$end_rx = $single_dol_rx; # Default, unless we begin with $$.
$delim = "\$";
if (/^\$/ && (! $`)) {
# a second $ follows immediately: this is display math
s/^\$//;
$end_rx = $double_dol_rx;
$delim = ""; # Cannot say "\$\$" inside displaymath
$wrapper = "displaymath";
} elsif (/$underscore_match_rx/ && (! $`)) {
# Special case for $\_$ ...
s/$underscore_match_rx//;
$processed_text .= '\\_';
next;
}
# Have an opening $ or $$. Find matching close, at same bracket level
# $processed_text .= &make_any_wrapper(1,'',$wrapper).$delim;
print STDOUT "\$" if ($VERBOSITY > 3);
$ifclosed = 0;
local($thismath);
while (/$end_rx/) {
# Forget the $$ if we are going to replace it with "displaymath"
$before = $` . (($wrapper eq "displaymath")? "$1" : $&);
# give up when a sectioning or \item command lies before the candidate close
last if ($before =~ /\\(sub)*(item|section|chapter|part|paragraph)(star)?\b/);
$thismath .= $before;
$_ = $';
s/^( [^\n])/\\space$1/s; #make sure a trailing space doesn't get lost.
# Found dollar sign inside open subgroup ... now see if it's
# at the same brace-level ...
local ($losing, $br_rx) = (0, '');
print STDOUT "\$" if ($VERBOSITY > 3);
# every opening bracket in $before must have its close in $before as well
while ($before =~ /$begin_cmd_rx/) {
$br_rx = &make_end_cmd_rx($1); $before = $';
if ($before =~ /$br_rx/) { $before = $'; }
else { $losing = 1; last; }
}
do { $ifclosed = 1; last } unless $losing;
# It wasn't ... find the matching close brace farther on; then
# keep going.
/$br_rx/;
$thismath .= $`.$&;
#RRM: may now contain unprocessed $s e.g. $\mbox{...$...$...}$
# the &do_cmd_mbox uses this specially to force an image
# ...but there may be other situations; e.g. \hbox
# so set a flag:
$dollars_remain = 1;
$_ = $';
}
# Got to the end. Whew!
if ($ifclosed) {
# also process any nested math
# the recursive call works on $thismath through $_ ; $_ is saved and
# restored around it, and the call itself resets $dollars_remain
while (($dollars_remain)&&($delim eq "\$")) {
local($saved) = $_;
$thismath =~ s/\$$//;
$_ = $thismath;
$thismath = &wrap_math_environment;
$thismath .= "\$";
$_ = $saved;
}
$processed_text .= &make_any_wrapper(1,'',$wrapper) . $delim
. $thismath . &make_any_wrapper(0,'',$wrapper);
} else {
print STDERR "\n\n *** Error: unclosed math or extra `\$', before:\n$thismath\n\n";
# # remove a $ to try to recover as much as possible.
# $thismath =~ s/([^\\]\\\\|[^\\])\$/$1\%\%/;
# $_ = $thismath . $_; $thismath = "";
# no recovery is attempted: dump the state and abort
print "\n$thismath\n\n\n$_\n\n\n"; die;
}
}
# wrapped text so far plus the remainder that holds no further $
$processed_text . $_;
}
sub translate_environments {
# Translates the environments, brace groups and \selectlanguage commands
# found in the text given as argument and returns the translated text.
# Each pass of the loop handles the first remaining match; text that is
# finished is collected in @processedE and joined back at the end.
# Uses and updates $open_tags_R, $NESTING_LEVEL and other dynamically
# scoped variables of its callers.
local ($_) = @_;
local($tmp, $capenv);
# print "\nTranslating environments ...";
local($after, @processedE);
local ($contents, $before, $br_id, $env, $pattern);
for (;;) {
# last unless (/$begin_env_rx/o);
last unless (/$begin_env_rx|$begin_cmd_rx|\\(selectlanguage)/o);
# local ($contents, $before, $br_id, $env, $pattern);
local($this_env, $opt_arg, $style_info);
$contents = '';
# Capture groups as used by the code below:
# $2,$3 : optional argument/text --- stylesheet info
# $4 : br_id (at the beginning of an environment name)
# $5 : environment name
# $6 : br_id of open-brace, when no environment name matched
# $7 : set when \selectlanguage matched
if ($7) {
push(@processedE,$`);
$_ = $';
if (defined &do_cmd_selectlanguage) {
$_ = &do_cmd_selectlanguage($_);
} else {
local($cmd) = $7;
$pattern = &missing_braces unless (
s/$next_pair_rx/$pattern = $2;''/e);
local($trans) = $pattern.'_translation';
if (defined &$trans) {
&set_default_language($pattern,$_);
}
undef $cmd; undef $trans;
}
next;
} elsif ($4) {
# \begin of a named environment
($before, $opt_arg, $style_info, $br_id
, $env, $after, $pattern) = ($`, $2, $3, $4, $5, $', $&);
# text before the match that holds no further opening is finished
if (($before)&& (!($before =~ /$begin_env_rx|$begin_cmd_rx/))) {
push(@processedE,$before);
$_ = $pattern . $after; $before = '';
}
} else {
# plain brace group: handled completely inside this branch
($before, $br_id, $env, $after, $pattern) = ($`, $6, 'group', $', $&);
if (($before)&& (!($before =~ /$begin_env_rx|$begin_cmd_rx/))) {
push(@processedE,$before);
$_ = $pattern . $after; $before = '';
}
local($end_cmd_rx) = &make_end_cmd_rx($br_id);
if ($after =~ /$end_cmd_rx/) {
# ... find the matching closing one
$NESTING_LEVEL++;
($contents, $after) = ($`, $');
$contents = &process_group_env($contents);
print STDOUT "\nOUT: {$br_id} ".length($contents) if ($VERBOSITY > 3);
print STDOUT "\n:$contents\n" if ($VERBOSITY > 7);
# THIS MARKS THE OPEN-CLOSE DELIMITERS AS PROCESSED
$_ = join("", $before,"$OP$br_id$CP", $contents,"$OP$br_id$CP", $after);
$NESTING_LEVEL--;
} else {
$pattern = &escape_rx_chars($pattern);
s/$pattern//;
print "\nCannot find matching bracket for $br_id";
$_ = join("", $before,"$OP$br_id$CP", $after);
}
next;
}
# From here on: a named environment ($env, $br_id).
$contents = undef;
local($defenv) = $env =~ /deferred/;
# local($color_env);
# NOTE(review): the local() declarations below carry statement
# modifiers, so they only localise when the condition holds.
local($color_env)
unless ($env =~ /tabular|longtable|in(line|display)|math/);
local($closures,$reopens);
local(@save_open_tags) = @$open_tags_R unless ($defenv);
local($open_tags_R) = [ @save_open_tags ] unless ($defenv);
local(@saved_tags) if ($env =~ /tabular|longtable/);
if ($env =~ /tabular|longtable|makeimage|in(line|display)/) {
@save_open_tags = @$open_tags_R;
$open_tags_R = [ @save_open_tags ];
# check for color
local($color_test) = join(',',@$open_tags_R);
if ($color_test =~ /(color{[^}]*})/g ) {
$color_env = $1;
} # else { $color_env = '' }
if ($env =~ /tabular|longtable|makeimage/) {
# close to the surrounding block-type tag
($closures,$reopens,@saved_tags) = &preserve_open_block_tags();
@save_open_tags = @$open_tags_R;
$open_tags_R = [ @save_open_tags ];
if ($color_env) {
$color_test = join(',',@saved_tags);
if ($color_test =~ /(color{[^}]*})/g ) {
$color_env = $1;
}
}
} elsif ($env =~ /in(line|display)/) {
$closures = &close_all_tags() if ((&defined_env($env))
&&!($defenv)&&!($env=~/inline/)&&(!$declarations{$env}));
if ($color_env) {
$color_test = $declarations{$color_env};
$color_test =~ s/<\/.*$//;
$closures .= "\n$color_test";
push (@$open_tags_R , $color_env);
}
}
} elsif ($env =~ /alltt|tex2html_wrap/) {
# alltt is constructed as paragraphs, not with
# NOTE(review): the comment line above appears truncated in this copy.
# tex2html_wrap creates an image, which is at text-level
} else {
$closures = &close_all_tags() if ((&defined_env($env))
&&!($defenv)&&(!$declarations{$env}) );
}
# Sets $contents and modifies $after
if (&find_end_env($env,$contents,$after)) {
print STDOUT "\nIN-A {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
&process_command($counters_rx, $before)
if ($before =~ /$counters_rx/);
# This may modify $before and $after
# Modifies $contents
#RRM: the do_env_... subroutines handle when to translate sub-environments
# $contents = &translate_environments($contents) if
## ((!$defenv) && (&defined_env($env)) && (! $raw_arg_cmds{$env})
## && (!$declarations{$env})
# ((&defined_env($env)) && (! $raw_arg_cmds{$env})
# && (!($env =~ /latexonly|enumerate|figure|table|makeimage|wrap_inline/))
# && ((! $NO_SIMPLE_MATH)||(!($env =~ /wrap/)))
# && (!($env =~ /(math|wrap|equation|eqnarray|makeimage|minipage|tabular)/) )
# );
if ($opt_arg) {
&process_environment(1, $env, $br_id, $style_info); # alters $contents
} else {
&process_environment(0, $env, $br_id, '');
}
undef $_;
print STDOUT "\nOUT-A {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
#JCL(jcl-env) - insert the $O$br_id$C stuff to handle environment grouping
if (!($contents eq '')) {
$after =~ s/^\n//o if ($defenv);
$this_env = join("", $before, $closures
, $contents
, ($defenv ? '': &balance_tags())
, $reopens ); $_ = $after;
} else {
$this_env = join("", $before , $closures
, ($defenv ? '': &balance_tags())
, $reopens ); $_ = $after;
};
### Evan Welsh added the next 24 lines ##
} elsif (&defined_env($env)) {
# no matching \end was found, but a handler for the environment exists
print STDOUT "\nIN-B {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
# If I specify a function for the environment then it
# calls it with the contents truncated at the next section.
# It assumes I know what I'm doing and doesn't give a
# deferred warning.
$contents = $after;
if ($opt_arg) {
$contents = &process_environment(1, $env, $br_id, $style_info);
} else {
$contents = &process_environment(0, $env, $br_id, '');
}
print STDOUT "\nOUT-B {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
$this_env = join("", $before, $closures ,$contents, $reopens);
# there should not be anything left over
# $_ = $after;
$_ = '';
} elsif ($ignore{$env}) {
print STDOUT "\nIGNORED {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
# If I specify that the environment should be ignored then
# it is but I get a deferred warning.
$this_env = join("", $before , $closures , &balance_tags()
, $contents, $reopens );
$_ = $after;
&write_warnings("\n\\end{$env} not found (ignored).\n");
} elsif ($raw_arg_cmds{$env}) {
print "\nIN-C {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
# If I specify that the environment should be passed to tex
# then it is with the environment truncated at the next
# section and I get a deferred warning.
$contents = $after;
if ($opt_arg) {
$contents = &process_environment(1, $env, $br_id, $style_info);
} else {
$contents = &process_environment(0, $env, $br_id, '');
}
print STDOUT "\nOUT-C {$env $br_id}\n$contents\n" if ($VERBOSITY > 4);
$this_env = join("", $before, $closures
, $contents, &balance_tags(), $reopens );
$_='';
&write_warnings(
"\n\\end{$env $br_id} not found (truncated at next section boundary).\n");
} else {
# nothing applies: replace the \begin pattern by the closing tags and go on
$pattern = &escape_rx_chars($pattern);
s/$pattern/$closures/;
print "\nCannot find \\end{$env $br_id}\n";
$_ .= join('', &balance_tags(), $reopens) unless ($defenv);
}
# a result that still contains an opening is put back for another pass
if ($this_env =~ /$begin_env_rx|$begin_cmd_rx/) {
$_ = $this_env . $_;
} else { push (@processedE, $this_env) }
}
$_ = join('',@processedE) . $_;
# run the counter commands over the assembled text; $_ is cleared for the call
$tmp = $_; undef $_;
&process_command($counters_rx, $tmp) if ($tmp =~ /$counters_rx/);
$_ = $tmp; undef $tmp;
$_
}
sub find_end_env {
    # MRO: find_end_env($env,$contents,$rest)
    # Searches $rest (third argument) for the \end that closes environment
    # $env, counting nested begin/end markers matched by the pattern from
    # &make_begin_end_env_rx.  The second and third arguments are modified
    # in place through @_ :
    #   found     - text up to the closing marker is appended to $contents,
    #               $rest becomes what follows it (a newline captured right
    #               after an {end} is kept at its front); returns 1
    #   not found - everything consumed is put back in front of $rest,
    #               $contents is emptied; returns 0
    my $env   = $_[0];
    my $be_rx = &make_begin_end_env_rx($env);
    my $depth = 1;

    while ($_[2] =~ /($be_rx)(\n?)/s) {
        # take copies of the match data before the arguments are changed
        my ($lead, $marker, $kind, $eol, $tail) = ($`, $1, $2, $5, $');

        $_[1] .= $lead;
        $depth += ($kind eq "begin") ? 1 : -1;

        #include any final \n at an {end} only
        $_[2] = (($kind eq 'end') ? $eol : '') . $tail;
        last if $depth == 0;

        # an inner marker belongs to the contents
        $_[1] .= $marker;
    }

    return(1) if $depth == 0;

    $_[2] = join('', $_[1], $_[2]);
    $_[1] = '';
    return(0);
}
sub process_group_env {
    # Translates the contents of one brace group and returns the result
    # followed by the closing tags from &balance_tags().
    # Argument: the text between the group's delimiters.
    # $br_id is not a parameter; it is read from the caller's dynamic scope.
    # The open-tag list ($open_tags_R) is copied for the duration of the call.
    # Depending on how the contents start, they are passed to a do_cmd_ /
    # do_env_ handler, turned into image(s) via LaTeX, or translated with
    # &translate_environments and &translate_commands.
    local($contents) = @_;
    local(@save_open_tags) = @$open_tags_R;
    local($open_tags_R) = [ @save_open_tags ];
    print STDOUT "\nIN::{group $br_id}" if ($VERBOSITY > 4);
    print STDOUT "\n:$contents\n" if ($VERBOSITY > 6);

    # need to catch explicit local font-changes
    # NOTE(review): this tests $_ (the caller's text), not $contents --
    # confirm that this is intended.
    local(%font_size) = %font_size if (/\\font\b/);

    # record class/id info for a style-sheet entry
    local($env_id, $tmp, $etmp);
    if (($USING_STYLES) && !$PREAMBLE ) { $env_id = $br_id; }
    # $env_id = "grp$br_id";
    # $styleID{$env_id} = " ";
    # $env_id = " ID=\"$env_id\"";
    # }
    undef $_;
    $contents =~ s/^\s*$par_rx\s*//s; # don't start with a \par
    if ($contents =~ /^\s*\\($image_switch_rx)\b\s*/s) {
        # catch TeX-like environments: {\fontcmd ... }
        local($image_style) = $1;
        if ($USING_STYLES) {
            $env_style{$image_style} = " " unless ($env_style{$image_style});
        }
        local($switch_cmd) = "do_cmd_${image_style}";
        if (defined &$switch_cmd ) {
            # the handler receives the text after the switch command ($')
            eval "\$contents = \&${switch_cmd}(\$')";
            print "\n*** &$switch_cmd didn't work: $@\n$contents\n\n" if ($@);
        } elsif ($contents =~ /$par_rx/) {
            # split into separate image for each paragraph
            local($par_style,$this_par_img) = '';
            local(@par_pieces) = split($par_rx, $contents);
            local($this_par,$par_style,$par_comment);
            $contents = '';
            while (@par_pieces) {
                $this_par = shift @par_pieces;
                if ($this_par =~ /^\s*\\($image_switch_rx)\b/s) {
                    $image_style = $1;
                    $par_style = 'P.'.$1;
                    $env_style{$par_style} = " " unless ($env_style{$par_style});
                }
                # no comment: source is usually too highly encoded to be meaningful
                # $par_comment = &make_comment($image_style,$this_par);
                $this_par_img = &process_in_latex("\{".$this_par."\}");
                # each paragraph image is followed by an explicit <BR> tag;
                # the tag is part of the string and must stay on one line
                $contents .= join('' #,"\n", $par_comment
                    , "\n", $this_par_img
                    , "<BR>\n");
                if (@par_pieces) {
                    # discard the pieces from matching $par_rx
                    $dum = shift @par_pieces;
                    $dum = shift @par_pieces;
                    $dum = shift @par_pieces;
                    $dum = shift @par_pieces;
                    $dum = shift @par_pieces;
                    $dum = shift @par_pieces;
                    # $contents .= "\n<BR>\n";
                }
            }
        } else {
            $contents = &process_undefined_environment("tex2html_accent_inline"
                , ++$global{'max_id'},"\{".$contents."\}");
        }
    } elsif ($contents =~ /^\s*\\(html)?url\b($O\d+$C)[^<]*\2\s*/) {
        # group starts with \url or \htmlurl: ordinary translation only
        $contents = &translate_environments($contents);
        $contents = &translate_commands($contents);
    } elsif (($env_switch_rx)&&($contents =~ s/^(\s*)\\($env_switch_rx)\b//s)) {
        # write directly into images.tex, protected by \begingroup...\endgroup
        local($prespace, $cmd, $tmp) = ($1,$2,"do_cmd_$2");
        $latex_body .= "\n\\begingroup ";
        if (defined &$tmp) {
            eval("\$contents = &do_cmd_$cmd(\$contents)");
        }
        $contents = &translate_environments($contents);
        $contents = &translate_commands($contents);
        undef $tmp; undef $cmd;
        $contents .= "\n\\endgroup ";
    } elsif ($contents =~ /^\s*\\([a-zA-Z]+)\b/s) {
        # group starts with some other command
        local($after_cmd) = $';
        local($cmd) = $1; $tmp = "do_cmd_$cmd"; $etmp = "do_env_$cmd";
        if (($cmd =~/^(rm(family)?|normalsize)$/)
            ||($declarations{$cmd}&&(defined &$tmp))) {
            do{
                local(@save_open_tags) = @$open_tags_R;
                eval "\$contents = \&$tmp(\$after_cmd);";
                print "\n*** eval &$tmp failed: $@\n$contents\n\n" if ($@);
                $contents .= &balance_tags();
            };
        } elsif ($declarations{$cmd}&&(defined &$etmp)) {
            eval "\$contents = \&$etmp(\$after_cmd);";
        } else {
            $contents = &translate_environments($contents);
            $contents = &translate_commands($contents)
                if ($contents =~ /$match_br_rx/o);
            # Modifies $contents
            &process_command($single_cmd_rx,$contents) if ($contents =~ /\\/o);
        }
        undef $cmd; undef $tmp; undef $etmp;
    } else {
        $contents = &translate_environments($contents);
        $contents = &translate_commands($contents)
            if ($contents =~ /$match_br_rx/o);
        # Modifies $contents
        &process_command($single_cmd_rx,$contents)
            if ($contents =~ /\\/o);
    }
    $contents . &balance_tags();
}
# MODIFIES $contents
sub process_environment {
local($opt, $env, $id, $styles) = @_;
local($envS) = $env; $envS =~ s/\*\s*$/star/;
local($env_sub,$border,$attribs,$env_id) = ("do_env_$envS",'','','');
local($original) = $contents;
if ($env =~ /tex2html_deferred/ ) {
$contents = &do_env_tex2html_deferred($contents);
return ($contents);
}
$env_id = &read_style_info($opt, $env, $id, $styles)
if (($USING_STYLES)&&($opt));
if (&defined_env($env)) {
print STDOUT ",";
print STDOUT "{$env $id}" if ($VERBOSITY > 1);
# $env_sub =~ s/\*$/star/;
$contents = &$env_sub($contents);
} elsif ($env =~ /tex2html_nowrap/) {
#pass it on directly for LaTeX, via images.tex
$contents = &process_undefined_environment($env, $id, $contents);
return ($contents);
# elsif (&special_env) { # &special_env modifies $contents
} else {
local($no_special_chars) = 0;
local($failed) = 0;
local($has_special_chars) = 0;
&special_env; # modifies $contents
print STDOUT "\n