#- -*- perl -*- header inserted automatically # $Id: texexpand.pin,v 1.11 2000/08/23 04:09:05 RRM Exp $ # # texexpand for LaTeX2HTML v2K # Based on texexpand by Robert Thau, MIT AI lab, including modifications by # Franz Vojik # Nikos Drakos # Sebastian Rahtz # Maximilian Ott # Martin Boyer # Herbert Swan # Jens Lippmann # Recognizes \documentclass, \documentstyle, \usepackage, \RequirePackage, # \begin{verbatim}...\end{verbatim}, %begin{latexonly}...%end{latexonly}, # \begin{latexonly}...\end{latexonly}, \input, \include, \verb, \latex # \endinput, \end{document} # \includecomment, \excludecomment # \begin{"to exclude"}, \end{"to exclude"} # %begin{"to exclude"}, %end{"to exclude"} ############################################################################### # Notes: # # General translation mechanism: # # # The main program latex2html calls texexpand with the document name # in order to expand some of its \input and \include statements, here # also called 'merging', and to write a list of sensitized style, class, # input, or include file names. # When texexpand has finished, all is contained in one file, TMP_foo. # (assumed foo.tex is the name of the document to translate). # # In this version, texexpand cares for following environments # that may span include files / section boundaries: # a) \begin{comment} # b) %begin{comment} # c) \begin{any} introduced with \excludecomment # d) %begin{any} # e) \begin{verbatim} # f) \begin{latexonly} # g) %begin{latexonly} # # a)-d) cause texexpand to drop its contents, it will not show up in the # output file. You can use this to 'comment out' a bunch of files, say. # # e)-g) prevent texexpand from expanding input files, but the environment # content goes fully into the output file. # # Together with each merging of \input etc. there are so-called %%%texexpand # markers accompanying the boundary. 
# # When latex2html reads in the output file, it uses these markers to write # each part to a separate file, and process them further. # # # # Detailed technical notes: # # 1. %begin{latexonly} and %end{latexonly} have to be on a separate line. # Anything between these tags (including the tags) is discarded. # 2. \begin{latexonly} and \end{latexonly} have to be on a separate line. # Anything between these tags (including the tags) is not expanded. # 3. [%\]begin{"to exclude"} and [%\]end{"to exclude"} have to be on a # separate line. # Anything between these tags (including the tags) is discarded. # 4. \begin{verbatim/verbatim*} and \end{verbatim/verbatim*} have to be # on a separate line. # Anything between these tags (including the tags) is not expanded. # 5. The scope of any such tags may extend over several files. # The opening tag for latexonly may occur on a different include level # than the closing tag. # The opening tag for verbatim/"to exclude" must occur within the same # file than the closing tag. # 6. Warnings are printed when the document has been parsed and open # tags remain. # 7. When in a "to exclude"/verbatim environment, texexpand won't recognize # ANY command except the corresponding closing tag. # There cannot be any nested constructions. # This behaviour is identical to that of LaTeX. # 8. \begin{latexonly},\end{latexonly} may be nested, whereas # %begin{latexonly},%end{latexonly} may not be nested. # 9. A "%" tag cannot close a "\" tag, and vice versa. # 10. Every \document(class|style), \usepackage, \input and \include command # has to be on a separate line. # 11. Everything behind a `%' that isn't preceded by a `\' is regarded as # a comment, i.e. it is printed but not interpreted. # 12. If any command listed in 10. is preceded by an occurence of `\verb' or # `\latex' then it is NOT interpreted. This crashes on lines like this: # blah blah \verb+foo foo+ \input{bar} % bar won't be loaded! # 13. 
Packages provided via \usepackage are handled the same way as # `options' in \document(class|style), i.e. they are included when # -auto_exclude is off, the package isn't in @dont_include *OR* the # package is in @do_include (new). They are added to the style file # together with their options if the file itself hasn't been merged. # \documentclass[options]{class} searches for every option.clo, # \documentstyle[options]{style} searches for every option.sty. # \usepackage[options]{packages} searches for every package.sty. # 14. Each texinputs directory is searched for input files/styles. If it # ends in `//', the whole subdirectory tree is searched. # 15. \input / \include merge the given file (if found under the given # name or with .tex extension) if its basename is in @do_include or if it # isn't in @dont_include or if the given filename doesn't end in # .sty/.clo/.cls when -auto_exclude is set. # ############################################################################### # History: # mro = Marek Rouchal # jcl = Jens Lippmann # # $Log: texexpand.pin,v $ # Revision 1.11 2000/08/23 04:09:05 RRM # -- fixed typo using $latexonlyenv instead of $latexonlytype # -- keep $mute=0 for fake-env inside $latexonly envs. # -- use \n instead of ',' as delimiter for STYLES lising, # with LaTeX-2e documents, starting with \documentclass # # Revision 1.10 1999/11/03 11:29:50 RRM # -- recoded $ignore_cmd_rx , thanks Achim Haertel for reporting problem # # Revision 1.9 1999/10/06 22:04:13 MRO # # -- texexpand: latex2html calls texexpand with the -out option instead of # output redirection: this is safer on non-UNIX platforms # -- pstoimg: now there's no default cropping (useful for standalone # conversions). 
latex2html was changes appropriately # -- minor cleanups in latex2html script and documentation # # Revision 1.8 1999/10/03 18:40:42 MRO # # -- some cleanups for beta2 # -- "make check" now checks all Perl code # # Revision 1.7 1999/09/16 11:27:01 RRM # -- $keepcomments environments do not need to start at the beginning # of the line # -- %begin{latexonly} and $fakeenv environments are now correctly # handled inside $keepcomments environments. # # Revision 1.6 1999/06/24 07:28:59 MRO # # # -- removed L2HMODULE # -- fixed processing of -info switch # -- changed option order for dvips on win32 (thanks JCL) # -- bumped version to 99.2a8 # # Revision 1.5 1999/06/10 23:00:00 MRO # # # -- fixed an artifact in the *ball icons # -- cleanups # -- option documentation added # -- fixed bug in color perl (determining path to rgb/crayola) # # Revision 1.4 1999/06/02 12:11:23 RRM # -- the option 'style_file' should be 'save_styles' ; fixed. # -- extended $ignore_cmd_rx to ignore \input commands that are contained # within conditional TeX code; (e.g. in macro definitions) # -- ignore \usepackage commands in brackets; e.g. [\usepackage] # # Revision 1.3 1999/05/31 07:49:04 MRO # # # - a lot of cleanups wrt. OS/2 # - make test now available (TEST.BAT on Win32, TEST.CMD on OS/2) # - re-inserted L2HCONFIG environment # - added some new subs to L2hos (path2os, path2URL, Cwd) # # Revision 1.2 1999/05/17 21:31:00 MRO # # # -- make texexpand warning-free and start making it use strict # compliant # # Revision 1.1 1999/05/11 06:10:02 MRO # # # - merged config stuff, did first tries on Linux. Simple document # passes! 
More test required, have to ger rid of Warnings in texexpand # # Revision 1.30 1999/04/09 18:09:21 JCL # changed my e-Mail address # # Revision 1.29 1998/12/02 07:23:35 RRM # -- closedir(SUBDIR) instead of close(SUBDIR) ; thanks Marek Bukowy # else can run out of filehandles # # Revision 1.28 1998/08/14 09:35:21 RRM # -- allow the arguments and options to \documentclass (style) # and \usepackage commands to extend over several lines # # Revision 1.27 1998/07/03 11:44:54 RRM # -- ignore $keepcomments environments when $latexonly # # Revision 1.26 1998/06/26 08:16:46 RRM # -- quoted $dd for the sake of Win95 and DOS # # Revision 1.25 1998/05/14 13:34:11 latex2html # texexpand for V98.2 # # -- reordered some of the early code to use the $TEXINPUTS variable # rather than $ENV{'TEXINPUTS'} # -- LaTeX2HTML passes its value via the command-line # -- Web2C should *not* be used # -- there is no searching along paths for TeX, just for LaTeX2HTML # # Revision 1.24 1998/05/09 05:34:13 latex2html # -- removed local customisation, sorry # -- removed the old/commented call to use Override.pm # # Revision 1.23 1998/05/09 05:29:54 latex2html # -- cosmetic changes to $debug messages # -- removed duplicated path-searching # -- fixed error whereby full path-names got lost # -- experimented with the Web2C options # Are these actually useful ? # # Revision 1.22 1998/04/28 11:53:08 latex2html # implemented Fabrice Popineau's changes for Win32 compatibility # # -- more functions defined in Override.pm # -- checks for kpsewhich and Web2C # # Revision 1.21 1998/02/19 22:26:49 latex2html # th-darmstadt -> tu-darmstadt # # Revision 1.20 1997/12/04 07:35:25 RRM # -- include a use lib command, to find the Override.pm module # -- generalised pattern for matching verbatim-like environments # # Revision 1.19 1997/11/05 11:31:27 RRM # -- changed the way Override.pm is called; this should work better. 
# # Revision 1.18 1997/10/14 16:28:16 JCL # o added command line option -unsegment and $UNSEGMENT # Use latex2html -unsegment, or texexpand -unsegment, or set $UNSEGMENT to 1 # in latex2html.config. # # Revision 1.17 1997/10/10 10:40:07 RRM # -- Oops, didn't quite get that right last time. # # Revision 1.15 1997/10/09 07:11:14 RRM # -- temporary fix to the Override problem # # Revision 1.14 1997/10/06 16:02:29 UW # override.pm contains now unlink() too. Adapted the call to override.pm # accordingly # # Revision 1.13 1997/10/06 14:49:37 UW # Added support for override.pm to texepand. # Furthermore, all references to the path-delimiter ':' # should now be made via $envkey # Texepand used previously the variable $DS as directory delimiter. Since # all other modules use $dd, I changed $DS to $dd. # # Revision 1.12 1997/09/27 10:36:14 JCL # o several enhancements to the inline documentation # o small fix to &interprete, \input|include now doesn't loose the comment # if merging fails # o introduced -no_segments switch (or set shell variable $NO_SEGMENTS to 1): # This will force a segmented document to expand its segment files, so # that it may be processed as a whole with LaTeX2HTML. # Use this feature to test a segmented document or whenever a document # needs to be fully expanded. # XtractFAQ will need this feature to determine the FAQ entries. # # Revision 1.11 1997/06/15 18:26:00 JCL # Now texexpand will only merge files that exist *and* are readable. # (Trying to merge a void link caused it to crash on my site.) # # Revision 1.10 1997/06/06 14:13:54 RRM # This is the texexpand for V97.1. # # only dofference is that it is quieter under -debug . # use -verbosity as well, to get all the previous messages, # when is at least 2. # # Revision 1.9 1997/03/24 12:26:15 RRM # Implemented a new class of environments: $keepcomments . # This allows environments of TeX-like code to be preserved verbatim, # and passed to LaTeX for processing: e.g. picture, makeimage, xy etc. 
# Also, fixed the bug which loses any code on the same line as, but preceding # an \input or \include command. # # Revision 1.8 1997/03/03 20:35:42 JCL # added some comments # # Revision 1.7 1996/12/21 20:30:00 JCL # - small changes to get verbatim parsed separately from verbatim* # - provided expand test for regression suite # - bound diagnostic status messages to debug level # # texexpand is operational # # Revision 1.6 1996/12/20 20:27:08 JCL # fixed severe bug with my $DS variable :-[ # # Revision 1.5 1996/12/20 18:51:54 JCL # *** empty log message *** # # Revision 1.4 1996/12/20 01:29:39 JCL # Moved initialisation tokens for @dont_include to latex2html.config, # to have a more central place to control them. # # Revision 1.3 1996/12/18 04:36:58 JCL # substantial changes to allow for environments grouping several files # o chunked code into more functions # o revised documentation # o designed new parsing logic # o introduced parsing of \includecomment, \excludecomment to care # for self-defined comment environments # o handles default "comment" environment as known from html.sty # o and much more (see comments) # # # V96.2a6 Fixed bug in recursive directory search for texinputs. Thanks to # Marcus Harnisch for reporting the bug. # Included possibility of adding extensions to $TEXE_DONT_INCLUDE # e.g. '.psfig', so that all files ending in .psfig won't be # \input or \include 'ed. Same for $TEXE_DO_INCLUDE. Added `o' # option to some regexps. # ------- # V96.2a5 Followed suggestions by Jens Lippmann regarding file inclusion # logic. Added \RequirePackage. Some minor changes. # ------- # V96.2a4 Fixed severe bugs in comments regexp and usepackage logic. # Thanks to Ross Moore for reporting them. # Added support for LaTeX2e .clo filename extension (see 7. 
#         above)
#         Cleaned up some code, added more comments
#         Added command line option -do_include
# -------
# V96.2a3 Fixed bugs & typos
# -------
# V96.2a2 Following suggestions made by
#         Jens Lippmann
#         Added recursive directory search for include files.
#         Added @do_include: Forces inclusion of packages (when found)
#         Some bug fixes
# -------
# V96.2a1 released Thu Oct 24 16:51:36 MET 1996
# -------
# 21-NOV-96 mro
#         Almost complete rewrite by Marek Rouchal
#
###############################################################################

use vars qw($LATEX2HTMLDIR $SCRIPT);

#- the (texlive) wrapper sets these values
#- or it is stored in the enviroment
#unless @wrapper@ || @texlive@
BEGIN {
    # print STDERR "scanning for l2hdir\n";
    if($ENV{LATEX2HTMLDIR}) {
        $LATEX2HTMLDIR = $ENV{LATEX2HTMLDIR};
    } else {
        $ENV{LATEX2HTMLDIR} = $LATEX2HTMLDIR = '@LATEX2HTMLDIR@';
    }

    if(-d $LATEX2HTMLDIR) {
        push(@INC,$LATEX2HTMLDIR);
    } else {
        die qq{Fatal: Directory "$LATEX2HTMLDIR" does not exist.\n};
    }
}
#fi

use L2hos;

my $RELEASE = '@distver@';
my ($VERSION) = q$Revision: 1.11 $ =~ /:\s*(\S+)/;

# $envkey is the character separating the entries of a search path list
my $envkey = L2hos->pathd();
# $dd is the directory delimiter character
my $dd = L2hos->dd();

# Every diagnostic starts on a fresh line with this prefix.
my $prompt = "\ntexexpand:";

# Initialize styles to be excluded (if any).
# This is a sanity setup in case the \d is garbled during shell
# variable handling.
# The initialisation really comes from latex2html.config.
my @dont_include = ('\d+pt');
# These are the extensions to be auto-excluded
my $dont_include_ext_rx = 'sty|cls|clo';
if($ENV{'TEXE_DONT_INCLUDE'}) {
    &process_dont_include(split(/$envkey/,$ENV{'TEXE_DONT_INCLUDE'}));
}

# Initialize styles to be included (if any). This overrides @dont_include
# These are the extensions to be auto-included
my $do_include_ext_rx = '';
if($ENV{'TEXE_DO_INCLUDE'}) {
    &process_do_include(split(/$envkey/,$ENV{'TEXE_DO_INCLUDE'}));
}

# Parse arguments
use Getopt::Long;

my %opt = ();
unless(GetOptions(\%opt, qw(-help -version -debug -verbose -w
                            -do_include=s@ -dont_include=s@
                            -auto_exclude -unsegment -save_styles=s
                            -texinputs=s@ -output=s))) {
    die "$prompt Error: Invalid option(s) specified.\n";
}

if($opt{help}) {
    print STDERR "-- to be implemented --\n";
    exit 0;
}

&banner();
if($opt{version}) {
    exit 0;
}

my $debug = $opt{debug} || 0; # no debug by default
$debug = 2 if($opt{verbose});

if($opt{dont_include} && @{$opt{dont_include}}) {
    &process_dont_include(@{$opt{dont_include}});
}

if($opt{do_include} && @{$opt{do_include}}) {
    &process_do_include(@{$opt{do_include}});
}

my $TEXINPUTS = '';
# $opt{texinputs} is undefined when -texinputs was not given, so test it
# before dereferencing (same guard as for the two options above).
if($opt{texinputs} && @{$opt{texinputs}}) {
    $TEXINPUTS = join($envkey, @{$opt{texinputs}});
}

unless(@ARGV) {
    die "$prompt Error: No input file specified.\n";
}

my $infile = shift(@ARGV);
if(@ARGV) {
    die "$prompt Error: More than one input file specified.\n";
}

#FP: Web2C does not use @texinputs at all
#    moreover, it uses kpsewhich to find files, so no need to
#    bother with @texinputs
# $Web2C = &find_executable('kpsewhich',$ENV{'PATH'});
#RRM: I don't think it is a good idea to use kpsewhich this way
my $Web2C = '';

# Initialize texinputs
my @texinputs = qw(.);
if($TEXINPUTS) {
    my $dir;
    foreach $dir (split(/$envkey/, $TEXINPUTS)) {
        push (@texinputs, $dir)
            if(($dir =~ /\S+/) && ($dir ne '.')); # save only if non-empty
    }
}

## Ignore the environment
# if((!$TEXINPUTS)&&(defined $ENV{'TEXINPUTS'})) {
#     foreach $dir (split(/$envkey/,$ENV{'TEXINPUTS'})) {
#         push (@texinputs, $dir)
#             if (($dir =~ /\S+/)&&($dir ne '.')); # save only if non-empty
#     }
# }

## Expand paths with `~'
# $homeDir = (getpwuid($<))[7];
# grep(s|^~$dd|$homeDir$dd|, @texinputs);
# grep((m|^~([^$dd]+)$dd|) &&
#      ($homeDir = (getpwnam($1))[7]) && (s||$homeDir$dd|), @texinputs);

&initialise;
&main;
exit(0);

# Print the program name, release and revision to STDERR.
sub banner {
    print STDERR "texexpand V$RELEASE (Revision $VERSION)\n";
}

# Set up the global regular expressions used by the parser and, when
# debugging is enabled, report the search directories and the
# include/exclude settings on STDERR.
sub initialise {
    # Create generic regexp's:
    # If this matches before a command, the command is ignored.
    $ignore_cmd_rx =
#       '(\\latex\W|\\verb|\\expandafter|\\ifx|\\else\W|[\|\[\@]$)';
        "(\\\\latex\\W|\\\\verb|\\\\expandafter|\\\\ifx|\\\\else\\W|[\\|\\[\\@]\$)";
    # This matches a square bracket pair (typically an option list).
    $options_rx = '(\[[^\]]*\]|)';
    # This matches a single argument.
    $arg_rx = '\{([^\}]*)\}';

    # Environments whose content is dropped from the output.
    $fakeenv_rx = '(comment)';
    # Environments in which `%' comments are kept as part of the line.
    $keepcomments_rx = '(picture|makeimage|xy|diagram)';

    # Print environments
    my $dir;
    if ($debug) {
        print STDERR "$prompt LaTeX2HTML inputs are in:";
#       foreach $dir (@texinputs) { print STDERR "$prompt $dir"; }
        if ($Web2C) {
            print STDERR "$prompt " . `kpsewhich -expand-var \$TEXINPUTS` ;
#RRM: I cannot make this work, to replace the `...` in the line above
#           local($kpse) = "kpsewhich -expand-var=\$TEXINPUTS";
#           print STDERR "$prompt $kpse";
#           $kpse = system($kpse);
#           print STDERR "$prompt $kpse";
        } else {
            foreach $dir (@texinputs) {
                print STDERR "$prompt $dir";
            }
        }
        if ($debug>1) {
            print STDERR "\n$prompt Special names (not to be input or included):";
            foreach $name (@dont_include) {
                print STDERR "$prompt $name";
            }
            print STDERR "\n$prompt Extensions of files not to be input or included: " .
                "$dont_include_ext_rx";
            print STDERR "\n$prompt Special names (to *be* input or included):";
            foreach $name (@do_include) {
                print STDERR "$prompt $name";
            }
            print STDERR "\n$prompt Extensions of files to *be* input or included: " .
                "$do_include_ext_rx\n";
        }
    }
    print STDERR "\n$prompt %--- Expanding $infile" if ($debug>1);
}

# Open the output (and optional style list) file, expand $infile into it
# and warn about environments that are still open at the end.
# The parser state is kept in dynamically scoped (local) variables so that
# &process_file and &interprete can read and modify it.
sub main {
    # Note that verbatim/latexonly may split over different files!
    # $verbatim is 1 if inside a verbatim environment,
    # $latexonly is > 0 if inside latexonly environments
    # $includelevel indicates the depth of include/input
    local($includelevel) = 0;
    local($verbatim,$verbatimname) = (0,"");
    local($latexonly,$latexonlytype) = (0,"");
    local($fakeenv,$fakeenvname,$fakeenvtype) = (0,"","");
    local($keepcomments,$keepcommentsname) = (0,"");
    local($active,$mute) = (1,0);

    # Main procedure
    $dont_include_rx = join("|",@dont_include);
    $do_include_rx = join("|",@do_include);

    if($opt{save_styles}) {
        open(STYLES,">$opt{save_styles}") ||
            die "$prompt Error: Cannot open style file '$opt{save_styles}': $!\n";
    }

    if($opt{output}) {
        open(OUT,">$opt{output}") ||
            die "$prompt Error: Cannot open output file '$opt{output}': $!\n";
    } else {
        open(OUT,">&STDOUT") ||
            die "$prompt Error: Cannot write to standard output: $!\n";
    }

    &process_file($infile); # the workhorse...

    print STDERR "$prompt Warning: No ${latexonlytype}end\{latexonly\} found."
        if ($latexonly);
    print STDERR "$prompt Warning: No ${fakeenvtype}end\{$fakeenvname\} found."
        if ($fakeenv);
    print STDERR "$prompt Warning: No \\end\{$keepcommentsname\} found."
        if ($keepcomments);
    # Name the environment that was really opened (verbatim, verbatim*,
    # Verbatim, ...), as is done for the other environment kinds.
    print STDERR "$prompt Warning: No \\end\{$verbatimname\} found."
        if ($verbatim);

    # Buffered write errors (e.g. a full disk) only show up on close,
    # so do not ignore its result.
    if($opt{output}) {
        close(OUT) ||
            die "$prompt Error: Cannot close output file '$opt{output}': $!\n";
    }
    if($opt{save_styles}) {
        close(STYLES) ||
            die "$prompt Error: Cannot close style file '$opt{save_styles}': $!\n";
    }
}

# Include and parse a file.
# This routine is recursive, see also &process_input_include_file,
# &process_document_header, and &process_package_cmd.
#
# Two global flags control the states of texexpand.
# o $active is true if we should interprete the lines to expand
#   files, check for packages, etc.
# o $mute is true if we should prevent the lines from going
#   into the out file.
#
# We have three general states of texexpand:
# 1) interprete the lines and pass them to the out file
#    This is the normal case.
#    Corresponding: $active true, $mute false
# 2) interprete minimal and suppress them
#    This is when parsing inside a comment environment, which
#    also would retain its body from LaTeX.
# => $active false, $mute true # 3) interprete minimal and pass the lines to the out file # This is inside a verbatim or latexonly environment. # The line of course must be at least interpreted to # determine the closing tag. # => $active false, $mute false # # Any environment may extend over several include files. # Any environement except verbatim and latexonly may have its # opening or closing tag on different input levels. # The comment and verbatim environments cannot be nested, as # is with LaTeX. # We must at least parse verbatim/comment environments in # latexonly environments, to catch fake latexonly tags. # # The work scheme: # Five functions influence texexpand's behavior. # o &process_file opens the given file and parses the non-comment part in # order to set $active and $mute (see above). # It calls &interprete to interprete the non-comment content and either # continues with the next line of its file or terminates if &interprete # detected the \end{document} or an \endinput. # o &interprete handles some LaTeX tags with respect to the three states # controlled by $active and $mute. # Regarding to \input|include, \document(class|style), and # \(use|Require)package the functions &process_input_include_file, # &process_document_header, and &process_package_cmd are called respectively. # o These three functions check if the file name or option files are enabled # or disabled for merging (via TEXE_DO_INCLUDE or TEXE_DONT_INCLUDE). # Any file that is to include will be 'merged' into the current file, i.e. # the function &process_file is called at this place in time (recursively). # This will stop interpretation at the current line in file, start with the # new file to process and continues with the next line as soon as the new # file is interpreted to its end. 
# # The call tree (noweb+xy.sty would be handy here): # # main # | # v # +->process_file # | | # | v # | interprete (with respect to the current line, one of that three) # | | | | # | v v v # | process_input_include_file process_document_header process_package_cmd # | | | | # | v v v # +----+---------------------------+------------------------+ # # Bugs: # o Since the latexonly environment is not parsed, its contents # might introduce environments which are not recognized. # o The closing tag for latexonly is not found if hidden inside # an input file. # o One environment tag per line, yet! # o If I would have to design test cases for this beast I would # immediately desintegrate into a logic cloud. # # Notes: # o Ok, I designed test cases for it. # Please refer to test 'expand' of the regression test suite # in the developers' module of the l2h repository. # o -unsegment feature: # In this (rare) case, the user wants to translate a segmented document # not in segments but in a whole (for testing, say). # We enable this by recognizing the \segment command in &interprete, # causing the segment file to be treated like \input but loosing the first # lines prior to \startdocument (incl.), as controlled via $segmentfile. # On how to segment a document you are best guided by section # ``Document Segmentation'' of the LaTeX2HTML manual. # sub process_file { my ($infile) = @_; local(*IN); local($comments,$before,$orig); # Keep track of input/include level $includelevel++; open(IN,"<$infile") || die "$prompt Cannot open $infile\n"; print STDERR "$prompt %--- Processing $infile" if ($debug > 1); # if we don't include this file marker LaTeX2HTML won't split # the document at this point print OUT "%%% TEXEXPAND: INCLUDED FILE MARKER $infile\n" if ($includelevel > 1 && $active); if ($segmentfile) { # This variable is set by &interprete to change the behavior of the # next file to merge. 
while() { # strip comments s/(^|[^\\])(\\\\)*(%.*)/$comments = $3; $1.$2/e; last if /^\s*\\startdocument/; } $segmentfile = 0; } while() { #for debugging $orig = $_; # lift comments from line $comments = ""; if ($keepcomments) { $comments = '' } else { s/(^|[^\\])((?:\\\\)*)(%.*)/$comments = $3; $1.$2/e } # Deal with latexonly environment(s) # begin/end tags must be on single line if (!$fakeenv && !$verbatim && !$latexonly && ( ($comments =~ /%\s*begin\s*\{\s*latexonly\s*\}/)|| ($keepcomments && /%\s*begin\s*\{\s*latexonly\s*\}/))) { # A comment latexonly environment. May not be nested. $latexonly = 1; $latexonlytype = "%"; $active = 0; $mute=1; } elsif (!$fakeenv && !$verbatim && (!$latexonly || $latexonlytype eq "\\") && /^\s*\\begin\s*\{\s*latexonly\s*\}/) { # A latexonly environment. LaTeX types may be nested, # but discard them as long as we are in a latexonly # comment part. # We definitely don't like to push the "\\", "%" types # onto a stack to keep track of them in alternating types. # On the other hand we won't allow for a comment type # part to close a LaTeX environment, eg. $latexonly++; $latexonlytype = "\\"; $active = 0; } elsif (!$fakeenv && !$verbatim && ( ($comments =~ /%\s*begin\s*\{\s*$fakeenv_rx\s*\}/)|| ($keepcomments && /%\s*begin\s*\{\s*$fakeenv_rx\s*\}/))) { # Begin of a fake comment part. May not be nested. $fakeenv=1; $fakeenvtype="%"; # Remember the part name. $fakeenvname = $1; $active=0; $mute=1 unless $latexonly; } elsif (!$fakeenv && !$verbatim && /^\s*\\begin\s*\{\s*$fakeenv_rx\s*\}/) { # Begin of a fake environment. May not be nested. $fakeenv="1"; $fakeenvtype="\\"; # Remember the environment name. $fakeenvname = $1; $active=0; $mute=1 unless $latexonly; } elsif (!$fakeenv && !$verbatim && !$latexonly && /^\s*\\begin\s*\{\s*$keepcomments_rx\s*\}/) { # Begin of a keepcomments environment. May be nested. if (! $keepcomments) { $keepcomments = 1; # Remember the environment name. 
$keepcommentsname = $1; } elsif ($keepcommentsname eq $1) { $keepcomments++; } $active=1; $mute=1 unless $latexonly; } # elsif (!$fakeenv && !$verbatim && /\\begin\s*\{\s*verbatim(\*)?\s*\}/) { elsif (!$fakeenv && !$verbatim && /\\begin\s*\{\s*(\w*[Vv]erbatim\w*\*?)\s*\}/) { ($before,$verbatimname) = ($`,$1); ($active,$verbatim) = (0,1) unless ($before =~ /$ignore_cmd_rx/o); } print STDERR "$prompt %--line::${orig}%-- active=$active mute=$mute ". "latexonly=$latexonly fakeenv=$fakeenv verbatim=$verbatim ". "keepcomments=$keepcomments" if ($debug > 1) && $orig =~ /\\begin|%\s*begin/; # Interprete the single line, care for file to merge, # locate new comment environments, etc. # This one does recursive calls. # Stop this file if we are told so. last unless &interprete($_, $comments); last if $end_document; # Sorry for that ifs... if (!$fakeenv && !$verbatim && $latexonly && $latexonlytype eq "%" && ( ($comments =~ /%\s*end\s*\{\s*latexonly\s*\}/)|| ($keepcomments && /%\s*end\s*\{\s*latexonly\s*\}/))) { # only %end{latexonly} can close the part $latexonly=0; $active = 1; $mute = 0; } elsif (!$fakeenv && !$verbatim && $latexonly && $latexonlytype eq "\\" && /^\s*\\end\s*\{\s*latexonly\s*\}/) { # only \end{latexonly} can close the environment $latexonly--; $active = ($latexonly ? 0 : 1); } elsif ($fakeenv && $fakeenvtype eq "%" && ( ($comments =~ /%\s*end\s*\{\s*$fakeenv_rx\s*\}/)|| ($keepcomments && /%\s*end\s*\{\s*$fakeenv_rx\s*\}/))) { # only a matching %end{name} can close the part if ($1 eq $fakeenvname) { $fakeenv=0; $active = ($latexonly ? 0 : 1); $mute=0 unless $latexonly && $latexonlytype eq "%"; } } elsif ($fakeenv && $fakeenvtype eq "\\" && /^\s*\\end\s*\{\s*$fakeenv_rx\s*\}/) { # only a matching \end{name} can close the environment if ($1 eq $fakeenvname) { $fakeenv=0; $active = ($latexonly ? 
0 : 1); $mute=0 unless $latexonly; } } elsif ($keepcomments && /^[^%]*?\\end\s*\{\s*$keepcomments_rx\s*\}/) { # only a matching \end{name} can close the part if ($1 eq $keepcommentsname) { $keepcomments--; $keepcommentsname = '' unless ($keepcomments); $active = ($latexonly ? 0 : 1); $mute=0 unless $latexonly && $latexonlytype eq "%"; } } # elsif ( /\\end\s*\{\s*verbatim(\*)?\s*\}/) { elsif ( /\\end\s*\{\s*(\w*[Vv]erbatim\w*\*?)\s*\}/) { if ($1 eq $verbatimname) { $verbatim=0; $active = ($latexonly ? 0 : 1); } } print STDERR "$prompt %--line::${orig}%-- active=$active mute=$mute ". "latexonly=$latexonly fakeenv=$fakeenv verbatim=$verbatim" if ($debug > 1) && $orig =~ /\\end|%\s*end/; } print OUT "%%% TEXEXPAND: END FILE $infile\n" if ($includelevel > 1 && $active); close(IN); $includelevel--; } # Handle the LaTeX tags \input, \include, \endinput, \documentclass, # \documentstyle, \usepackage, \RequirePackage, \end{document}, # \includecomment, \excludecomment with respect to the three states # controlled by $active and $mute. # The state 'interprete minimal and suppress' ($active false, $mute true) # does not require further actions, just do nothing. # When in $active state, call one of &process_input_include_file, # &process_document_header, or &process_package_cmd to examine the # apropriate line further. # # Returns 0 if the caller is to stop interpreting the current file (\endinput). # Returns 1 otherwise. # Set $end_document to 1 if an \end{document} is detected (this stops # the whole task of texexpand). # sub interprete { local($_,$comments) = @_; local($line) = $_; local($before,$after); # the default to print to OUT $line =~ s/\n/$comments\n/; if ($active) { #looses $comments on successful input/include, document header, #or usepackage/RequirePackage if (/\\(input|include)\W/) { ($before,$after) = ($`,$&.$'); if ($before =~ /$ignore_cmd_rx/o) { print OUT $line; } else { if (length($before)) { #put prefix to \\input etc. 
to single line print OUT $before,"\%\n"; #mask special chars $before =~ s/(\W)/\\$1/g; #strip prefix from total line incl. comments $line =~ s/$before//; } # print total line incl. comments if merging failed print OUT $line #may re-enter &process_file unless &process_input_include_file($after); } } # elsif (/\\(usepackage|RequirePackage)\s*$options_rx\s*$arg_rx/s) { elsif (/\\(usepackage|RequirePackage)[^]]/s) { $before = $`; if($before =~ /$ignore_cmd_rx/o) { print OUT $line; } else { while (!/\\(usepackage|RequirePackage)\s*$options_rx\s*$arg_rx/so) { chomp; $_ =~ s/%.*$//; $_ .= ; } &process_package_cmd($_); } } # elsif (/\\document(class|style)\s*$options_rx\s*$arg_rx/o) { elsif (/\\document(class|style)/o) { $before = $`; if ($before =~ /$ignore_cmd_rx/o) { print OUT $line; } else { while (!/\\document(class|style)\s*$options_rx\s*$arg_rx/so) { chomp; $_ =~ s/%.*$//; $_ .= ; } &process_document_header($_); } } elsif ($opt{unsegment} && /^\s*\\segment(\*?)\s*$options_rx\s*$arg_rx\s*$arg_rx\s*/) { # We found a segmenting command which must vanish. # Therefore, mutate the \segment into the section command specified # by $4 (section, subsection, ...) and $1 (* or empty) followed by # the section text, and an \input statement with filename $3. # To obtain the section text, we need to take a preview to the next # lines, as it might be truncated with %'s. # Line truncations between the regex above (like \segment%\n) are # not recognized. # There are as much lines fetched as required to satisfy the equality # of the amounts of left and right braces, since we aren't able to # handle nested brace pairs. # If this strategy fails, texexpand is terminated, thereby satisfying # the 'all or nothing' requirement. 
local($file) = $3; print OUT "\\$4$1"; $after = $_ = $'; #get tail local($left,$right) = (tr/\{/\{/,tr/\}/\}/); while (($left != $right) || !$left) { #braces not balanced or no opening brace at all, get next line $_ = ; die "$prompt arguments to \\segment are too complex\n" unless length($_) && length($after) < 500; # strip comments s/(^|[^\\])(\\\\)*(%.*)/$1$2/; $left += tr/\{/\{/; $right += tr/\}/\}/; $after .= $_; } $after =~ /\}([^\}]*)$/; $after = $1; $_ = $`; # Ok we have it. $_ should carry the whole section title plus # opening brace, the original lines squeezed into one. print OUT $_,"}\n"; # set this globally to control behavior of next &process_file $segmentfile = 1; die "$prompt segment file <$file> could not be merged" unless &process_input_include_file("\\input\{$file\}$after"); } # Print the first /end{document}, only. Truncate anything after it. elsif (/^(.*\\end\{document\})/) { $before = $1; if ($before =~ /$ignore_cmd_rx/o) { print OUT $line; } else { print OUT "$before\n"; $end_document++; } } elsif (/\\endinput/) { $before=$`; return(0) #stop this file if ($includelevel > 1 && $before !~ /$ignore_cmd_rx/o); } elsif (/\\(in|ex)cludecomment\s*$arg_rx/o) { local($mode,$env) = ($1,$2); $env =~ s/\s//g; #strip space # escape special chars (such as "*"), but reject "|" $env =~ s/(\W)/\\$1/g; unless ($env =~ /\|/) { $fakeenv_rx =~ /\((.*)\)/; # might also be empty local(@envs) = split(/\|/,$1); if ($mode eq "ex") { push(@envs,$env); } else { # a dumb try to forget the comment environment if redefined $env =~ s/\\/\\\\/g; #must not use $_ inside grep pattern! @envs = grep(!/$env/,@envs); } $fakeenv_rx = "\(".join("|",@envs)."\)"; } } else { print OUT $line; } } elsif (! 
$mute) {
        # print line if in verbatim/comment mode
        print OUT $line;
    }
    return(1); #continue if not $end_document
}

# Tries to merge the file named by an \input or \include command.
#
# Argument: the text starting at the command (the caller passes the part
#           of the line that begins with \input / \include).
# Returns:  1 if the file was found and merged into OUT (the text before
#           the command, the file content via &process_file, and any
#           non-blank text after the command are written to OUT);
#           0 if nothing was merged, in which case the caller is expected
#           to print the original line itself.
# Globals:  reads $do_include_rx, $dont_include_rx, $do_include_ext_rx,
#           $dont_include_ext_rx, %opt, $dd, $arg_rx, $prompt, $debug;
#           writes to the OUT, STYLES and STDERR handles.
sub process_input_include_file {
    local($_) = @_;
    local($before,$after,$class,$styles);

    $_ =~ s/\n$//;
    print STDERR "$prompt %--- Found include at level $includelevel: $_"
        if($debug);

    # Get filename
    local($filename) = "";
    # $class serves as temporary storage
    if (/(\\input|\\include)\s*$arg_rx/o) {
        # braced form: \input{file}
        ($before,$after,$class,$filename) = ($`, $', $&, $2);
        $filename =~ s/\s//g;
    }
    elsif (/(\\input|\\include)\s+(\S+)(?=\s|$)/o) {
        # plain TeX form: \input file
        ($before,$after,$class,$filename) = ($`, $', $&, $2);
        $filename =~ s/\s//g;
    }
    else {
        print STDERR "$prompt %--- COULDN'T FIND FILENAME\n" if($debug);
    }

    if ($filename) {
        # Get base name
        $styles = $filename;
        $styles =~ s|.*\Q$dd\E||; # strip path
        $styles =~ s/\.[^.]*$//; # strip extension

        # Sorry for the next if-statement... (hmm,ok)
        # The file is ignored when it is not explicitly requested (neither
        # by base name nor by extension) and it is either explicitly
        # excluded by base name, or auto-exclusion is on and its
        # extension is in the excluded set.
        if ($styles !~ /^($do_include_rx)$/o &&
            $filename !~ /\.($do_include_ext_rx)$/o &&
            ($styles =~ /^($dont_include_rx)$/o ||
             ($opt{auto_exclude} &&
              $filename =~ /\.($dont_include_ext_rx)$/o))) {
            print STDERR "$prompt %--- ignoring $filename" if($debug);
            print STYLES "$styles\n" if($opt{save_styles});
        }
        else {
            local($fname) = &find_file($filename);
            # notify anyway that a file is found, to allow a Perl
            # module loaded for this specific file
            # print STYLES "$styles\n" if($opt{save_styles});
            if($fname) {
                print OUT "$before";
                # recursive call
                &process_file($fname);
                print OUT $after if($after =~ /\S+/);
                print STDERR "$prompt %--- successfully included $filename"
                    if($debug > 1);
                return(1); #merge
            }
            else {
                print STDERR "$prompt include $filename failed. Reinserting $before command\n";
            }
        }
    }
    return(0); #no merge
}

# Handles a complete \documentclass / \documentstyle command.
#
# Argument: the text containing the command (possibly several source
#           lines already joined by the caller).
# Effect:   options for which &should_include is true and for which
#           &find_file locates a .clo (class) or .sty (style) file are
#           removed from the command and merged after it via
#           &process_file; all other options are put back into the
#           command.  The class name and the non-merged options are
#           written to STYLES when $opt{save_styles} is set.
#           If the argument does not contain a recognizable command it is
#           copied to OUT unchanged.
sub process_document_header {
    local($_) = @_;
    local(%style_include,@print_styles,$key,$isclass);
    local($before, $latextype, $styles, $class, $after);

    if(/\\document(class|style)\s*$options_rx\s*$arg_rx/o) {
        ($before, $latextype, $styles, $class, $after) =
            ($`, $1, $2 || '', $3, $');
        if ($latextype =~ /class/) {
            $isclass = 1;
        }
    }
    else {
        print OUT $_;
        return;
    }
    $_ =~ s/\n$//;
    print STDERR "$prompt %--- Found $latextype: $_\n" if($debug);

    $styles =~ s/\[(.*)\]/$1/; # Strip braces
    $class =~ s/\s//g; # Strip spaces

    # the class cannot be included, so stuff it in the style file
    # (for \documentclass the options follow on the same STYLES line)
    print STYLES "$class".($isclass ? '':"\n") if($opt{save_styles});

    foreach $key (split(/,/, $styles)) {
        $key =~ s/\s//g; # strip spaces
        push(@print_styles,$key);
        if (&should_include($key)) {
            # mark the style for inclusion and search for the
            # corresponding .clo (LaTeX2e) or .sty (LaTeX209)
            # &find_file gives the filename or undef.
            $style_include{$key} = &find_file($key .
                (($latextype =~ /class/) ? '.clo' : '.sty'));
        }
    }

    $styles = '';
    foreach $key (@print_styles) {
        if(!$style_include{$key}) {
            # put style back into command and save it to the style file
            print STYLES ($isclass ? " $key," : "$key\n") if($opt{save_styles});
            $styles .= ',' . $key;
        }
    }
    if ($styles) {
        $styles =~ s/^,//;
        $styles = '[' . $styles . ']';
    }
    print OUT join('', $before, "\\document", $latextype, $styles,
                   '{', $class, '}', $after);

    # Include styles after the \document(class|style) command
    foreach $key (@print_styles) {
        if($style_include{$key}) {
            &process_file($style_include{$key});
        }
    }
    # terminate the STYLES line that was started with the class name
    print STYLES "\n" if($opt{save_styles} && $isclass);
}

# Handles a complete \usepackage / \RequirePackage command.
#
# Argument: the text containing the command (possibly several source
#           lines already joined by the caller).
# Effect:   packages for which &should_include is true and whose .sty
#           file is found are dropped from the command and merged after
#           it via &process_file; the remaining packages are written back
#           to OUT (with the original options) and, when
#           $opt{save_styles} is set, recorded in STYLES.
#           If the argument does not contain a recognizable command it is
#           copied to OUT unchanged instead of being processed with
#           stale match variables.
sub process_package_cmd {
    local($_) = @_;
    local(%style_include,@print_styles,$key);
    local($before,$class,$options,$styles,$after);

    if (/\\(usepackage|RequirePackage)\s*$options_rx\s*$arg_rx/o) {
        ($before,$class,$options,$styles,$after) =
            ($`, $1, $2 || '', $3, $');
    }
    else {
        # Same fallback as &process_document_header: never work on the
        # captures of some earlier, unrelated match.
        print OUT $_;
        return;
    }
    print STDERR "$prompt %--- Found \\$class: $_" if($debug > 1);

    $options =~ s/\[(.*)\]/$1/o; # strip braces

    foreach $key (split(/,/,$styles)) {
        $key =~ s/\s//g; # strip spaces
        # Remember each package and check whether to merge it
        push(@print_styles,$key);
        if (&should_include($key)) {
            $style_include{$key}=&find_file($key . '.sty');
        }
    }

    $styles = '';
    foreach $key (@print_styles) {
        if (!$style_include{$key}) {
            # print to style file and reinsert into command
            # if package is not to be merged
            print STYLES "$key $options\n" if($opt{save_styles});
            $styles .= ',' . $key;
        }
    }
    if($styles) {
        # Reconstruct command
        $styles =~ s/^,//;
        $options = '[' . $options . ']' if($options =~ /\S+/);
        print OUT $before . '\\' . $class . $options . '{' . $styles . '}' . $after;
    }
    else {
        # every package is merged: the command itself vanishes
        print OUT $before . $after;
    }

    foreach $key (@print_styles) {
        if($style_include{$key}) {
            # merge style files
            &process_file($style_include{$key});
        }
    }
}

# Registers items that must NOT be merged.
#
# Arguments: list of names.  A name starting with `.' is taken as a file
#            extension and appended (regex-quoted) to the alternation in
#            $dont_include_ext_rx; any other name is pushed onto
#            @dont_include.
# Returns:   1.
sub process_dont_include {
    my @items = @_;
    my $item;

    foreach $item (@items) {
        if($item =~ s/^\.//) {
            # starts with `.'? Then it's an extension
            # Only insert the `|' separator when there is already an
            # alternative; a leading `|' would add an empty alternative
            # to the regex.  Mirrors &process_do_include.
            $dont_include_ext_rx .=
                (($dont_include_ext_rx eq '') ? '' : '|') . "\Q$item\E";
        }
        else {
            push(@dont_include,$item);
        }
    }
    1;
}

# Registers items that MUST be merged.
#
# Arguments: list of names.  A name starting with `.' is taken as a file
#            extension and appended (regex-quoted) to the alternation in
#            $do_include_ext_rx; any other name is pushed onto
#            @do_include.
# Returns:   1.
sub process_do_include {
    my @items = @_;
    my $item;

    foreach $item (@items) {
        if($item =~ s/^\.//) {
            # starts with `.'? Then it's an extension
            $do_include_ext_rx .=
                (($do_include_ext_rx eq '') ? '' : '|') . "\Q$item\E";
        }
        else {
            push(@do_include,$item);
        }
    }
    1;
}

# Returns true if style has to be included, i.e.:
# 1.
#    The style is found in do_include *or*
# 2. Automatic exclusion is disabled and the style is *not* found in
#    dont_include
#
# Argument: the bare style/package/option name.
# Globals:  reads $do_include_rx, $dont_include_rx and $opt{auto_exclude}.
#           Both patterns are compiled once (/o), so later changes to the
#           two regex variables have no effect.
sub should_include {
    my ($style) = @_;

    return($style =~ /^($do_include_rx)$/o ||
           (!$opt{auto_exclude} && $style !~ /^($dont_include_rx)$/o ));
}

# Locates a TeX input file.
#
# Argument: file name, with or without extension.
# Returns:  the name of the file found, or undef.
# Search order:
#   - absolute path: the file itself, then the file with `.tex' appended
#     (see &file_or_ext);
#   - $Web2C set:   ask kpsewhich, trying the given extension or, when
#     there is none, `tex', `ltx' and `sty' in turn;
#   - otherwise:    every directory in @texinputs via &dir_search.
# $fname is deliberately a dynamically scoped (`local') variable, because
# &file_or_ext reads and modifies the caller's $fname.
sub find_file {
    local($file) = @_;
    local($fname,$dname);
    local($found)=0;

    print STDERR "$prompt %--- checking for $file" if($debug);

#    if ($file =~ m|^$dd|) {
    if (L2hos->is_absolute_path($file)) {
        $fname=$file;
        if(&file_or_ext) {
            $found=1;
        }
    }
    else {
        if ($Web2C) {
            $file =~ s/\s+//g;
            if ($file =~ s/\.([^\.]+)\Z//) {
                @ext = ($1);
            } else {
                @ext = ('tex', 'ltx', 'sty');
            }
            foreach $ext (@ext) {
                # NOTE(review): $file comes from the document and is
                # interpolated into a shell command line unquoted; a file
                # name containing shell metacharacters would be
                # interpreted by the shell.
                $fname = `kpsewhich -format=.tex $file.$ext`;
                $fname = '' unless defined($fname);
                chomp($fname);
#RRM: I cannot make this work, to replace the `...` in the line above
#               $fname = &syswait("kpsewhich -format=.tex $file.$ext");
#               chop $fname;
                print STDERR "$prompt kpsewhich says : $fname" if $debug;
                # Only a non-empty answer is a hit; otherwise go on with
                # the next candidate extension.
                if (length($fname)) {
                    $found = 1;
                    last;
                }
            }
        }
        else {
            # search input directories
            foreach $dir (@texinputs) {
                ($dname = $dir) =~ s|[\Q$dd\E]+$||; # Remove slashes at the end
                if (-d $dname) {
                    if ($fname = &dir_search($dir,$file)) {
                        $found = 1;
                        last;
                    }
                }
                else {
                    print STDERR "$prompt %--- Warning: \"$dname\" is no directory"
                        if ($debug);
                }
            }
        }
    }

    if ($found) {
        print STDERR "$prompt %--- found $fname" if ($debug);
        return($fname);
    }
    else {
        print STDERR "$prompt %--- file not found" if ($debug);
        return(undef);
    }
}

# Looks for a file in one directory and, if the directory name ends in a
# doubled directory delimiter (`//'), in all of its non-dot
# subdirectories, depth first.
#
# Arguments: directory name, file name.
# Returns:   the name of the file found (possibly with `.tex' appended,
#            see &file_or_ext), or 0.
# A directory that cannot be opened is treated as containing nothing.
sub dir_search {
    # search directory recursively
    local($dir,$file) = @_;
    local(*SUBDIR); # make file pointer local
    local($dname,$found,$recursive) =('',0,0);
    my $entry;      # lexical, so that the caller's $_ is left alone

    if ($dir =~ m|\Q$dd$dd\E$|) { # does dir end in `//'?
        $recursive = 1;
    }
    $dir =~ s|[\Q$dd\E]+$||; # Remove any slashes at the end

    # must stay `local': &file_or_ext works on the caller's $fname
    local($fname) = join ($dd, $dir, $file);
    print STDERR "$prompt %--- looking for $fname" if($debug);

    # Does file exist in this directory?
    if (&file_or_ext) {
        return($fname);
    }
    elsif ($recursive) { # descend into subdirectories?
        # search directory for subdirectories
        opendir(SUBDIR,$dir) || return(0); # open directory
        while (defined($entry = readdir(SUBDIR))) { # read dir-entries
            next if($entry =~ /^\./); # do not check dotfiles
            $dname = join ($dd, $dir, $entry);
            if ((-d $dname) &&
                ($fname = &dir_search($dname.$dd.$dd,$file))) {
                $found = 1;
                last;
            }
        }
        closedir(SUBDIR);
        if ($found) {
            return($fname);
        }
    }
    return(0);
}

# Checks whether the file named by the caller's $fname can be used.
#
# Takes no arguments; reads and MODIFIES the dynamically scoped $fname
# of the caller:
#   - if $fname is a readable non-directory, return 1;
#   - else if $fname already ends in `.tex', return 0;
#   - else append `.tex' to $fname and return 1 if that is a readable
#     plain file, 0 otherwise ($fname keeps the appended `.tex' in
#     either case).
sub file_or_ext {
    # Modifies $fname
    # if $fname exists return success otherwise
    # if $fname.tex exists, then bind $fname to $fname.tex and return success
    # else fail
    return 1 if(!-d $fname && -r $fname); # && -s $fname;
    return 0 if $fname =~ /\.tex$/;
    $fname .= ".tex";
    return 1 if -f $fname && -r $fname;# && -s $fname;
    return 0;
}