# Provides support for latex2html for additional commands needed for # the apcupsd manual. # Returns the minimum of any number of numeric arguments. sub min { my $tmp = shift; while ($test = shift) { $tmp = $test if ($test < $tmp); } return $tmp; } # These two are copied from # /usr/lib/latex2html/style/hthtml.perl, # from the subroutine do_cmd_htmladdnormallink. # They have been renamed, then removed the # name argument and reversed the other two arguments. sub do_cmd_elink{ local($_) = @_; local($text, $url, $href); local($opt, $dummy) = &get_next_optional_argument; $text = &missing_braces unless ((s/$next_pair_pr_rx/$text = $2; ''/eo) ||(s/$next_pair_rx/$text = $2; ''/eo)); $url = &missing_braces unless ((s/$next_pair_pr_rx/$url = $2; ''/eo) ||(s/$next_pair_rx/$url = $2; ''/eo)); $*=1; s/^\s+/\n/; $*=0; $href = &make_href($url,$text); print "\nHREF:$href" if ($VERBOSITY > 3); join ('',$href,$_); } sub do_cmd_ilink { local($_) = @_; local($text); local($opt, $dummy) = &get_next_optional_argument; $text = &missing_braces unless ((s/$next_pair_pr_rx/$text = $2; ''/eo) ||(s/$next_pair_rx/$text = $2; ''/eo)); &process_ref($cross_ref_mark,$cross_ref_mark,$text); } sub do_cmd_lt { join('',"\<",$_[0]); } sub do_cmd_gt { join('',"\>",$_[0]); } # KEC Copied from latex2html.pl and modified to prevent # filename collisions. This is done with a static hash of # already-used filenames. An integer is appended to the # filename if a collision would result without it. # The addition of the integer is done by removing # character(s) before .html if adding the integer would result # in a filename longer than 32 characters. Usually just removing # the character before .html would resolve the collision, but we # add the integer anyway. The first integer that resolves the # collision is used. # If a filename is desired that is 'index.html' or any case # variation of that, it is changed to index_page.html, # index_page1.html, etc. #RRM Extended to allow customised filenames, set $CUSTOM_TITLES # or long title from the section-name, set $LONG_TITLES # { my %used_names; # Static hash. sub make_name { local($sec_name, $packed_curr_sec_id) = @_; local($title,$making_name,$saved) = ('',1,''); my $final_name; if ($LONG_TITLES) { $saved = $_; # This alerts the subroutine textohtmlindex not to increment its index counter on the next call. &do_cmd_textohtmlindex("\001noincrement"); &process_command($sections_rx, $_) if /^$sections_rx/; $title = &make_apcupsd_title($TITLE) unless ((! $TITLE) || ($TITLE eq $default_title)); $_ = $saved; } elsif ($CUSTOM_TITLES) { $saved = $_; # This alerts the subroutine textohtmlindex not to increment its index counter on the next call. &do_cmd_textohtmlindex("\001noincrement"); &process_command($sections_rx, $_) if /^$sections_rx/; $title = &custom_title_hook($TITLE) unless ((! $TITLE) || ($TITLE eq $default_title)); $_ = $saved; } if ($title) { #ensure no more than 32 characters, including .html extension $title =~ s/^(.{1,27}).*$/$1/; ++$OUT_NODE; $final_name = join("", ${PREFIX}, $title, $EXTN); } else { # Remove 0's from the end of $packed_curr_sec_id $packed_curr_sec_id =~ s/(_0)*$//; $packed_curr_sec_id =~ s/^\d+$//o; # Top level file $final_name = join("",($packed_curr_sec_id ? "${PREFIX}$NODE_NAME". ++$OUT_NODE : $sec_name), $EXTN); } # Change the name from index to index_page to avoid conflicts with # index.html. $final_name =~ s/^(index)\.html$/$1_Page.html/i; # If the $final_name is already used, put an integer before the # # .html to make it unique. my $integer = 0; my $saved_name = $final_name; while (exists($used_names{$final_name})) { $final_name = $saved_name; my ($filename,$ext) = $final_name =~ /(.*)(\..*)$/; my $numlen = length(++$integer); # If the filename (after adding the integer) would be longer than # 32 characters, insert the integer within it. if (((my $namelen = length($final_name)) + $numlen) >= 32) { substr($filename,-$numlen) = $integer; } else { $filename .= $integer; } $final_name = $filename . $ext; } # Save the $final_name in the hash to mark it as being used. $used_names{$final_name} = undef; return $final_name; } } sub make_apcupsd_title { local($_)= @_; local($num_words) = $LONG_TITLES; #RRM: scan twice for short words, due to the $4 overlap # Cannot use \b , else words break at accented letters $_ =~ s/(^|\s)\s*($GENERIC_WORDS)(\'|(\s))/$4/ig; $_ =~ s/(^|\s)\s*($GENERIC_WORDS)(\'|(\s))/$4/ig; #remove leading numbering, unless that's all there is. local($sec_num); if (!(/^\d+(\.\d*)*\s*$/)&&(s/^\s*(\d+(\.\d*)*)\s*/$sec_num=$1;''/e)) { $num_words-- }; &remove_markers; s/<[^>]*>//g; #remove tags #revert entities, etc. to TeX-form... s/([\200-\377])/"\&#".ord($1).";"/eg; $_ = &revert_to_raw_tex($_); # get $LONG_TITLES number of words from what remains $_ = &get_apcupsd_words($_, $num_words) if ($num_words); # ...and cleanup accents, spaces and punctuation $_ = join('', ($SHOW_SECTION_NUMBERS ? $sec_num : ''), $_); s/\\\W\{?|\}//g; s/\s/_/g; s/\'s/s/ig; # Replace 's with just the s. s/\W/_/g; s/__+/_/g; s/_+$//; $_; } #JCL(jcl-tcl) # changed completely # KEC 2-21-05 Changed completely again. # # We take the first real words specified by $min from the string. # REmove all markers and markups. # Split the line into words. # Determine how many words we should process. # Return if no words to process. # Determine lengths of the words. # Reduce the length of the longest words in the list until the # total length of all the words is acceptable. # Put the words back together and return the result. # sub get_apcupsd_words { local($_, $min) = @_; local($words,$i); local($id,%markup); # KEC my ($oalength,@lengths,$last,$thislen); my $maxlen = 28; #no limit if $min is negative $min = 1000 if ($min < 0); &remove_anchors; #strip unwanted HTML constructs s/<\/?(P|BR|H)[^>]*>/ /g; #remove leading white space and \001 characters s/^\s+|\001//g; #lift html markup s/(<[^>]*>(#[^#]*#)?)//ge; # Split $_ into a list of words. my @wrds = split /\s+|\-{3,}/; $last = &min($min - 1,$#wrds); return '' if ($last < 0); # Get a list of word lengths up to the last word we're to process. # Add one to each for the separator. @lengths = map (length($_)+1,@wrds[0..$last]); $thislen = $maxlen + 1; # One more than the desired max length. do { $thislen--; @lengths = map (&min($_,$thislen),@lengths); $oalength = 0; foreach (@lengths) {$oalength += $_;} } until ($oalength <= $maxlen); $words = join(" ",map (substr($wrds[$_],0,$lengths[$_]-1),0..$last)); return $words; } sub do_cmd_htmlfilename { my $input = shift; my ($id,$filename) = $input =~ /^<#(\d+)#>(.*?)<#\d+#>/; } # KEC 2-26-05 # do_cmd_addcontentsline adds support for the addcontentsline latex command. It evaluates # the arguments to the addcontentsline command and determines where to put the information. Three # global lists are kept: for table of contents, list of tables, and list of figures entries. # Entries are saved in the lists in the order they are encountered so they can be retrieved # in the same order. my (%toc_data); sub do_cmd_addcontentsline { &do_cmd_real_addcontentsline(@_); } sub do_cmd_real_addcontentsline { my $data = shift; my ($extension,$pat,$unit,$entry); # The data is sent to us as fields delimited by their ID #'s. Extract the # fields. The first is the extension of the file to which the cross-reference # would be written by LaTeX, such as {toc}, {lot} or {lof}. The second is either # {section}, {subsection}, etc. for a toc entry, or , {table}, or {figure} # for a lot, or lof extension (must match the first argument), and # the third is the name of the entry. The position in the document represents # and anchor that must be built to provide the linkage from the entry. $extension = &missing_braces unless ( ($data =~ s/$next_pair_pr_rx/$extension=$2;''/eo) ||($data =~ s/$next_pair_rx/$extension=$2;''/eo)); $unit = &missing_braces unless ( ($data =~ s/$next_pair_pr_rx/$unit=$2;''/eo) ||($data =~ s/$next_pair_rx/$unit=$2;''/eo)); $entry = &missing_braces unless ( ($data =~ s/$next_pair_pr_rx/$pat=$1;$entry=$2;''/eo) ||($data =~ s/$next_pair_rx/$pat=$1;$entry=$2;''/eo)); $contents_entry = &make_contents_entry($extension,$pat,$entry,$unit); return ($contents_entry . $data); } # Creates and saves a contents entry (toc, lot, lof) to strings for later use, # and returns the entry to be inserted into the stream. # sub make_contents_entry { local($extension,$br_id, $str, $unit) = @_; my $words = ''; my ($thisref); # If TITLE is not yet available use $before. $TITLE = $saved_title if (($saved_title)&&(!($TITLE)||($TITLE eq $default_title))); $TITLE = $before unless $TITLE; # Save the reference if ($SHOW_SECTION_NUMBERS) { $words = &get_first_words($TITLE, 1); } else { $words = &get_first_words($TITLE, 4); } $words = 'no title' unless $words; # # any \label in the $str will have already # created a label where the \addcontentsline occurred. # This has to be removed, so that the desired label # will be found on the toc page. # if ($str =~ /tex2html_anchor_mark/ ) { $str =~ s/><\/A>]+)>$cross_ref_mark/ do { ($label,$id) = ($1,$2); $ref_label = $external_labels{$label} unless ($ref_label = $ref_files{$label}); '"' . "$ref_label#$label" . '">' . &get_ref_mark($label,$id)} /geo; } $str =~ s/<\#[^\#>]*\#>//go; #RRM # recognise \char combinations, for a \backslash # $str =~ s/\&\#;\'134/\\/g; # restore \\s $str =~ s/\&\#;\`
/\\/g; # ditto $str =~ s/\&\#;*SPMquot;92/\\/g; # ditto $thisref = &make_named_href('',"$CURRENT_FILE#$br_id",$str); $thisref =~ s/\n//g; # Now we build the actual entry that will go in the lot and lof. # If this is the first entry, we have to put a leading newline. if ($unit eq 'table' ) { if (!$table_captions) { $table_captions = "\n";} $table_captions .= "
  • $thisref\n"; } elsif ($unit eq 'figure') { if (!$figure_captions) { $figure_captions = "\n"; } $figure_captions .= "
  • $thisref\n"; } "$anchor_invisible_mark<\/A>"; } # This is needed to keep latex2html from trying to make an image for the registered # trademark symbol (R). This wraps the command in a deferred wrapper so it can be # processed as a normal command later on. If this subroutine is not put in latex2html # invokes latex to create an image for the symbol, which looks bad. sub wrap_cmd_textregistered { local($cmd, $_) = @_; (&make_deferred_wrapper(1).$cmd.&make_deferred_wrapper(0),$_) } # KEC # Copied from latex2html.pl and modified to create a file of image translations. # The problem is that latex2html creates new image filenames like imgXXX.png, where # XXX is a number sequentially assigned. This is fine but makes for very unfriendly # image filenames. I looked into changing this behavior and it seems very much embedded # into the latex2html code, not easy to change without risking breaking something. # So I'm taking the approach here to write out a file of image filename translations, # to reference the original filenames from the new filenames. THis was post-processing # can be done outside of latex2html to rename the files and substitute the meaningful # image names in the html code generated by latex2html. This post-processing is done # by a program external to latex2html. # # What we do is this: This subroutine is called to output images.tex, a tex file passed to # latex to convert the original images to .ps. The string $latex_body contains info for # each image file, in the form of a unique id and the orininal filename. We extract both, use # the id is used to look up the new filename in the %id_map hash. The new and old filenames # are output into the file 'filename_translations' separated by \001. # sub make_image_file { do { my $tmp = $latex_body; open KC,">imagename_translations" or die "Cannot open filename translation file for writing"; while ($tmp =~ /\\lthtmlpictureA\{(.*?)\}\%\n\\includegraphics\{(.*?)\}\%/) { $tmp = $'; my $id = $id_map{$1}; my $oldname = $2; $id =~ s/\#.*//; print KC "img$id.png\001$oldname\n"; } close KC; print "\nWriting image file ...\n"; open(ENV,">.$dd${PREFIX}images.tex") || die "\nCannot write '${PREFIX}images.tex': $!\n"; print ENV &make_latex($latex_body); print ENV "\n"; close ENV; ©_file($FILE, "bbl"); ©_file($FILE, "aux"); } if ((%latex_body) && ($latex_body =~ /newpage/)); } 1; # Must be present as the last line.