# Copyright (C) 1998-09 Stephane Galland
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

=pod

=head1 NAME

Bib2HTML::Translator::TeX - A translator from TeX to HTML

=head1 SYNOPSIS

    use Bib2HTML::Translator::TeX ;

    my $gen = Bib2HTML::Translator::TeX->new( filename ) ;

=head1 DESCRIPTION

Bib2HTML::Translator::TeX is a Perl module which translates a TeX
string into an HTML string.

=head1 GETTING STARTED

=head2 Initialization

To create a parser, say something like this:

    use Bib2HTML::Translator::TeX;

    my $parser = Bib2HTML::Translator::TeX->new( 'toto.bib', '', '' ) ;

...or something similar. Acceptable parameters to the constructor are:

=over

=item * filename (string)

is the name of the file being parsed.

=item * start_math (optional string)

is the HTML tag which starts the math mode.

=item * stop_math (optional string)

is the HTML tag which stops the math mode.

=back

=head1 METHOD DESCRIPTIONS

This section contains only the methods of this module itself.

=over

=cut

package Bib2HTML::Translator::TeX;

@ISA = ('Exporter');
@EXPORT = qw();
@EXPORT_OK = qw( &addtrans_char &gettrans_char
                 &addtrans_cmd_noparam &addtrans_cmd
                 &addtrans_cmd_func &gettrans_cmd
                 &display_supported_commands );

use strict;
use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);
use Exporter;
use Carp ;

use Bib2HTML::General::Misc ;
use Bib2HTML::General::HTML ;
use Bib2HTML::General::Error ;
use Bib2HTML::General::Verbose ;
use Bib2HTML::Translator::BibTeXEntry ;

#------------------------------------------------------
#
# Global vars
#
#------------------------------------------------------

# Version number of the parser.
# Assigned to the package variable declared by "use vars" above: a
# "my $VERSION" here would shadow it and hide the version from
# UNIVERSAL::VERSION and from "use Module VERSION" checks.
$VERSION = "3.0" ;

###############################################################
# This is the list of characters which will be automatically
# and directly translatable into a HTML entity.
#
# Keys are single characters as they appear in the TeX source;
# values are the HTML character entity references emitted in
# their place. The values must stay entity references (never
# the raw character), otherwise markup-significant characters
# such as <, > and " reach the generated page unescaped.
#
my %TEX_HTML_CHAR_TRANS = (
    '~' => '&nbsp;',   #unsecable space
    '£' => '&pound;',  #pound sign
    '¤' => '&curren;', #currency sign
    '|' => '&brvbar;', #broken bar = broken vertical bar
    '§' => '&sect;',   #section sign
    '°' => '&deg;',    #degree sign
    '²' => '&sup2;',   #superscript two = superscript digit two = squared
    'µ' => '&micro;',  #micro sign
    'À' => '&Agrave;', #latin capital letter A with grave = latin capital letter A grave
    'Á' => '&Aacute;', #latin capital letter A with acute
    'Â' => '&Acirc;',  #latin capital letter A with circumflex
    'Ã' => '&Atilde;', #latin capital letter A with tilde
    'Ä' => '&Auml;',   #latin capital letter A with diaeresis
    'Å' => '&Aring;',  #latin capital letter A with ring above = latin capital letter A ring
    'Æ' => '&AElig;',  #latin capital letter AE = latin capital ligature AE
    'Ç' => '&Ccedil;', #latin capital letter C with cedilla
    'È' => '&Egrave;', #latin capital letter E with grave
    'É' => '&Eacute;', #latin capital letter E with acute
    'Ê' => '&Ecirc;',  #latin capital letter E with circumflex
    'Ë' => '&Euml;',   #latin capital letter E with diaeresis
    'Ì' => '&Igrave;', #latin capital letter I with grave
    'Í' => '&Iacute;', #latin capital letter I with acute
    'Î' => '&Icirc;',  #latin capital letter I with circumflex
    'Ï' => '&Iuml;',   #latin capital letter I with diaeresis
    'Ñ' => '&Ntilde;', #latin capital letter N with tilde
    'Ò' => '&Ograve;', #latin capital letter O with grave
    'Ó' => '&Oacute;', #latin capital letter O with acute
    'Ô' => '&Ocirc;',  #latin capital letter O with circumflex
    'Õ' => '&Otilde;', #latin capital letter O with tilde
    'Ö' => '&Ouml;',   #latin capital letter O with diaeresis
    'Ø' => '&Oslash;', #latin capital letter O with stroke = latin capital letter O slash
    'Ù' => '&Ugrave;', #latin capital letter U with grave
    'Ú' => '&Uacute;', #latin capital letter U with acute
    'Û' => '&Ucirc;',  #latin capital letter U with circumflex
    'Ü' => '&Uuml;',   #latin capital letter U with diaeresis
    'Ý' => '&Yacute;', #latin capital letter Y with acute
    'à' => '&agrave;', #latin small letter a with grave = latin small letter a grave
    'á' => '&aacute;', #latin small letter a with acute
    'â' => '&acirc;',  #latin small letter a with circumflex
    'ã' => '&atilde;', #latin small letter a with tilde
    'ä' => '&auml;',   #latin small letter a with diaeresis
    'å' => '&aring;',  #latin small letter a with ring above = latin small letter a ring
    'æ' => '&aelig;',  #latin small letter ae = latin small ligature ae
    'ç' => '&ccedil;', #latin small letter c with cedilla
    'è' => '&egrave;', #latin small letter e with grave
    'é' => '&eacute;', #latin small letter e with acute
    'ê' => '&ecirc;',  #latin small letter e with circumflex
    'ë' => '&euml;',   #latin small letter e with diaeresis
    'ì' => '&igrave;', #latin small letter i with grave
    'í' => '&iacute;', #latin small letter i with acute
    'î' => '&icirc;',  #latin small letter i with circumflex
    'ï' => '&iuml;',   #latin small letter i with diaeresis
    'ñ' => '&ntilde;', #latin small letter n with tilde
    'ò' => '&ograve;', #latin small letter o with grave
    'ó' => '&oacute;', #latin small letter o with acute
    'ô' => '&ocirc;',  #latin small letter o with circumflex
    'õ' => '&otilde;', #latin small letter o with tilde
    'ö' => '&ouml;',   #latin small letter o with diaeresis
    'ø' => '&oslash;', #latin small letter o with stroke = latin small letter o slash
    'ù' => '&ugrave;', #latin small letter u with grave
    'ú' => '&uacute;', #latin small letter u with acute
    'û' => '&ucirc;',  #latin small letter u with circumflex
    'ü' => '&uuml;',   #latin small letter u with diaeresis
    'ý' => '&yacute;', #latin small letter y with acute
    'ÿ' => '&yuml;',   #latin small letter y with diaeresis
    '"' => '&quot;',   #quotation mark = APL quote
    '^' => '&circ;',   #modifier letter circumflex accent
    '<' => '&lt;',     #less-than sign
    '>' => '&gt;',     #greater-than sign
) ;

###############################################################
# This is the list of text-mode commands.
# The commands must respect one of the following formats:
# 1) 'TeXCmdName' => "HTML code"
#      translates the LaTeX command \TeXCmdName
#      into the specified "HTML code".
# 2) 'TeXCmdName' => { 'params' => params
#                      'html' => "HTML code"
#                    }
#      replaces the command \TeXCmdName by the specified
#      "HTML code". The latter may contain a parameter
#      number (eg, #1 for the first, #2 for the second,
#      etc.) which will be replaced by the value
#      passed to the LaTeX command. The params specifies
#      the parameter prototype of the LaTeX command. It
#      must contain one (or more) of:
#      {}     for a needed parameter
#      [d]    for an optional parameter. d
#             is the default value given to this parameter
#             if it was not provided inside the LaTeX code
#      \\     for a LaTeX command name
#      !      indicates that the following sign ({} or[])
#             must not be interpreted by the LaTeX
#             translator. It must be used for verbatim
#             output
#      -      to read the text until the end of the current
#             LaTeX context
# 3) 'TeXCmdName' => { 'params' => params
#                      'latex' => "LaTeX code"
#                    }
#      replaces the command \TeXCmdName by the specified
#      "LaTeX code". The latter may contain a parameter
#      number (eg, #1 for the first, #2 for the second,
#      etc.) which will be replaced by the value
#      passed to the LaTeX command. The params specifies
#      the parameter prototype of the LaTeX command. It
#      must contain one (or more) of the macros defined
#      in the point 2).
# 4) 'TeXCmdName' => { 'params' => params
#                      'func' => "callback_function_name"
#                    }
#      replaces the command \TeXCmdName by the result of
#      the specified callback function.
This callback # function must take, at least, 1 parameters: # the current line number. The parameters of the # LaTeX command will be passed to this callback # function after this line number. # Example: for \newcommand{\cmdname}[4][default]{code #2} # we implements the callback function: # sub texcommand_newcommand { # my $lineno = shift || 0 ; # my ($cmdname,$nb_params) = # ( $_[0], $_[1] || 0 ) ; # my ($default,$code) = # ($_[2] || '', $_[3] || '') ; # ... # return '' ; # } # The params specifies the parameter prototype of # the LaTeX command. It must contains one (or more) # of the macros defined in the point 2). # 5) 'TeXCmdName' => { 'params' => params # 'texfunc' => "callback_function_name" # } # replaces the command \TeXCmdName by the result of # the specified callback function. The callback # must assume that its result was some LaTeX expression # which will be evaluated (this is the major difference # between a 'func' and a 'texfunc', VERY IMPORTANT point). # The callback function works same as for 'func' (point 4). # my %TEX_HTML_COMMANDS = ( ' ' => ' ', '_' => '_', # underline sign '-' => '', # hyphenation sign '$' => '\$', ',' => ' ', ';' => ' ', '%' => '%', '}' => '}', '{' => '{', '&' => '&', '\\' => '
', '&' => '&', #ampersand # Patch by Norbert Preining added the 2003/03/17 '#' => '#', '\'' => { 'params' => '{}', 'func' => 'texcommand_acute', }, '`' => { 'params' => '{}', 'func' => 'texcommand_grave', }, '~' => { 'params' => '{}', 'func' => 'texcommand_tilde', }, '"' => { 'params' => '{}', 'func' => 'texcommand_uml', }, '^' => { 'params' => '{}', 'func' => 'texcommand_circ', }, '=' => { 'params' => '{}', # One parameter 'func' => 'texcommand_bar', }, 'AA' => 'Å', 'aa' => 'å', 'AE' => 'Æ', #latin small letter ae = latin small ligature ae 'ae' => 'æ', #latin small letter ae = latin small ligature ae 'begin' => { 'params' => '!{}', # Start environment 'texfunc' => 'texcommand_beginenv', }, 'backslash' => '\\', 'beginblock' => '', # Ignored 'bf' => { 'params' => '-', # Bold font 'func' => 'texcommand_font_bold', }, 'bfseries' => { 'params' => '-', # Bold font 'func' => 'texcommand_font_bold', }, 'BibtoHTML' => 'BIB2HTML', # Bib2HTML logo 'bibtohtml' => 'BIB2HTML', # Bib2HTML logo 'BibTeX' => 'BIBTEX', # BibTeX logo 'c' => { 'params' => '{}', 'func' => 'texcommand_cedil', }, 'cdot' => '·', #middle dot = Georgian comma = Greek middle dot 'cite' => { 'params' => '[]{}', 'func' => 'texcommand_cite', }, 'def' => { 'params' => '\\{}', 'func' => 'texcommand_def', }, 'degree' => '°', #degree sign 'dg' => 'ð', #latin small letter eth 'DH' => 'Ð', #latin capital letter ETH 'div' => '÷', #division sign 'edef' => { 'params' => '\\{}', 'func' => 'texcommand_edef', }, 'Emph' => { 'params' => '{}', 'html' => '#1', }, 'em' => { 'params' => '-', # Emphasis 'html' => "#1", }, 'emph' => { 'params' => '{}', # Emphasis 'html' => '#1', }, 'end' => { 'params' => '!{}', # End environment 'texfunc' => 'texcommand_endenv', }, 'enditemize' => '', 'ensuremath' => { 'params' => '{}', 'func' => 'texcommand_ensuremath', }, 'footnotesize' => { 'params' => '-', 'html' => "#1", }, 'gdef' => { 'params' => '\\{}', 'func' => 'texcommand_def', }, 'global' => '', # ignored 'guillemotleft' => '«', 
#left-pointing double angle quotation mark 'guillemotright' => '»', #right-pointing double angle quotation mark = right pointing guillemet 'Huge' => { 'params' => '-', 'html' => "#1", }, 'html' => { 'params' => '!{}', # verbatim HTML code 'html' => '#1', }, 'huge' => { 'params' => '-', 'html' => "#1", }, 'i' => 'i', 'it' => { 'params' => '-', # Italic font 'func' => 'texcommand_font_italic', }, 'item' => '
  • ', 'itshape' => { 'params' => '-', # Italic font 'func' => 'texcommand_font_italic', }, # Patch by Norbert Preining added the 2003/03/17 'L' => 'L', # L bar 'LARGE' => { 'params' => '-', 'html' => "#1", }, 'Large' => { 'params' => '-', 'html' => "#1", }, 'LaTeX' => 'LATEX', # LaTeX logo 'large' => { 'params' => '-', 'html' => "#1", }, 'latex' => { 'params' => '{}', # Ignore the LaTeX commands 'html' => '', }, 'lnot' => '¬', #not sign 'mdseries' => { 'params' => '-', # Unbold Font 'func' => 'texcommand_font_medium', }, 'newcommand' => { 'params' => '{}[][]{}', 'func' => 'texcommand_newcommand', }, 'normalfont' => { 'params' => '-', 'func' => 'texcommand_font_normal', }, 'normalsize' => { 'params' => '-', 'html' => "#1", }, 'O' => 'Ø', 'o' => 'ø', 'OE' => 'Œ', #latin capital ligature OE 'oe' => 'œ', #latin small ligature oe 'P' => '¶', #pilcrow sign = paragraph sign 'pm' => '±', #plus-minus sign = plus-or-minus sign 'pounds' => '£s;', #pound sign 'renewcommand' => { 'params' => '{}[][]{}', 'func' => 'texcommand_newcommand', }, 'rm' => { 'params' => '-', # Roman font 'func' => "texcommand_font_roman", }, 'rmfamily' => { 'params' => '-', # Roman font 'func' => "texcommand_font_roman", }, 'S' => '§', #section sign 'sc' => { 'params' => '-', # Small-caps font 'func' => "texcommand_font_smallcap", }, 'scriptsize' => { 'params' => '-', 'html' => "#1", }, 'scshape' => { 'params' => '-', # Small-caps font 'func' => "texcommand_font_smallcap", }, 'sf' => { 'params' => '-', # Sans Serif font 'func' => "texcommand_font_serif", }, 'sffamily' => { 'params' => '-', # Sans Serif font 'func' => "texcommand_font_serif", }, 'sl' => { 'params' => '-', # Slanted font 'func' => "texcommand_font_slanted", }, 'slshape' => { 'params' => '-', # Slanted font 'func' => "texcommand_font_slanted", }, 'small' => { 'params' => '-', 'html' => "#1", }, 'ss' => 'ß', #latin small letter sharp s = ess-zed 'startblock' => '', # Ignored 'startitemize' => '