Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Base primitive improvements #2369

Merged
merged 14 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/LaTeXML/Common/Font.pm
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use LaTeXML::Common::Font::Metric;
use LaTeXML::Common::Font::StandardMetrics;
use LaTeXML::Common::Color;
use List::Util qw(min max sum);
use base qw(LaTeXML::Common::Object);
use base qw(LaTeXML::Common::Object);

# Note that this has evolved way beyond just "font",
# but covers text properties (or even display properties) in general
Expand Down Expand Up @@ -62,7 +62,7 @@ my $FLAG_EMPH = 0x10;
my %font_family = (
cmr => { family => 'serif' }, cmss => { family => 'sansserif' },
cmtt => { family => 'typewriter' }, cmvtt => { family => 'typewriter' },
cmti => { family => 'typewriter', shape => 'italic' },
cmt => { family => 'serif' }, # for cmti "text italic"
cmfib => { family => 'serif' }, cmfr => { family => 'serif' },
cmdh => { family => 'serif' }, cm => { family => 'serif' },
ptm => { family => 'serif' }, ppl => { family => 'serif' },
Expand Down
3 changes: 2 additions & 1 deletion lib/LaTeXML/Core/Document.pm
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use LaTeXML::Common::XML;
use LaTeXML::Util::Radix;
use Unicode::Normalize;
use Scalar::Util qw(blessed);
use base qw(LaTeXML::Common::Object);
use base qw(LaTeXML::Common::Object);

#**********************************************************************
# These two element names are `leaks' of the document structure into
Expand Down Expand Up @@ -250,6 +250,7 @@ sub canAutoClose {
|| (($t == XML_ELEMENT_NODE) # otherwise must be element
&& !$node->getAttribute('_noautoclose') # without _noautoclose
&& ($node->getAttribute('_autoclose') # and either with _autoclose
|| $node->getAttribute('_autoopened') # or was autoopened
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean that the implied "default" is now that any Tag() marked as autoOpen => 1 will also behave as if autoClose => 1?

I see we have a special attribute _noautoclose that prevents autoclose, but it makes me wonder if we should also respect an explicit autoClose => 0 from Tag(). Or document that we won't, or...

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you put it like that, it does seem too much, although it's probably correct most/all of the time? The problem element in this case was the ltx:text opened for font switches, so better to explicitly mark them as auto-closable, as well. patched...

# OR it has autoClose set on tag properties
|| (($props = $STATE->lookupMapping('TAG_PROPERTIES', getNodeQName($self, $node)))
&& $$props{autoClose})));
Expand Down
11 changes: 11 additions & 0 deletions lib/LaTeXML/Core/State.pm
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,17 @@ sub new {
$$self{uccode} = {};
$$self{delcode} = {};
$$self{tracing_definitions} = {};
# Initializations that INITEX would have set.
$$self{mathcode}{'.'} = [0];
for (my $c = ord('0') ; $c <= ord('9') ; $c++) {
$$self{mathcode}{ chr($c) } = [0x7000]; }
for (my $c = ord('a') ; $c <= ord('z') ; $c++) {
my $C = $c + ord('A') - ord('a');
$$self{mathcode}{ chr($c) } = [0x7100];
$$self{mathcode}{ chr($C) } = [0x7100];
$$self{uccode}{ chr($c) } = [$C];
$$self{lccode}{ chr($C) } = [$c];
$$self{sfcode}{ chr($C) } = [999]; }
return $self; }

sub assign_internal {
Expand Down
52 changes: 34 additions & 18 deletions lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,13 @@ DefParameterType('Optional', sub {
# Parameter type 'GeneralText'.
# The reader first fetches the next token with readXToken and immediately
# pushes it back with unread, so that anything expandable in front of the
# required opening brace has been expanded away before the argument is read.
# The argument itself is whatever readBalanced(0, 0, 1) returns.
# NOTE(review): the meaning of the three readBalanced flags is not visible here;
# 'XGeneralText' differs only in passing 1 as the first flag -- confirm against
# the Gullet's readBalanced.
DefParameterType('GeneralText', sub {
my ($gullet) = @_;
$gullet->unread($gullet->readXToken); # Force expansion to skip <filler> before required {

return $gullet->readBalanced(0, 0, 1); });

# Parameter type 'XGeneralText'.
# Same reader as 'GeneralText' (read-and-unread of an expanded token to get past
# any <filler> before the required opening brace), except that readBalanced is
# called with 1 rather than 0 as its first flag.
# NOTE(review): presumably that flag requests expansion of the argument while it
# is being read (hence the 'X') -- confirm against the Gullet's readBalanced.
DefParameterType('XGeneralText', sub {
my ($gullet) = @_;
$gullet->unread($gullet->readXToken); # Force expansion to skip <filler> before required {
return $gullet->readBalanced(1, 0, 1); });

DefParameterType('Until', sub {
my ($gullet, $until) = @_;
$gullet->readUntil($until); },
Expand Down Expand Up @@ -371,26 +375,38 @@ DefParameterType('BalancedParen', sub {
# It is useful when the content would usually need to have been \protect'd
# in order to correctly deal with catcodes.
# BEWARE: This is NOT a shorthand for a simple digested {}!
DefParameterType('Digested', sub {
no warnings 'recursion';
my ($gullet) = @_;
$gullet->skipSpaces;
my $ismath = $STATE->lookupValue('IN_MATH');
my @list = ();
my $token;
do { $token = $gullet->readXToken(0);
} while (defined $token && (($token->getCatcode == CC_SPACE) || $token->equals(T_CS('\relax'))));
if (!defined $token) { }
elsif ($token->getCatcode == CC_BEGIN) {
Digest($token);
push(@list, $STATE->getStomach->digestNextBody()); pop(@list); } # content w/o the braces
else {
push(@list, $STATE->getStomach->invokeToken($token)); }
@list = grep { ref $_ ne 'LaTeXML::Core::Comment' } @list;
List(@list, mode => ($ismath ? 'math' : 'text')); },
# Read one argument from $gullet and digest it on the spot.
#  * Leading spaces and \relax tokens are skipped.
#  * If the next (expanded) token opens a group, the group's body is digested
#    and the box for the closing brace is dropped.
#  * Otherwise that single token is invoked by the stomach.
# Comment boxes are removed from the result, which is returned as a List whose
# mode ('math' or 'text') reflects IN_MATH as it was when reading started.
sub readDigested {
  no warnings 'recursion';
  my ($gullet) = @_;
  $gullet->skipSpaces;
  # Capture the mode up front, before any digestion happens.
  my $in_math = $STATE->lookupValue('IN_MATH');
  my @boxes   = ();

  # Fetch the first token that is neither a space nor \relax (or run out of input).
  my $tok = $gullet->readXToken(0);
  while (defined $tok && (($tok->getCatcode == CC_SPACE) || $tok->equals(T_CS('\relax')))) {
    $tok = $gullet->readXToken(0); }

  if (defined $tok) {
    if ($tok->getCatcode == CC_BEGIN) {
      # Digest the opening brace, then the body up to and including the close.
      Digest($tok);
      @boxes = $STATE->getStomach->digestNextBody();
      pop(@boxes); }    # discard the closing brace: content w/o the braces
    else {
      @boxes = $STATE->getStomach->invokeToken($tok); } }

  my @kept = grep { ref $_ ne 'LaTeXML::Core::Comment' } @boxes;
  return List(@kept, mode => ($in_math ? 'math' : 'text')); }

# Parameter type 'Digested': the argument is read by readDigested,
# so it reaches the definition already digested.
DefParameterType('Digested', \&readDigested,
  # No further digestion wanted, since readDigested has _already_ done it.
  undigested => 1,
  # Reverts to the reverted argument enclosed between T_BEGIN and T_END.
  reversion => sub {
    my ($digested) = @_;
    return (T_BEGIN, Revert($digested), T_END); });

# Read a Delimiter;
# Formally a delimiter is either a token, or \delimiter<number> or maybe \radical<number>,
# but we don't actually restrict to those.
# Here, we just read a single Digested thing, but avoid reversion braces if possible.
DefParameterType('TeXDelimiter', \&readDigested,
  # No further digestion wanted, since readDigested has _already_ done it.
  undigested => 1,
  reversion  => sub {
    my ($delimiter) = @_;
    my @reverted = Revert($delimiter);
    # A lone reverted token stands by itself; anything else gets wrapped in braces.
    return $reverted[0] if scalar(@reverted) == 1;
    return (T_BEGIN, @reverted, T_END); });

# A variation: Digest until we encounter a given token!
DefParameterType('DigestUntil', sub {
my ($gullet, $until) = @_;
Expand Down
15 changes: 10 additions & 5 deletions lib/LaTeXML/Engine/TeX_FileIO.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,23 @@ DefPrimitive('\openout Number SkipSpaces SkipMatch:= SkipSpaces TeXFileName', su
# \closeout<number>: forget the file associated with the given output port.
# With the 'write' debug flag set, first report the content accumulated for that
# file (stored under "<filename>_contents"), if the port has a file at all.
DefPrimitive('\closeout Number', sub {
    my ($stomach, $port) = @_;
    my $port_key = 'output_file:' . ToString($port);
    if ($LaTeXML::DEBUG{write}) {
      my $filename = LookupValue($port_key);
      if ($filename) {
        my $contents = LookupValue($filename . '_contents');
        Debug("CLOSING $filename with content:\n$contents\n============================="); } }
    # Clear the port-to-file association globally.
    AssignValue($port_key => undef, 'global');
    return; });

DefPrimitive('\write Number {}', sub {
DefPrimitive('\write Number XGeneralText', sub {
my ($stomach, $port, $tokens) = @_;
$port = ToString($port);
if (my $filename = LookupValue('output_file:' . $port)) {
my $handle = $filename . '_contents';
my $contents = LookupValue($handle);
AssignValue($handle => $contents . UnTeX(Expand($tokens), 1) . "\n", 'global'); }
AssignValue($handle => $contents . UnTeX($tokens, 1) . "\n", 'global'); }
else {
Note(UnTeX(Expand($tokens))); }
Note(UnTeX($tokens)); }
return; });

# Since we don't paginate, we're effectively always "shipping out",
Expand Down Expand Up @@ -145,7 +150,7 @@ DefMacro('\input TeXFileName', sub {
#----------------------------------------------------------------------
# \special c sends material to the dvi file for special processing.

DefPrimitive('\special {}', sub {
DefPrimitive('\special XGeneralText', sub {
my ($stomach, $arg) = @_;
my $special_str = ToString($arg);
# recognize one special graphics inclusion case
Expand All @@ -161,7 +166,7 @@ DefPrimitive('\special {}', sub {
$stomach->getGullet->unread(
T_CS('\ltx@special@graphics'), @kv, T_BEGIN, T_OTHER($graphic), T_END); }
else {
Info('ignored', 'special', $stomach, 'Unrecognized TeX Special', $arg); }
Info('ignored', 'special', $stomach, 'Unrecognized TeX Special' . ToString($arg)); }
return; });

# adapted from graphicx.sty.ltxml
Expand Down
Loading
Loading