# package HTML::WebMake::MetaTable; ########################################################################### use Carp; use strict; use HTML::WebMake::Main; use vars qw{ @ISA $TARGETS $METAS }; $TARGETS = 1; $METAS = 2; ########################################################################### sub new ($$$$$) { my $class = shift; $class = ref($class) || $class; my ($main) = @_; my $self = { 'main' => $main, }; bless ($self, $class); $self; } sub dbg { HTML::WebMake::Main::dbg (@_); } sub dbg2 { HTML::WebMake::Main::dbg2 (@_); } # ------------------------------------------------------------------------- sub set_name_sed_callback { my ($self, $sedobj, $sedmethod) = @_; $self->{sedobj} = $sedobj; $self->{sedmethod} = $sedmethod; } # ------------------------------------------------------------------------- sub parse_metatable { my ($self, $attrs, $text) = @_; my $fmt = $attrs->{format}; if (!defined $fmt || $fmt eq 'csv') { return $self->parse_metatable_csv ($attrs, $text); } else { return $self->parse_metatable_xml ($attrs, $text); } } # ------------------------------------------------------------------------- sub parse_metatable_csv { my ($self, $attrs, $text) = @_; my $delim = $attrs->{delimiter}; $delim ||= "\t"; $delim = qr{\Q${delim}\E}; my @metanames = (); my @addinfo = (); my $i; my $lang; foreach my $line (split (/\n/, $text)) { my @elems = split (/${delim}/, $line); my $contname = shift @elems; next unless defined $contname; if ($contname eq '.') { @metanames = @elems; next; } if ($contname eq '..') { @addinfo = @elems; $lang = lc $addinfo[1] if $addinfo[0] =~ 'lang'; dbg2 ("setting \"lang\" to \"$lang\""); next; } $contname = $self->fixname ($contname); my $contobj = $self->{main}->{contents}->{$contname}; if (!defined $contobj) { $self->{main}->fail (": cannot find content \${$contname}"); next; } if ($#metanames < 0) { $self->{main}->fail (": no '.' line in file"); next; } for ($i = 0; $i <= $#elems && $i <= $#metanames; $i++) { my $metaname = $metanames[$i]; $metaname .= "::".$lang if defined $lang; my $val = $elems[$i]; $contobj->create_extra_metas_if_needed(); $contobj->{extra_metas}->{$metaname} = $val; dbg2 ("attaching metadata \"$metaname\"=\"$val\" to content \"$contname\""); } } } # ------------------------------------------------------------------------- sub parse_metatable_xml { my ($self, $attrs, $text) = @_; # trim off text before/after chunk $text =~ s/^.*?]*?>//gis; $text =~ s/<\/\s*metaset\s*>.*$//gis; # TODO: once we require an XML parser for XSLT stuff, we should use # that here instead of strip_tags. my $util = $self->{main}->{util}; my $src = $attrs->{src}; $src ||= '(.wmk file)'; $util->set_filename ($src); # Right, this is nasty. Perl coredumps here regularly... :( Basically it # looks like the nested XML parsing calls tickle a bug in 5.6.0, resulting in # a coredump inside malloc() on RedHat 7.1 at least. # # The workaround that _seems_ to work is to move the parsing of the textblock # inside the tags out of that parser loop, by storing them in a hash # until the tags are all parsed, then parsing them afterwards. # gross and not as efficient, but it works. $self->{targetblocks} = { }; $self->parse_xml_block ($text, $TARGETS); # $text = ''; foreach my $contname (keys %{$self->{targetblocks}}) { $contname = $self->fixname ($contname); my $contobj = $self->{main}->{contents}->{$contname}; $text = $self->{targetblocks}->{$contname}; $self->{tagging_content} = $contobj; $self->parse_xml_block ($text, $METAS); } delete $self->{targetblocks}; $text = ''; undef; } # ------------------------------------------------------------------------- sub tag_target { my ($self, $tag, $attrs, $text) = @_; my $contname = $attrs->{'id'}; my $contobj = $self->{main}->{contents}->{$contname}; if (!defined $contobj) { $self->{main}->fail (": cannot find content \${$contname}"); return ''; } $self->{targetblocks}->{$contname} = $text; ''; } # ------------------------------------------------------------------------- sub tag_meta { my ($self, $tag, $attrs, $text) = @_; my $contobj = $self->{tagging_content}; $contobj->create_extra_metas_if_needed(); $contobj->{extra_metas}->{$attrs->{'name'}} = $text; ''; } # ------------------------------------------------------------------------- sub parse_xml_block { my ($self, $block, $subtags) = @_; my $util = $self->{main}->{util}; $block =~ s/^\s+//gs; $block =~ s/^//gs; if ($subtags eq $TARGETS) { $block = $util->strip_tags ($block, "target", $self, \&tag_target, qw(id)); } elsif ($subtags eq $METAS) { $block = $util->strip_tags ($block, "meta", $self, \&tag_meta, qw(name)); } else { die "oops!"; } if (!defined $block) { $self->{main}->fail ("XML metatable file was unparseable"); } elsif ($block =~ /\S/) { $block =~ /^(.*?>.{40,40})/s; $block = $1 || $_; $block =~ s/\s+/ /gs; $self->{main}->fail ("XML metatable file contains unparseable data at:\n". "\t$block ...\"\n"); } 1; } # ------------------------------------------------------------------------- sub fixname { my ($self, $contname) = @_; if (defined $self->{sedobj}) { $contname = &{$self->{sedmethod}} ($self->{sedobj}, $contname); } $contname; } # ------------------------------------------------------------------------- 1; # METATABLE XML FORMAT # # The idea is to allow tagging of content items with metadata in an XML # format. # # # # # This is contentname's title. # # #