diff options
author | ville <ville@localhost> | 2009-01-07 22:19:31 +0000 |
---|---|---|
committer | ville <ville@localhost> | 2009-01-07 22:19:31 +0000 |
commit | d994292022a10afcbd3d0ff8e228e856de909eba (patch) | |
tree | f056a1abaf8ed4913e6259fc25015212b4a4e831 | |
parent | 772f028e8abeb36b639ee5e5e84e90c4b6ddd5d6 (diff) | |
download | markup-validator-d994292022a10afcbd3d0ff8e228e856de909eba.zip markup-validator-d994292022a10afcbd3d0ff8e228e856de909eba.tar.gz markup-validator-d994292022a10afcbd3d0ff8e228e856de909eba.tar.bz2 |
Improve doctype preparse performance by aborting as soon as possible, fix required HTML::Parser version.
-rwxr-xr-x | htdocs/docs/install.html | 6 |
-rwxr-xr-x | httpd/cgi-bin/check | 24 |
-rw-r--r-- | misc/bundle/META.yml | 2 |
-rw-r--r-- | misc/bundle/Makefile.PL | 2 |
-rw-r--r-- | misc/bundle/lib/Bundle/W3C/Validator.pm | 2 |
5 files changed, 21 insertions, 15 deletions
diff --git a/htdocs/docs/install.html b/htdocs/docs/install.html index 8416595..9550cb8 100755 --- a/htdocs/docs/install.html +++ b/htdocs/docs/install.html @@ -1,5 +1,5 @@ -<!--#set var="revision" value="\$Id: install.html,v 1.43 2009-01-04 16:54:17 ville Exp $" ---><!--#set var="date" value="\$Date: 2009-01-04 16:54:17 $" +<!--#set var="revision" value="\$Id: install.html,v 1.44 2009-01-07 22:19:31 ville Exp $" +--><!--#set var="date" value="\$Date: 2009-01-07 22:19:31 $" --><!--#set var="title" value="Installation Documentation for The W3C Markup Validation Service" --><!--#set var="relroot" value="../" --><!--#include virtual="../header.html" --> @@ -136,7 +136,7 @@ install Bundle::W3C::Validator <dd> Support for multiple character encodings. </dd> - <dt><a href="http://search.cpan.org/dist/HTML-Parser/">HTML-Parser</a> >= 3.25</dt> + <dt><a href="http://search.cpan.org/dist/HTML-Parser/">HTML-Parser</a> >= 3.24</dt> <dd> Minimal HTML parser used for preparse and finding metadata. </dd> diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index 862f484..aedb1f8 100755 --- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -14,7 +14,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 1.636 2009-01-07 22:04:23 ville Exp $ +# $Id: check,v 1.637 2009-01-07 22:19:30 ville Exp $ # # Disable buffering on STDOUT! @@ -55,7 +55,7 @@ use Encode::HanExtra qw(); # for some chinese character encodings, use Encode::JIS2K qw(); # ditto extra japanese encodings use File::Spec::Functions qw(catfile); use HTML::Encoding 0.52 qw(); -use HTML::Parser 3.25 qw(); # Need 3.25 for $p->ignore_elements. +use HTML::Parser 3.24 qw(); # Need 3.24 for $p->parse($code_ref) use HTML::Template 2.6 qw(); # Need 2.6 for path param, other things. 
use HTTP::Headers::Util qw(); use HTTP::Request qw(); @@ -199,7 +199,7 @@ Directory not readable (permission denied): @_r # # Strings - $VERSION = q$Revision: 1.636 $; + $VERSION = q$Revision: 1.637 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; # @@ -2111,8 +2111,7 @@ sub preparse_doctype { }; my $start = sub { - my $tag = shift; - my $attr = shift; + my ($p, $tag, $attr) = @_; if ($File->{Root}) { return unless $tag eq $File->{Root}; @@ -2122,6 +2121,9 @@ sub preparse_doctype { if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}}; if ($attr->{version}) {$File->{'Root Version'} = $attr->{version}}; if ($attr->{baseProfile}) {$File->{'Root BaseProfile'} = $attr->{baseProfile}}; + + # We're done parsing. + $p->eof(); }; # we use HTML::Parser as pre-parser. May use html5lib or other in the future @@ -2130,11 +2132,15 @@ sub preparse_doctype { # if content-type has shown we should pre-parse with XML mode, use that # otherwise (mostly text/html cases) use default mode $p->xml_mode(TRUE) if ($File->{Mode} =~ /XML/); - $p->ignore_elements('BODY'); - $p->ignore_elements('body'); $p->handler(declaration => $dtd, 'text'); - $p->handler(start => $start, 'tag,attr'); - $p->parse(join "\n", @{$File->{Content}}); + $p->handler(start => $start, 'self,tag,attr'); + + my $line = 0; + my $max = scalar(@{$File->{Content}}); + $p->parse(sub { + return ($line < $max) ? $File->{Content}->[$line++] . 
"\n" : undef; + }); + $p->eof(); # TODO: These \s here are probably wrong now that the strings are utf8_on $File->{DOCTYPE} = '' unless defined $File->{DOCTYPE}; diff --git a/misc/bundle/META.yml b/misc/bundle/META.yml index 44292e2..da67bd7 100644 --- a/misc/bundle/META.yml +++ b/misc/bundle/META.yml @@ -15,7 +15,7 @@ requires: Encode::JIS2K: 0 File::Spec::Functions: 0 HTML::Encoding: 0.52 - HTML::Parser: 3.25 + HTML::Parser: 3.24 HTML::Template: 2.6 LWP::UserAgent: 2.032 Net::IP: 0 diff --git a/misc/bundle/Makefile.PL b/misc/bundle/Makefile.PL index 6f2f549..cc04879 100644 --- a/misc/bundle/Makefile.PL +++ b/misc/bundle/Makefile.PL @@ -14,7 +14,7 @@ WriteMakefile( Encode::JIS2K => 0, File::Spec::Functions => 0, HTML::Encoding => 0.52, - HTML::Parser => 3.25, + HTML::Parser => 3.24, HTML::Template => 2.6, LWP::UserAgent => 2.032, Net::IP => 0, diff --git a/misc/bundle/lib/Bundle/W3C/Validator.pm b/misc/bundle/lib/Bundle/W3C/Validator.pm index 2800a10..8a12751 100644 --- a/misc/bundle/lib/Bundle/W3C/Validator.pm +++ b/misc/bundle/lib/Bundle/W3C/Validator.pm @@ -27,7 +27,7 @@ C<perl -MCPAN -e "install Bundle::W3C::Validator"> Encode::JIS2K File::Spec::Functions HTML::Encoding 0.52 - HTML::Parser 3.25 + HTML::Parser 3.24 HTML::Template 2.6 LWP::UserAgent 2.032 Net::IP |