summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author ville <ville@localhost> 2009-01-07 22:19:31 +0000
committer ville <ville@localhost> 2009-01-07 22:19:31 +0000
commit d994292022a10afcbd3d0ff8e228e856de909eba (patch)
tree f056a1abaf8ed4913e6259fc25015212b4a4e831
parent 772f028e8abeb36b639ee5e5e84e90c4b6ddd5d6 (diff)
download markup-validator-d994292022a10afcbd3d0ff8e228e856de909eba.zip
markup-validator-d994292022a10afcbd3d0ff8e228e856de909eba.tar.gz
markup-validator-d994292022a10afcbd3d0ff8e228e856de909eba.tar.bz2
Improve doctype preparse performance by aborting as soon as possible, fix required HTML::Parser version.
-rwxr-xr-x htdocs/docs/install.html 6
-rwxr-xr-x httpd/cgi-bin/check 24
-rw-r--r-- misc/bundle/META.yml 2
-rw-r--r-- misc/bundle/Makefile.PL 2
-rw-r--r-- misc/bundle/lib/Bundle/W3C/Validator.pm 2
5 files changed, 21 insertions, 15 deletions
diff --git a/htdocs/docs/install.html b/htdocs/docs/install.html
index 8416595..9550cb8 100755
--- a/htdocs/docs/install.html
+++ b/htdocs/docs/install.html
@@ -1,5 +1,5 @@
-<!--#set var="revision" value="\$Id: install.html,v 1.43 2009-01-04 16:54:17 ville Exp $"
---><!--#set var="date" value="\$Date: 2009-01-04 16:54:17 $"
+<!--#set var="revision" value="\$Id: install.html,v 1.44 2009-01-07 22:19:31 ville Exp $"
+--><!--#set var="date" value="\$Date: 2009-01-07 22:19:31 $"
--><!--#set var="title" value="Installation Documentation for The W3C Markup Validation Service"
--><!--#set var="relroot" value="../"
--><!--#include virtual="../header.html" -->
@@ -136,7 +136,7 @@ install Bundle::W3C::Validator
<dd>
Support for multiple character encodings.
</dd>
- <dt><a href="http://search.cpan.org/dist/HTML-Parser/">HTML-Parser</a> &gt;= 3.25</dt>
+ <dt><a href="http://search.cpan.org/dist/HTML-Parser/">HTML-Parser</a> &gt;= 3.24</dt>
<dd>
Minimal HTML parser used for preparse and finding metadata.
</dd>
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 862f484..aedb1f8 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.636 2009-01-07 22:04:23 ville Exp $
+# $Id: check,v 1.637 2009-01-07 22:19:30 ville Exp $
#
# Disable buffering on STDOUT!
@@ -55,7 +55,7 @@ use Encode::HanExtra qw(); # for some chinese character encodings,
use Encode::JIS2K qw(); # ditto extra japanese encodings
use File::Spec::Functions qw(catfile);
use HTML::Encoding 0.52 qw();
-use HTML::Parser 3.25 qw(); # Need 3.25 for $p->ignore_elements.
+use HTML::Parser 3.24 qw(); # Need 3.24 for $p->parse($code_ref)
use HTML::Template 2.6 qw(); # Need 2.6 for path param, other things.
use HTTP::Headers::Util qw();
use HTTP::Request qw();
@@ -199,7 +199,7 @@ Directory not readable (permission denied): @_r
#
# Strings
- $VERSION = q$Revision: 1.636 $;
+ $VERSION = q$Revision: 1.637 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
#
@@ -2111,8 +2111,7 @@ sub preparse_doctype {
};
my $start = sub {
- my $tag = shift;
- my $attr = shift;
+ my ($p, $tag, $attr) = @_;
if ($File->{Root}) {
return unless $tag eq $File->{Root};
@@ -2122,6 +2121,9 @@ sub preparse_doctype {
if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}};
if ($attr->{version}) {$File->{'Root Version'} = $attr->{version}};
if ($attr->{baseProfile}) {$File->{'Root BaseProfile'} = $attr->{baseProfile}};
+
+ # We're done parsing.
+ $p->eof();
};
# we use HTML::Parser as pre-parser. May use html5lib or other in the future
@@ -2130,11 +2132,15 @@ sub preparse_doctype {
# if content-type has shown we should pre-parse with XML mode, use that
# otherwise (mostly text/html cases) use default mode
$p->xml_mode(TRUE) if ($File->{Mode} =~ /XML/);
- $p->ignore_elements('BODY');
- $p->ignore_elements('body');
$p->handler(declaration => $dtd, 'text');
- $p->handler(start => $start, 'tag,attr');
- $p->parse(join "\n", @{$File->{Content}});
+ $p->handler(start => $start, 'self,tag,attr');
+
+ my $line = 0;
+ my $max = scalar(@{$File->{Content}});
+ $p->parse(sub {
+ return ($line < $max) ? $File->{Content}->[$line++] . "\n" : undef;
+ });
+ $p->eof();
# TODO: These \s here are probably wrong now that the strings are utf8_on
$File->{DOCTYPE} = '' unless defined $File->{DOCTYPE};
diff --git a/misc/bundle/META.yml b/misc/bundle/META.yml
index 44292e2..da67bd7 100644
--- a/misc/bundle/META.yml
+++ b/misc/bundle/META.yml
@@ -15,7 +15,7 @@ requires:
Encode::JIS2K: 0
File::Spec::Functions: 0
HTML::Encoding: 0.52
- HTML::Parser: 3.25
+ HTML::Parser: 3.24
HTML::Template: 2.6
LWP::UserAgent: 2.032
Net::IP: 0
diff --git a/misc/bundle/Makefile.PL b/misc/bundle/Makefile.PL
index 6f2f549..cc04879 100644
--- a/misc/bundle/Makefile.PL
+++ b/misc/bundle/Makefile.PL
@@ -14,7 +14,7 @@ WriteMakefile(
Encode::JIS2K => 0,
File::Spec::Functions => 0,
HTML::Encoding => 0.52,
- HTML::Parser => 3.25,
+ HTML::Parser => 3.24,
HTML::Template => 2.6,
LWP::UserAgent => 2.032,
Net::IP => 0,
diff --git a/misc/bundle/lib/Bundle/W3C/Validator.pm b/misc/bundle/lib/Bundle/W3C/Validator.pm
index 2800a10..8a12751 100644
--- a/misc/bundle/lib/Bundle/W3C/Validator.pm
+++ b/misc/bundle/lib/Bundle/W3C/Validator.pm
@@ -27,7 +27,7 @@ C<perl -MCPAN -e "install Bundle::W3C::Validator">
Encode::JIS2K
File::Spec::Functions
HTML::Encoding 0.52
- HTML::Parser 3.25
+ HTML::Parser 3.24
HTML::Template 2.6
LWP::UserAgent 2.032
Net::IP