diff options
-rwxr-xr-x | htdocs/docs/install.html | 8 | ||||
-rwxr-xr-x | htdocs/whatsnew.html | 10 | ||||
-rwxr-xr-x | httpd/cgi-bin/check | 47 |
3 files changed, 56 insertions, 9 deletions
diff --git a/htdocs/docs/install.html b/htdocs/docs/install.html index fd09263..eb02146 100755 --- a/htdocs/docs/install.html +++ b/htdocs/docs/install.html @@ -1,5 +1,5 @@ -<!--#set var="revision" value="\$Id: install.html,v 1.56 2009-12-02 19:53:34 ville Exp $" ---><!--#set var="date" value="\$Date: 2009-12-02 19:53:34 $" +<!--#set var="revision" value="\$Id: install.html,v 1.57 2009-12-11 18:40:24 ville Exp $" +--><!--#set var="date" value="\$Date: 2009-12-11 18:40:24 $" --><!--#set var="title" value="Installation Documentation for The W3C Markup Validation Service" --><!--#set var="relroot" value="../" --><!--#include virtual="../header.html" --> @@ -139,7 +139,9 @@ install Bundle::W3C::Validator </dd> <dt><a href="http://search.cpan.org/dist/HTML-Parser/">HTML-Parser</a> >= 3.24</dt> <dd> - Minimal HTML parser used for preparse and finding metadata. + Minimal HTML parser used for preparse and finding metadata. Version + 3.24 or newer is required, but the full feature set is available + only if version 3.60 or newer is installed. </dd> <dt><a href="http://search.cpan.org/dist/HTML-Encoding/">HTML-Encoding</a></dt> <dd> diff --git a/htdocs/whatsnew.html b/htdocs/whatsnew.html index 9fd8dd8..f46e2ca 100755 --- a/htdocs/whatsnew.html +++ b/htdocs/whatsnew.html @@ -1,5 +1,5 @@ -<!--#set var="revision" value="\$Id: whatsnew.html,v 1.90 2009-12-10 19:28:13 ville Exp $" ---><!--#set var="date" value="\$Date: 2009-12-10 19:28:13 $" +<!--#set var="revision" value="\$Id: whatsnew.html,v 1.91 2009-12-11 18:40:24 ville Exp $" +--><!--#set var="date" value="\$Date: 2009-12-11 18:40:24 $" --><!--#set var="title" value="What's New at The W3C Markup Validation Service" --><!--#set var="relroot" value="./" --><!--#set var="feeds" value="1" @@ -95,6 +95,12 @@ to 45 seconds</a>. </li> <li> + Enhancement: support for charsets specified with + <a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=5992"><code><meta + charset="..."></code> in HTML5 documents</a> has been added + (provided that HTML::HeadParser >= 3.60 is installed). + </li> + <li> Bug fix: <a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=5132">full document doctype was incorrectly in effect</a> when validating diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index 1ced5f3..c4db09b 100755 --- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -14,7 +14,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 1.746 2009-12-10 19:28:13 ville Exp $ +# $Id: check,v 1.747 2009-12-11 18:40:24 ville Exp $ # # We need Perl 5.8.0+. @@ -197,7 +197,7 @@ EOF # # Strings - $VERSION = q$Revision: 1.746 $; + $VERSION = q$Revision: 1.747 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; # Read friendly error message file @@ -3069,8 +3069,47 @@ sub find_encodings $metah{lc($meta)}++ if defined $meta and length $meta; } - my @meta = sort { $metah{$b} <=> $metah{$a} } keys %metah; - $File->{Charset}->{META} = $meta[0] if @meta; + if (!%metah) { + + # HTML::Encoding doesn't support HTML5 <meta charset> as of 0.60, + # check it ourselves. HTML::HeadParser >= 3.60 is required for this. + + if (eval { + local $SIG{__DIE__} = undef; + require HTML::HeadParser; + HTML::HeadParser->VERSION(3.60); + } + ) + { + my $hp = HTML::HeadParser->new(); + my $seen_doctype = FALSE; + my $is_html5 = FALSE; + $hp->handler( + declaration => sub { + my ($tag, $text) = @_; + return if ($seen_doctype || uc($tag) ne '!DOCTYPE'); + $seen_doctype = TRUE; + $is_html5 = TRUE + if ( + $text =~ /<!DOCTYPE\s+html + (\s+SYSTEM\s+(['"])about:legacy-compat\2)? + \s*>/six + ); + }, + 'tag,text' + ); + $hp->parse($File->{Bytes}); + if ($is_html5) { + my $cs = $hp->header('X-Meta-Charset'); + $metah{lc($cs)}++ if (defined($cs) && length($cs)); + } + } + } + + if (%metah) { + my @meta = sort { $metah{$b} <=> $metah{$a} } keys %metah; + $File->{Charset}->{META} = $meta[0]; + } return $File; } |