summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x htdocs/docs/install.html 8
-rwxr-xr-x htdocs/whatsnew.html 10
-rwxr-xr-x httpd/cgi-bin/check 47
3 files changed, 56 insertions, 9 deletions
diff --git a/htdocs/docs/install.html b/htdocs/docs/install.html
index fd09263..eb02146 100755
--- a/htdocs/docs/install.html
+++ b/htdocs/docs/install.html
@@ -1,5 +1,5 @@
-<!--#set var="revision" value="\$Id: install.html,v 1.56 2009-12-02 19:53:34 ville Exp $"
---><!--#set var="date" value="\$Date: 2009-12-02 19:53:34 $"
+<!--#set var="revision" value="\$Id: install.html,v 1.57 2009-12-11 18:40:24 ville Exp $"
+--><!--#set var="date" value="\$Date: 2009-12-11 18:40:24 $"
--><!--#set var="title" value="Installation Documentation for The W3C Markup Validation Service"
--><!--#set var="relroot" value="../"
--><!--#include virtual="../header.html" -->
@@ -139,7 +139,9 @@ install Bundle::W3C::Validator
</dd>
<dt><a href="http://search.cpan.org/dist/HTML-Parser/">HTML-Parser</a> &gt;= 3.24</dt>
<dd>
- Minimal HTML parser used for preparse and finding metadata.
+ Minimal HTML parser used for preparse and finding metadata. Version
+ 3.24 or newer is required, but the full feature set is available
+ only if version 3.60 or newer is installed.
</dd>
<dt><a href="http://search.cpan.org/dist/HTML-Encoding/">HTML-Encoding</a></dt>
<dd>
diff --git a/htdocs/whatsnew.html b/htdocs/whatsnew.html
index 9fd8dd8..f46e2ca 100755
--- a/htdocs/whatsnew.html
+++ b/htdocs/whatsnew.html
@@ -1,5 +1,5 @@
-<!--#set var="revision" value="\$Id: whatsnew.html,v 1.90 2009-12-10 19:28:13 ville Exp $"
---><!--#set var="date" value="\$Date: 2009-12-10 19:28:13 $"
+<!--#set var="revision" value="\$Id: whatsnew.html,v 1.91 2009-12-11 18:40:24 ville Exp $"
+--><!--#set var="date" value="\$Date: 2009-12-11 18:40:24 $"
--><!--#set var="title" value="What's New at The W3C Markup Validation Service"
--><!--#set var="relroot" value="./"
--><!--#set var="feeds" value="1"
@@ -95,6 +95,12 @@
to 45 seconds</a>.
</li>
<li>
+ Enhancement: support for charsets specified with
+ <a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=5992"><code>&lt;meta
+ charset="..."&gt;</code> in HTML5 documents</a> has been added
+ (provided that HTML::HeadParser &gt;= 3.60 is installed).
+ </li>
+ <li>
Bug fix:
<a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=5132">full
document doctype was incorrectly in effect</a> when validating
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 1ced5f3..c4db09b 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.746 2009-12-10 19:28:13 ville Exp $
+# $Id: check,v 1.747 2009-12-11 18:40:24 ville Exp $
#
# We need Perl 5.8.0+.
@@ -197,7 +197,7 @@ EOF
#
# Strings
- $VERSION = q$Revision: 1.746 $;
+ $VERSION = q$Revision: 1.747 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
# Read friendly error message file
@@ -3069,8 +3069,47 @@ sub find_encodings
$metah{lc($meta)}++ if defined $meta and length $meta;
}
- my @meta = sort { $metah{$b} <=> $metah{$a} } keys %metah;
- $File->{Charset}->{META} = $meta[0] if @meta;
+ if (!%metah) {
+
+ # HTML::Encoding doesn't support HTML5 <meta charset> as of 0.60,
+ # check it ourselves. HTML::HeadParser >= 3.60 is required for this.
+
+ if (eval {
+ local $SIG{__DIE__} = undef;
+ require HTML::HeadParser;
+ HTML::HeadParser->VERSION(3.60);
+ }
+ )
+ {
+ my $hp = HTML::HeadParser->new();
+ my $seen_doctype = FALSE;
+ my $is_html5 = FALSE;
+ $hp->handler(
+ declaration => sub {
+ my ($tag, $text) = @_;
+ return if ($seen_doctype || uc($tag) ne '!DOCTYPE');
+ $seen_doctype = TRUE;
+ $is_html5 = TRUE
+ if (
+ $text =~ /<!DOCTYPE\s+html
+ (\s+SYSTEM\s+(['"])about:legacy-compat\2)?
+ \s*>/six
+ );
+ },
+ 'tag,text'
+ );
+ $hp->parse($File->{Bytes});
+ if ($is_html5) {
+ my $cs = $hp->header('X-Meta-Charset');
+ $metah{lc($cs)}++ if (defined($cs) && length($cs));
+ }
+ }
+ }
+
+ if (%metah) {
+ my @meta = sort { $metah{$b} <=> $metah{$a} } keys %metah;
+ $File->{Charset}->{META} = $meta[0];
+ }
return $File;
}