diff options
-rwxr-xr-x | htdocs/todo.html | 10 | ||||
-rwxr-xr-x | httpd/cgi-bin/check | 33 |
2 files changed, 33 insertions, 10 deletions
diff --git a/htdocs/todo.html b/htdocs/todo.html index 2d79f82..0e0e23d 100755 --- a/htdocs/todo.html +++ b/htdocs/todo.html @@ -8,7 +8,7 @@ W3C HTML Validation Service, To Do List"> <meta name="description" content="To Do List for W3C's Validation Service."> <meta name="revision" - content="$Id: todo.html,v 1.27 2001-03-06 07:01:47 link Exp $"> + content="$Id: todo.html,v 1.28 2001-03-07 19:53:55 link Exp $"> </head> <body bgcolor="#FFFFFF" text="#000000" link="#0000ee" vlink="#551a8b"> @@ -40,11 +40,9 @@ </p> <ol> - <li> apply <a - href="http://lists.w3.org/Archives/Public/www-validator/2000JulSep/0146.html">Takuya's - XML encoding patch</a>; merge in <a + <li>Merge in <a href="http://lists.w3.org/Archives/Public/www-validator/2000JulSep/0095.html">Terje's - cleanup patch</a> + cleanup patch</a>. <li> bug fix: <a href="http://validator.w3.org/check?uri=http%3A//www.w3.org/2001/01/xml-latin1.html;ss">this @@ -192,7 +190,7 @@ src="http://validator.w3.org/images/vh40" height=31 width=88 align=right border=0 alt="Valid W3C HTML 4.0!"></a> <a href="/feedback.html">Gerald Oskoboiny</A><br> - $Date: 2001-03-06 07:01:47 $ + $Date: 2001-03-07 19:53:55 $ </address> </body> diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index 7ed6969..371e584 100755 --- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -8,7 +8,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 1.84 2001-03-06 07:01:48 link Exp $ +# $Id: check,v 1.85 2001-03-07 19:53:55 link Exp $ # # We need Perl 5.004. @@ -73,9 +73,9 @@ my $element_ref = 'http://www.htmlhelp.com/reference/html40/'; # # Strings -$VERSION = q$Revision: 1.84 $; +$VERSION = q$Revision: 1.85 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; -$DATE = q$Date: 2001-03-06 07:01:48 $; +$DATE = q$Date: 2001-03-07 19:53:55 $; $MAINTAINER = 'gerald@w3.org'; my $notice = ''; # "<p><strong>Note: This service will be ...</strong>"; @@ -278,11 +278,24 @@ if ($doctype =~ /mathml/i && $guessed_doctype != 2) { } # +# If we find a XML declaration with charset information, +# we take it into account. +foreach my $line (@{$File->{Content}}) { + # @@ needs to handle declarations that span more than one line + if ($line =~ /<\?xml\s/) { + if ($line =~ /encoding\s*=[\s\"\']*([^\s;\"\'>]*)/) { + $File->{XML_Charset} = lc $1; + last; + } else {last}; + } +} + +# # If we find a META element with charset information, we take it into account. foreach my $line (@{$File->{Content}}) { # @@ needs to handle meta elements that span more than one line if ($line =~ /<meta/i) { - if ($line =~ /charset\s*=[\s\"]*([^\s;\">]*)/i) { + if ($line =~ /charset\s*=[\s\"\']*([^\s;\"\'>]*)/i) { $File->{META_Charset} = lc $1; last; } elsif ($line =~ /<body/i || $line =~ /<body/i) { @@ -295,6 +308,8 @@ foreach my $line (@{$File->{Content}}) { # Figure out which charset to use for the validation. if ($File->{HTTP_Charset}) { $File->{Charset} = $File->{HTTP_Charset}; +} elsif ($File->{XML_Charset}) { + $File->{Charset} = $File->{XML_Charset}; } elsif ($File->{META_Charset}) { $File->{Charset} = $File->{META_Charset}; } else { @@ -462,6 +477,16 @@ if ($File->{HTTP_Charset} ne $File->{META_Charset} specified in the META element ("<code>$File->{META_Charset}</code>"). I will use "<code>$File->{Charset}</code>" for this validation.</span></em> EOHD +} elsif ($File->{HTTP_Charset} ne $File->{XML_Charset} + and $File->{HTTP_Charset} ne '' + and $File->{XML_Charset} ne '' + and $File->{Charset} ne 'unknown') { + print <<"EOHD"; + <em><span class="warning">The character encoding specified in the HTTP + header ("<code>$File->{HTTP_Charset}</code>") is different from the one + specified in the XML declaration ("<code>$File->{XML_Charset}</code>"). + I will use "<code>$File->{Charset}</code>" for this validation.</span></em> +EOHD } print ' ' x 4, qq(</li>\n); |