summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rwxr-xr-x  htdocs/docs/errors.html  20
-rw-r--r--  htdocs/results.css  13
-rwxr-xr-x  httpd/cgi-bin/check  98
3 files changed, 95 insertions, 36 deletions
diff --git a/htdocs/docs/errors.html b/htdocs/docs/errors.html
index 1f1869c..613a4fd 100755
--- a/htdocs/docs/errors.html
+++ b/htdocs/docs/errors.html
@@ -8,8 +8,8 @@
<link rel="stylesheet" type="text/css" href="/base.css" />
<meta name="keywords" content="HTML, Hypertext Markup Language, Validation, W3C HTML Validation Service" />
<meta name="description" content="W3C's easy-to-use HTML validation service, based on an SGML parser." />
- <meta name="revision" content="$Id: errors.html,v 1.7 2001-07-14 22:19:59 link Exp $" />
- <meta name="modified" content="$Date: 2001-07-14 22:19:59 $" />
+ <meta name="revision" content="$Id: errors.html,v 1.8 2001-07-24 10:18:31 link Exp $" />
+ <meta name="modified" content="$Date: 2001-07-24 10:18:31 $" />
</head>
<body bgcolor="#FFFFFF" text="#000000" link="#0000ee" vlink="#551a8b">
@@ -559,6 +559,20 @@
the previous error referred to.
</p>
</dd>
+ <dt><a id="utf8-bom" name="utf8-bom">"UTF-8 'BOM' detected and removed"</a></dt>
+ <dd>
+ <p>
+ The document contained a UTF-8 encoded Unicode Byte Order Mark as
+ the first character and we have removed it before parsing. The legality
+ of a UTF-8 encoded BOM in XML is controversial and many XML Processors
+ do not allow it. To be on the safe side you should avoid using the
+ BOM in UTF-8 encoded documents.
+ </p>
+ <p>
+ Note, though, that the BOM in <em>UTF-16</em> encoded documents
+ is non-controversial and handled by all conforming XML Processors.
+ </p>
+ </dd>
</dl>
<hr />
@@ -567,7 +581,7 @@
src="http://validator.w3.org/images/vxhtml10" height="31" width="88"
align="right" border="0" alt="Valid XHTML 1.0!" /></a>
<a href="/feedback.html">Webmaster</a><br />
- $Date: 2001-07-14 22:19:59 $
+ $Date: 2001-07-24 10:18:31 $
</address>
</body>
diff --git a/htdocs/results.css b/htdocs/results.css
index 4b00e1c..947b1ba 100644
--- a/htdocs/results.css
+++ b/htdocs/results.css
@@ -1,7 +1,7 @@
/* style sheet for the validator's results page */
-/* $Id: results.css,v 1.4 2001-06-25 19:22:09 link Exp $ */
+/* $Id: results.css,v 1.5 2001-07-24 10:18:30 link Exp $ */
.markup {
color: red;
@@ -30,3 +30,14 @@ td {
h1.title img {
vertical-align: middle;
}
+
+
+.Warning {
+ width: 75ex;
+ background: yellow;
+ border: solid;
+ border-color: black;
+ margin: .2em;
+ padding: .5em;
+ text-align: justify;
+}
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 940ae44..1637c5f 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -9,7 +9,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.160 2001-07-24 09:54:56 link Exp $
+# $Id: check,v 1.161 2001-07-24 10:18:31 link Exp $
#
# We need Perl 5.004.
@@ -80,9 +80,9 @@ my $element_ref = 'http://www.htmlhelp.com/reference/html40/';
#
# Strings
-$VERSION = q$Revision: 1.160 $;
+$VERSION = q$Revision: 1.161 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
-$DATE = q$Date: 2001-07-24 09:54:56 $;
+$DATE = q$Date: 2001-07-24 10:18:31 $;
$MAINTAINER = 'gerald@w3.org';
$NOTICE = ''; # "<p><strong>Note: This service will be ...</strong>";
@@ -257,10 +257,27 @@ EOF
#
#
+# Detect and remove a UTF-8 BOM.
+$File->{Content}[0] =~ s/^\xEF\xBB\xBF//
+ and &add_warning(<<".EOF.");
+ UTF-8 'BOM' detected and removed. (See the
+ <a href="$faqerrloc#utf8-bom">explanation</a> for details.)
+.EOF.
+
+
+#
# Override DOCTYPE if user asked for it.
if (defined $q->param('doctype') and not $q->param('doctype') =~ /Inline/i) {
$File->{Content} = &supress_doctype($File->{Content});
unshift @{$File->{Content}}, $doctypes->{$q->param('doctype')};
+ my $dtd = ent($q->param('doctype'));
+ &add_warning(<<".EOF.");
+ <strong>DOCTYPE Override in effect!</strong> Any DOCTYPE Declaration in the
+ document has been supressed and the DOCTYPE for &#171;<code>$dtd</code>&#187;
+ inserted instead. The document will not be Valid until you alter the source
+ file to reflect this new DOCTYPE.
+.EOF.
+ $File->{Tentative} = TRUE; # Tag it as Invalid.
}
#
@@ -352,34 +369,39 @@ if ($File->{HTTP_Charset} ne $File->{META_Charset}
and $File->{HTTP_Charset} ne ''
and $File->{META_Charset} ne ''
and $File->{Charset} ne 'unknown') {
- &add_warning( <<"EOHD");
- The character encoding specified in the HTTP
- header ("<code>$File->{HTTP_Charset}</code>") is different from the one
- specified in the META element ("<code>$File->{META_Charset}</code>").
- I will use "<code>$File->{Charset}</code>" for this validation.
+ &add_warning(<<"EOHD");
+ <strong>Character Encoding mismatch!</strong>
+ The character encoding specified in the HTTP header
+ (&#171;<code>$File->{HTTP_Charset}</code>&#187;) is different from the
+ one specified in the META element
+ (&#171;<code>$File->{META_Charset}</code>&#187;).
+ I will use &#171;<code>$File->{Charset}</code>&#187; for this validation.
EOHD
} elsif ($File->{HTTP_Charset} ne $File->{XML_Charset}
and $File->{HTTP_Charset} ne ''
and $File->{XML_Charset} ne ''
and $File->{Charset} ne 'unknown') {
- &add_warning( <<"EOHD");
- The character encoding specified in the HTTP
- header ("<code>$File->{HTTP_Charset}</code>") is different from the one
- specified in the XML declaration ("<code>$File->{XML_Charset}</code>").
- I will use "<code>$File->{Charset}</code>" for this validation.
+ &add_warning(<<"EOHD");
+ <strong>Character Encoding mismatch!</strong>
+ The character encoding specified in the HTTP header
+ (&#171;<code>$File->{HTTP_Charset}</code>&#187;) is different from the
+ one specified in the XML declaration
+ (&#171;<code>$File->{XML_Charset}</code>&#187;).
+ I will use &#171;<code>$File->{Charset}</code>&#187; for this validation.
EOHD
}
if ($File->{Use_Charset} ne $File->{Charset}) {
- &add_warning( <<"EOHD");
- Detected character encoding ($File->{Charset}) and
- selected character encoding ($File->{Use_Charset})
- differ. Tentative validation only.
+ &add_warning(<<"EOHD");
+ <strong>Character Encoding Override in effect!</strong>
+ The detected character encoding
+ (&#171;<code>$File->{Charset}</code>&#187;) has been supressed and the
+ character encoding (&#171;<code>$File->{Use_Charset}</code>&#187;)
+ used instead. The document will not be Valid until you alter the source
+ file to reflect this new Character Encoding.
EOHD
+ $File->{Tentative} = TRUE;
}
-$File->{Content}[0] =~ s/^\xEF\xBB\xBF//
- and &add_warning("UTF-8 'BOM' detected and removed.");
-
{ # block for character conversion and checking
my @lines;
unless ($File->{Use_Charset} eq 'utf-8' or $File->{Use_Charset} eq 'unknown') {
@@ -522,7 +544,7 @@ $version = $pub_ids->{$fpi} || 'unknown';
if ($File->{Type} eq 'xml' or $File->{Type} eq 'xhtml') {
&add_table("Document Type", $version);
if ($File->{Type} eq 'xhtml' and $File->{Namespace} ne 'http://www.w3.org/1999/xhtml') {
- &add_warning ("Unknown namespace for text/html document!");
+ &add_warning ("Unknown namespace (&#171;<code>$File->{Namespace}</code>&#187;) for text/html document!");
if ($File->{Namespace} ne '') {
&add_table("Root Namespace",
"<a href='$File->{Namespace}'>$File->{Namespace}</a>");
@@ -579,10 +601,23 @@ EOHD
Below are the results of attempting to parse this document with
an SGML parser.
</p>
-
EOHD
}
+if (defined $File->{Tentative}) {
+ print <<".EOF.";
+ <p class="Warning">
+ Please note that you have chosen one or more options that alter the content
+ of the document before Validation. Even if no errors are reported below,
+ the document will not be Valid until you manually make the changes we have
+ performed automatically. Specifically, since you used some of the options
+ that Override a property of the document (e.g. the DOCTYPE or Character
+ Encoding), you must make the same change to the source document before it
+ can be valid.
+ </p>
+.EOF.
+}
+
if (scalar @{$File->{Errors}}) {
$q->param('ss', TRUE);
&report_errors($File)
@@ -633,17 +668,12 @@ sub print_table {
print " </table></form>\n";
}
-sub add_warning {
- push @{$File->{Warnings}}, shift;
-}
+sub add_warning {push @{$File->{Warnings}}, shift};
sub print_warnings {
- my $warning;
return unless defined @{$File->{Warnings}};
print " <div><h2>Warnings</h2>\n <ul>\n";
- for $warning (@{$File->{Warnings}}) {
- print " <li class='warning'>$warning</li>\n";
- }
+ print qq( <li>Warning: $_</li>\n) for @{$File->{Warnings}};
print " </ul></div>\n";
}
@@ -1222,7 +1252,8 @@ sub report_valid {
} else {
print "\n <pre>\n No errors found!</pre>\n\n";
}
- unless ($version eq 'unknown') {
+
+ unless ($version eq 'unknown' or defined $File->{Tentative}) {
if ($version =~ /^HTML 2\.0$/) {
$image_uri = "${abs_svc_uri}images/vh20";
$alttext = "Valid HTML 2.0!";
@@ -1277,6 +1308,7 @@ sub report_valid {
$image_uri = "${abs_svc_uri}images/vhhj";
$alttext = "Valid Hotjava-HTML!";
}
+
if (defined $image_uri) {
print <<"EOHD";
<p>
@@ -1307,7 +1339,9 @@ EOHD
}
}
if ($File->{Type} eq 'xml' and not $File->{DOCTYPE}) {
- print " <p>\n Congratulations, this document is well-formed XML.\n </p>\n";
+ print " <p>Congratulations, this document is well-formed XML.</p>\n";
+ } elsif (defined $File->{Tentative}) {
+ print " <p>\n This document would validate as the document type specified if you updated it to match the Options used.\n </p>\n";
} elsif ($version eq 'unknown' or not defined $image_uri) {
print " <p>\n Congratulations, this document validates as the document type specified! (I don't have an icon for this one yet, sorry.)\n </p>\n";
}
@@ -1566,7 +1600,7 @@ sub preparse {
if (lc $tag eq 'meta') {
if (lc $attr{'http-equiv'} eq 'content-type') {
$attr{content} =~ m(charset\s*=[\s\"\']*([^\s;\"\'>]*))si;
- $File->{META_Charset} = lc $1;
+ $File->{META_Charset} = lc $1;
}
}
return unless $tag eq $File->{Root};