3 files changed, 95 insertions, 36 deletions
diff --git a/htdocs/docs/errors.html b/htdocs/docs/errors.html
index 1f1869c..613a4fd 100755
--- a/htdocs/docs/errors.html
+++ b/htdocs/docs/errors.html
@@ -8,8 +8,8 @@
     <link rel="stylesheet" type="text/css" href="/base.css" />
     <meta name="keywords" content="HTML, Hypertext Markup Language, Validation, W3C HTML Validation Service" />
     <meta name="description" content="W3C's easy-to-use HTML validation service, based on an SGML parser." />
-    <meta name="revision" content="$Id: errors.html,v 1.7 2001-07-14 22:19:59 link Exp $" />
-    <meta name="modified" content="$Date: 2001-07-14 22:19:59 $" />
+    <meta name="revision" content="$Id: errors.html,v 1.8 2001-07-24 10:18:31 link Exp $" />
+    <meta name="modified" content="$Date: 2001-07-24 10:18:31 $" />
   </head>
 
   <body bgcolor="#FFFFFF" text="#000000" link="#0000ee" vlink="#551a8b">
@@ -559,6 +559,20 @@
           the previous error referred to.
         </p>
       </dd>
+      <dt><a id="utf8-bom" name="utf8-bom">"UTF-8 'BOM' detected and removed"</a></dt>
+      <dd>
+        <p>
+          The document contained a UTF-8 encoded Unicode Byte Order Mark as
+          the first character and we have removed it before parsing. The legality
+          of a UTF-8 encoded BOM in XML is controversial and many XML Processors
+          do not allow it. To be on the safe side you should avoid using the
+          BOM in UTF-8 encoded documents.
+        </p>
+        <p>
+          Note, though, that the BOM in <em>UTF-16</em> encoded documents
+          is non-controversial and handled by all conforming XML Processors.
+        </p>
+      </dd>
     </dl>
 
     <hr />
@@ -567,7 +581,7 @@
 	 src="http://validator.w3.org/images/vxhtml10" height="31" width="88"
 	 align="right" border="0" alt="Valid XHTML 1.0!" /></a>
       <a href="/feedback.html">Webmaster</a><br />
-	 $Date: 2001-07-14 22:19:59 $
+	 $Date: 2001-07-24 10:18:31 $
     </address>
 
   </body>
diff --git a/htdocs/results.css b/htdocs/results.css
index 4b00e1c..947b1ba 100644
--- a/htdocs/results.css
+++ b/htdocs/results.css
@@ -1,7 +1,7 @@
 
 /* style sheet for the validator's results page */
 
-/* $Id: results.css,v 1.4 2001-06-25 19:22:09 link Exp $ */
+/* $Id: results.css,v 1.5 2001-07-24 10:18:30 link Exp $ */
 
 .markup {
   color: red;
@@ -30,3 +30,14 @@ td {
 h1.title img {
   vertical-align: middle;
 }
+
+
+.Warning {
+  width: 75ex;
+  background: yellow;
+  border: solid;
+  border-color: black;
+  margin: .2em;
+  padding: .5em;
+  text-align: justify;
+}
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 940ae44..1637c5f 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -9,7 +9,7 @@
 # This source code is available under the license at:
 #     http://www.w3.org/Consortium/Legal/copyright-software
 #
-# $Id: check,v 1.160 2001-07-24 09:54:56 link Exp $
+# $Id: check,v 1.161 2001-07-24 10:18:31 link Exp $
 
 #
 # We need Perl 5.004.
@@ -80,9 +80,9 @@ my $element_ref = 'http://www.htmlhelp.com/reference/html40/';
 
 #
 # Strings
-$VERSION    =  q$Revision: 1.160 $;
+$VERSION    =  q$Revision: 1.161 $;
 $VERSION    =~ s/Revision: ([\d\.]+) /$1/;
-$DATE       =  q$Date: 2001-07-24 09:54:56 $;
+$DATE       =  q$Date: 2001-07-24 10:18:31 $;
 $MAINTAINER =  'gerald@w3.org';
 $NOTICE     =  ''; # "<p><strong>Note: This service will be ...</strong>";
 
@@ -257,10 +257,27 @@ EOF
 #
 
 #
+# Detect and remove a UTF-8 BOM.
+$File->{Content}[0] =~ s/^\xEF\xBB\xBF//
+  and &add_warning(<<".EOF.");
+     UTF-8 'BOM' detected and removed. (See the
+     <a href="$faqerrloc#utf8-bom">explanation</a> for details.)
+.EOF.
+
+
+#
 # Override DOCTYPE if user asked for it.
 if (defined $q->param('doctype') and not $q->param('doctype') =~ /Inline/i) {
   $File->{Content} = &supress_doctype($File->{Content});
   unshift @{$File->{Content}}, $doctypes->{$q->param('doctype')};
+  my $dtd = ent($q->param('doctype'));
+  &add_warning(<<".EOF.");
+  <strong>DOCTYPE Override in effect!</strong> Any DOCTYPE Declaration in the
+  document has been supressed and the DOCTYPE for &#171;<code>$dtd</code>&#187;
+  inserted instead. The document will not be Valid until you alter the source
+  file to reflect this new DOCTYPE.
+.EOF.
+  $File->{Tentative} = TRUE; # Tag it as Invalid.
 }
 
 #
@@ -352,34 +369,39 @@ if ($File->{HTTP_Charset} ne $File->{META_Charset}
     and $File->{HTTP_Charset} ne ''
     and $File->{META_Charset} ne ''
     and $File->{Charset} ne 'unknown') {
-  &add_warning( <<"EOHD");
-      The character encoding specified in the HTTP
-      header ("<code>$File->{HTTP_Charset}</code>") is different from the one
-      specified in the META element ("<code>$File->{META_Charset}</code>").
-      I will use "<code>$File->{Charset}</code>" for this validation.
+  &add_warning(<<"EOHD");
+      <strong>Character Encoding mismatch!</strong>
+      The character encoding specified in the HTTP header
+      (&#171;<code>$File->{HTTP_Charset}</code>&#187;) is different from the
+      one specified in the META element
+      (&#171;<code>$File->{META_Charset}</code>&#187;).
+      I will use &#171;<code>$File->{Charset}</code>&#187; for this validation.
 EOHD
 } elsif ($File->{HTTP_Charset} ne $File->{XML_Charset}
     and $File->{HTTP_Charset} ne ''
     and $File->{XML_Charset} ne ''
     and $File->{Charset} ne 'unknown') {
-  &add_warning( <<"EOHD");
-      The character encoding specified in the HTTP
-      header ("<code>$File->{HTTP_Charset}</code>") is different from the one
-      specified in the XML declaration ("<code>$File->{XML_Charset}</code>").
-      I will use "<code>$File->{Charset}</code>" for this validation.
+  &add_warning(<<"EOHD");
+      <strong>Character Encoding mismatch!</strong>
+      The character encoding specified in the HTTP header
+      (&#171;<code>$File->{HTTP_Charset}</code>&#187;) is different from the
+      one specified in the XML declaration
+      (&#171;<code>$File->{XML_Charset}</code>&#187;).
+      I will use &#171;<code>$File->{Charset}</code>&#187; for this validation.
 EOHD
 }
 if ($File->{Use_Charset} ne $File->{Charset}) {
-  &add_warning( <<"EOHD");
-      Detected character encoding ($File->{Charset}) and
-      selected character encoding ($File->{Use_Charset})
-      differ. Tentative validation only.
+  &add_warning(<<"EOHD");
+      <strong>Character Encoding Override in effect!</strong>
+      The detected character encoding
+      (&#171;<code>$File->{Charset}</code>&#187;) has been supressed and the
+      character encoding (&#171;<code>$File->{Use_Charset}</code>&#187;)
+      used instead. The document will not be Valid until you alter the source
+      file to reflect this new Character Encoding.
 EOHD
+  $File->{Tentative} = TRUE;
 }
 
-$File->{Content}[0] =~ s/^\xEF\xBB\xBF//
-  and &add_warning("UTF-8 'BOM' detected and removed.");
-
 {  # block for character conversion and checking
   my @lines;
   unless ($File->{Use_Charset} eq 'utf-8' or $File->{Use_Charset} eq 'unknown') {
@@ -522,7 +544,7 @@ $version = $pub_ids->{$fpi} || 'unknown';
 if ($File->{Type} eq 'xml' or $File->{Type} eq 'xhtml') {
   &add_table("Document Type", $version);
   if ($File->{Type} eq 'xhtml' and $File->{Namespace} ne 'http://www.w3.org/1999/xhtml') {
-    &add_warning ("Unknown namespace for text/html document!");
+    &add_warning ("Unknown namespace (&#171;<code>$File->{Namespace}</code>&#187;) for text/html document!");
     if ($File->{Namespace} ne '') {
       &add_table("Root Namespace",
 	"<a href='$File->{Namespace}'>$File->{Namespace}</a>");
@@ -579,10 +601,23 @@ EOHD
     Below are the results of attempting to parse this document with
     an SGML parser.
   </p>
-
 EOHD
 }
 
+if (defined $File->{Tentative}) {
+  print <<".EOF.";
+  <p class="Warning">
+    Please note that you have chosen one or more options that alter the content
+    of the document before Validation. Even if no errors are reported below,
+    the document will not be Valid until you manually make the changes we have
+    performed automatically. Specifically, since you used some of the options
+    that Override a property of the document (e.g. the DOCTYPE or Character
+    Encoding), you must make the same change to the source document before it
+    can be valid.
+  </p>
+.EOF.
+}
+
 if (scalar @{$File->{Errors}}) {
   $q->param('ss', TRUE);
   &report_errors($File)
@@ -633,17 +668,12 @@ sub print_table {
   print "  </table></form>\n";
 }
 
-sub add_warning {
-  push @{$File->{Warnings}}, shift;
-}
+sub add_warning {push @{$File->{Warnings}}, shift};
 
 sub print_warnings {
-  my $warning;
   return unless defined @{$File->{Warnings}};
   print "  <div><h2>Warnings</h2>\n  <ul>\n";
-  for $warning (@{$File->{Warnings}}) {
-    print "    <li class='warning'>$warning</li>\n";
-  }
+  print qq(    <li>Warning: $_</li>\n) for @{$File->{Warnings}};
   print "  </ul></div>\n";
 }
 
@@ -1222,7 +1252,8 @@ sub report_valid {
   } else {
     print "\n  <pre>\n    No errors found!</pre>\n\n";
   }
-  unless ($version eq 'unknown') {
+
+  unless ($version eq 'unknown' or defined $File->{Tentative}) {
     if ($version =~ /^HTML 2\.0$/) {
       $image_uri = "${abs_svc_uri}images/vh20";
       $alttext = "Valid HTML 2.0!";
@@ -1277,6 +1308,7 @@ sub report_valid {
       $image_uri = "${abs_svc_uri}images/vhhj";
       $alttext = "Valid Hotjava-HTML!";
     }
+
     if (defined $image_uri) {
       print <<"EOHD";
   <p>
@@ -1307,7 +1339,9 @@ EOHD
     }
   }
   if ($File->{Type} eq 'xml' and not $File->{DOCTYPE}) {
-    print "  <p>\n    Congratulations, this document is well-formed XML.\n  </p>\n";
+    print "  <p>Congratulations, this document is well-formed XML.</p>\n";
+  } elsif (defined $File->{Tentative}) {
+    print "  <p>\n    This document would validate as the document type specified if you updated it to match the Options used.\n  </p>\n";
   } elsif ($version eq 'unknown' or not defined $image_uri) {
     print "  <p>\n    Congratulations, this document validates as the document type specified! (I don't have an icon for this one yet, sorry.)\n  </p>\n";
   }
@@ -1566,7 +1600,7 @@ sub preparse {
       if (lc $tag eq 'meta') {
 	if (lc $attr{'http-equiv'} eq 'content-type') {
 	  $attr{content} =~ m(charset\s*=[\s\"\']*([^\s;\"\'>]*))si;
- 	  $File->{META_Charset} = lc $1;
+	  $File->{META_Charset} = lc $1;
 	}
       }
       return unless $tag eq $File->{Root};