diff options
-rwxr-xr-x | httpd/cgi-bin/check | 65 |
1 files changed, 44 insertions, 21 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index 8a3ba5b..55b106a 100755 --- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -8,7 +8,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 1.61 2000-02-01 23:17:01 gerald Exp $ +# $Id: check,v 1.62 2000-02-11 11:07:31 gerald Exp $ # # We need Perl 5.004. @@ -73,9 +73,9 @@ my $element_ref = 'http://www.htmlhelp.com/reference/html40/'; # # Strings -$VERSION = q$Revision: 1.61 $; +$VERSION = q$Revision: 1.62 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; -$DATE = q$Date: 2000-02-01 23:17:01 $; +$DATE = q$Date: 2000-02-11 11:07:31 $; $MAINTAINER = 'gerald@w3.org'; my $notice = ''; # "<p><strong>Note: This service will be ...</strong>"; @@ -249,9 +249,10 @@ EOF # # 1. check if there's a doctype # 2. if there is a doctype, parse/validate against that DTD -# 3. if no doctype, check for xml well-formedness -# 4. if xml is well-formed, check and report xmlns= attribute (anything else?) -# 5. if xml is not well-formed, report errors +# 3. if no doctype, check for an xmlns= attribute on the first element +# 4. if there is an xmlns= attribute, check for XML well-formedness +# 5. if there is no xmlns= attribute, validate as HTML using the doctype +# inferred by the check_for_doctype function # # @@ -261,9 +262,9 @@ if ($File->{Type} eq 'html' or $File->{Type} eq 'xhtml') { } # -# Set document type to XHTML if the DOCTYPE was for XHTML. This happens when -# a XHTML file is served as text/html (damn fool idea, if you ask me! -link). -if ($doctype =~ /xhtml/i) { +# Set document type to XHTML if the DOCTYPE was for XHTML. 
+# This happens when a XHTML file is served as text/html +if ($doctype =~ /xhtml/i && $guessed_doctype != 2) { $File->{Type} = 'xhtml'; } @@ -342,7 +343,7 @@ if ($File->{Type} eq 'xhtml') { $ENV{SP_CHARSET_FIXED} = 'YES'; $ENV{SP_ENCODING} = 'UTF-8'; $decl = $xhtmldecl; -} elsif ($guessed_doctype) { # no doctype was present; parse as xml/xhtml +} elsif ($guessed_doctype == 2) { # no doctype, with xmlns attr on 1st element $File->{Type} = 'xml'; # @@ probably a better way to do this $ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/sp-1.3/pubtext/xml.soc'; $ENV{SGML_SEARCH_PATH} = $sgmlstuff . '/sp-1.3/pubtext/'; @@ -362,6 +363,7 @@ my $command = "$codeconv $sp -E0 $xmlflags $catalog $decl"; open CHECKER, "|$command - >$temp.esis 2>$temp" or die "open(|$command - >$temp.esis 2>$temp) returned: $!\n"; +print CHECKER $doctype, "\n" if $guessed_doctype == 1; for (@{$File->{Content}}) {print CHECKER $_, "\n"} close CHECKER; @@ -411,6 +413,10 @@ if ($File->{Type} eq 'xhtml') { } $version = $pub_ids->{$fpi} || 'unknown'; +if ($guessed_doctype == 1) { + push( @fake_errors, "$sp:<OSFD>0:2:1:E: Missing DOCTYPE declaration at start of document (<a href=\"http://www.htmlhelp.org/tools/validator/doctype.html\">explanation...</a>)\n" ); +} + print ' ' x 4, q(<li>Character encoding: ), $File->{Charset}; if ($File->{HTTP_Charset} ne $File->{META_Charset} and $File->{META_Charset} ne '' @@ -476,7 +482,7 @@ EOHD } -if ( $? ) { +if ( $? || ($guessed_doctype == 1) ) { print "<ul>\n"; for ((@fake_errors,@errors)) { next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/; @@ -507,6 +513,7 @@ if ( $? ) { &output_doctype_spiel; last; } + $line-- if ( $guessed_doctype == 1 ); my $newline = $File->{Content}->[$line - 1]; # make sure there are no ^P's or ^Q's in the file, since we need to use @@ -859,6 +866,11 @@ if ( $q->param('ss') ) { EOF print "<pre>\n"; + if ( $guessed_doctype == 1 ) { + my $gd = $doctype . 
"\n"; + $gd =~ s/&/&amp;/go; $gd =~ s/</&lt;/go; + printf "%4d: %s", 0, $gd; + } $line = 1; for (@{$File->{Content}}) { s/&/&amp;/go; s/</&lt;/go; @@ -1090,9 +1102,15 @@ sub build_jump_links { } # -# Check if the document has a doctype; if it doesn't, try to guess an -# appropriate one given the elements used. Returns 2 values. First value is 0 -# if there was a DOCTYPE and 1 otherwise. The Second value is the doctype. +# Check if the document has a doctype; if it doesn't, try to guess +# an appropriate one given the elements used. Returns 2 values. +# The first value is: +# 0 if there was a DOCTYPE, +# 1 if there was no doctype and no xmlns= attribute +# on the first element in the document, or +# 2 if there was no doctype and there IS an xmlns= attribute +# on the first element +# The Second value is the doctype or namespace, if any. sub check_for_doctype { my $file = shift; # a reference to @file, for efficiency @@ -1100,7 +1118,12 @@ sub check_for_doctype { my $line = $file->[$count]; # does an HTML element precede the doctype on the same line? - last if $line =~ /<[a-z].*<!doctype/i; + if ( $line =~ /<[a-z].*<!doctype/i ) { + if ( $line =~ /<[a-z]+ xmlns=['"]([^ '"]*)/i ) {# look for an xmlns attr + return 2, $1; + } + last; + } if ($line =~ /<!doctype/i) { # found a doctype my $dttext = join '', @{$file}[$count .. $count + 5]; @@ -1112,17 +1135,17 @@ sub check_for_doctype { $line =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go; # Strip comments, so the next line doesn't find commented-out markup etc. # (this doesn't handle multi-line comments, unfortunately) - last if $line =~ /<[a-z]/i; # found an element + if ( $line =~ /<[a-z]/i ) { # found an element + if ( $line =~ /<[a-z]+ xmlns=['"]([^ '"]*)/i ) {# look for an xmlns attr + return 2, $1; + } + last; + } } for (@{$file}[0 .. 
20]) { return 1, $html40f_doctype if /<frame/i; } - for (@{$file}) { - return 1, $html40t_doctype if /<(table|body )/i; - return 1, $html32_doctype if /<center>/i; - return 1, $html32_doctype if /<[h0-9p]*\s*align\s*=\s*center>/i; - } return 1, $html40t_doctype; # no luck earlier; guess HTML 4.0 transitional } |