diff options
author | gerald <gerald@localhost> | 2000-01-26 15:13:24 +0000 |
---|---|---|
committer | gerald <gerald@localhost> | 2000-01-26 15:13:24 +0000 |
commit | 0c57846899e0437ced4a724501cb29bde9300622 (patch) | |
tree | 84d9fb3c32cf706160da56bdb2362f661e0a49aa | |
parent | 463a67b7d4a33682f8d62755c27cc9b8baf34355 (diff) | |
download | markup-validator-0c57846899e0437ced4a724501cb29bde9300622.zip markup-validator-0c57846899e0437ced4a724501cb29bde9300622.tar.gz markup-validator-0c57846899e0437ced4a724501cb29bde9300622.tar.bz2 |
updated handling of xhtml, namespaces; changed default meaning of
'text/html' docs without doctypes to mean XHTML, coinciding with
today's XHTML REC
-rwxr-xr-x | httpd/cgi-bin/check | 127 |
1 files changed, 95 insertions, 32 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index 9075d93..c898563 100755 --- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -8,7 +8,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 1.57 1999-12-10 18:47:31 gerald Exp $ +# $Id: check,v 1.58 2000-01-26 15:13:24 gerald Exp $ # # We need Perl 5.004. @@ -51,7 +51,7 @@ my $frag_db = $html_path . 'config/frag.cfg'; my $type_db = $html_path . 'config/type.cfg'; my $sgmlstuff = $html_path . 'sgml-lib'; my $sgmldecl = $sgmlstuff . '/REC-html40-19980424/HTML4.decl'; -my $xhtmldecl = $sgmlstuff . '/PR-xhtml1-19991210/xhtml1.dcl'; +my $xhtmldecl = $sgmlstuff . '/REC-xhtml1-20000126/xhtml1.dcl'; my $xmldecl = $sgmlstuff . '/sp-1.3/pubtext/xml.dcl'; my $temp = "/tmp/validate.$$"; # @@ Use POSIX/IO::File tmpfiles instead! @@ -73,9 +73,9 @@ my $element_ref = 'http://www.htmlhelp.com/reference/html40/'; # # Strings -$VERSION = q$Revision: 1.57 $; +$VERSION = q$Revision: 1.58 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; -$DATE = q$Date: 1999-12-10 18:47:31 $; +$DATE = q$Date: 2000-01-26 15:13:24 $; $MAINTAINER = 'gerald@w3.org'; my $notice = ''; # "<p><strong>Note: This service will be ...</strong>"; @@ -242,6 +242,18 @@ EOF } # +# Overall parsing algorithm for documents returned as text/html: +# +# For documents that come to us as text/html, +# +# 1. check if there's a doctype +# 2. if there is a doctype, parse/validate against that DTD +# 3. if no doctype, check for xml well-formedness +# 4. if xml is well-formed, check and report xmlns= attribute (anything else?) +# 5. if xml is not well-formed, report errors +# + +# # Try to extract or guess the DOCTYPE for HTML and XHTML files. 
if ($File->{Type} eq 'html' or $File->{Type} eq 'xhtml') { ($guessed_doctype, $doctype) = &check_for_doctype($File->{Content}); @@ -322,13 +334,15 @@ print(' ' x 4, q(<li>Content length: ), $File->{Size}, qq(</li>\n)) my $xmlflags = ''; my $decl = ''; + if ($File->{Type} eq 'xhtml') { - $ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/PR-xhtml1-19991210/xhtml.cat'; - $ENV{SGML_SEARCH_PATH} = $sgmlstuff . '/PR-xhtml1-19991210/'; + $ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/REC-xhtml1-20000126/xhtml.soc'; + $ENV{SGML_SEARCH_PATH} = $sgmlstuff . '/REC-xhtml1-20000126/'; $ENV{SP_CHARSET_FIXED} = 'YES'; $ENV{SP_ENCODING} = 'UTF-8'; $decl = $xhtmldecl; -} elsif ($File->{Type} eq 'xml') { +} elsif ($guessed_doctype) { # no doctype was present; parse as xml/xhtml + $File->{Type} = 'xml'; # @@ probably a better way to do this $ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/sp-1.3/pubtext/xml.soc'; $ENV{SGML_SEARCH_PATH} = $sgmlstuff . '/sp-1.3/pubtext/'; $ENV{SP_CHARSET_FIXED} = 'YES'; @@ -342,12 +356,11 @@ if ($File->{Type} eq 'xhtml') { my $command = "$codeconv $sp -E0 $xmlflags $catalog $decl"; -# print " <li>nsgmls command line: <code>$command</code>\n"; +# print " <li>nsgmls command line: <code>$command</code>\n"; open CHECKER, "|$command - >$temp.esis 2>$temp" or die "open(|$command - >$temp.esis 2>$temp) returned: $!\n"; -print CHECKER "$doctype\n" if $guessed_doctype; for (@{$File->{Content}}) {print CHECKER $_, "\n"} close CHECKER; @@ -356,8 +369,22 @@ my @errors = <ERRORS>; close ERRORS or warn "close($temp) returned: $!\n"; my @esis; +my $elements_found = 0; +my $root_namespace; +my @other_namespaces; open ESIS, "$temp.esis" or die "open($temp.esis) returned: $!\n"; while (<ESIS>) { + $elements_found++ if ( /^\(/ ); + if ( ($File->{Type} eq 'xml') && # look for xml namespaces + ( (/^Axmlns() \w+ (.*)/) || (/^Axmlns:([^ ]+) \w+ (.*)/) ) ) { + if ( ( ! 
defined $root_namespace ) && + ( $elements_found == 0 ) && ( $1 eq "" ) ) { + $root_namespace = $2; + } + else { + push( @other_namespaces, $2 ); + } + } next if / IMPLIED$/; next if /^ASDAFORM CDATA /; next if /^ASDAPREF CDATA /; @@ -383,10 +410,6 @@ if ($File->{Type} eq 'xhtml') { } $version = $pub_ids->{$fpi} || 'unknown'; -if ($guessed_doctype) { - push( @fake_errors, "$sp:<OSFD>0:2:1:E: Missing DOCTYPE declaration at start of document (<a href=\"http://www.htmlhelp.org/tools/validator/doctype.html\">explanation...</a>)\n" ); -} - print ' ' x 4, q(<li>Character encoding: ), $File->{Charset}; if ($File->{HTTP_Charset} ne $File->{META_Charset} and $File->{META_Charset} ne '' @@ -400,23 +423,48 @@ EOHD } print ' ' x 4, qq(</li>\n); -print ' ' x 4, qq(<li>Document type: <em>), $version, qq(</em></li>\n); +if ($File->{Type} eq 'xml') { + + print ' ' x 4, qq(<li>Document type: ), $version; + if ( ( $type eq "html" ) && + ( $root_namespace ne "http://www.w3.org/1999/xhtml" ) ) { + print "<br>warning: unknown namespace for text/html document!"; + if ( $root_namespace ne '' ) { + print qq{, <a href="$root_namespace">$root_namespace</a>}; + } + print "\n"; + } + else { + if ( $root_namespace ne '' ) { + print qq( with namespace <a href="$root_namespace">$root_namespace</a>); + } + } + + if ( $#other_namespaces >= 0 ) { + print "<br>Other namespaces in this document: "; + for (@other_namespaces) { + print qq(<a href="$_">$_</a>, ), "\n"; + } + } + print qq(</li>\n); +} +else { + print ' ' x 4, qq(<li>Document type: ), $version, qq(</li>\n); +} print ' ' x 2, qq(</ul>\n\n); if ($File->{Type} eq 'xml') { print <<"EOHD"; <p> - <strong>Note: experimental XML support was added to this service - on Aug 31, 1999, but it is not quite working yet; stay tuned to <a - href="http://lists.w3.org/Archives/Public/www-validator/">the - <code>www-validator</code> mailing list</a> for updates, and - please do not trust this service\'s output for XML documents - in the meantime.</strong> + 
Below are the results of checking this document for <a + href="http://www.w3.org/TR/REC-xml#sec-conformance">XML + well-formedness</a>. </p> + EOHD } - +else { print <<"EOHD"; <p> Below are the results of attempting to parse this document with @@ -425,7 +473,9 @@ print <<"EOHD"; EOHD -if ( $? || $guessed_doctype ) { +} + +if ( $? ) { print "<ul>\n"; for ((@fake_errors,@errors)) { next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/; @@ -456,7 +506,6 @@ if ( $? || $guessed_doctype ) { &output_doctype_spiel; last; } - $line-- if $guessed_doctype; my $newline = $File->{Content}->[$line - 1]; # make sure there are no ^P's or ^Q's in the file, since we need to use @@ -549,7 +598,13 @@ if ( $? || $guessed_doctype ) { $validity="invalid"; } else { - print "\n <pre>\n No errors found!</pre>\n\n"; + if ($File->{Type} eq 'xml') { + print "\n <pre>\n No errors found! "; + print "<a href=\"#sp-lim\">*</a></pre>\n\n"; + } + else { + print "\n <pre>\n No errors found!</pre>\n\n"; + } if ( $version ne "unknown" ) { if ( $version =~ /^HTML 2\.0$/ ) { $gifname = "vh20"; @@ -573,16 +628,21 @@ else { $gifhw = " height=31 width=88"; } elsif ( $version =~ /HTML 4\.01<\/a> Strict$/ ) { - $gifname = "vh40"; + $gifname = "vh401"; $alttext = "Valid HTML 4.01!"; $gifborder = ""; $gifhw = " height=31 width=88"; } elsif ( $version =~ /HTML 4\.01<\/a> / ) { - $gifname = "vh40"; + $gifname = "vh401"; $alttext = "Valid HTML 4.01!"; $gifhw = " height=31 width=88"; } + elsif ( $version =~ /XHTML 1\.0<\/a> / ) { + $gifname = "vxhtml10"; + $alttext = "Valid XHTML 4.01!"; + $gifhw = " height=31 width=88"; + } elsif ( $version =~ /HTML 3\.0/ ) { $gifname = "vh30"; $alttext = "Valid HTML 3.0!"; @@ -651,6 +711,15 @@ EOHD EOHD } $validity="valid"; + if ($File->{Type} eq 'xml') { + print qq{ <h2><a name="sp-lim">Caveat</a></h2> + <p> + This validator is based on SP, which has <a + href="http://www.jclark.com/sp/xml.htm">some limitations + in its support for XML</a>. 
+ </p> + }; + } } if ( $q->param('weblint') ) { @@ -777,11 +846,6 @@ if ( $q->param('ss') ) { EOF print "<pre>\n"; - if ( $guessed_doctype ) { - my $gd = "$doctype\n"; - $gd =~ s/&/&amp;/go; $gd =~ s/</&lt;/go; - printf "%4d: %s", 0, $gd; - } $line = 1; for (@{$File->{Content}}) { s/&/&amp;/go; s/</&lt;/go; @@ -1039,7 +1103,6 @@ sub check_for_doctype { } for (@{$file}[0 .. 20]) { - return 1, $xhtmlt_doctype if /xmlns\s*=/i; return 1, $html40f_doctype if /<frame/i; } for (@{$file}) { |