diff options
-rwxr-xr-x | httpd/cgi-bin/check | 51 |
1 files changed, 40 insertions, 11 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index e3759c7..0c8f8a2 100755 --- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -8,7 +8,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 1.120 2001-06-22 02:01:04 link Exp $ +# $Id: check,v 1.121 2001-06-22 07:15:06 link Exp $ # # We need Perl 5.004. @@ -76,9 +76,9 @@ my $element_ref = 'http://www.htmlhelp.com/reference/html40/'; # # Strings -$VERSION = q$Revision: 1.120 $; +$VERSION = q$Revision: 1.121 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; -$DATE = q$Date: 2001-06-22 02:01:04 $; +$DATE = q$Date: 2001-06-22 07:15:06 $; $MAINTAINER = 'gerald@w3.org'; $NOTICE = ''; # "<p><strong>Note: This service will be ...</strong>"; @@ -250,8 +250,7 @@ EOF # 2. if there is a doctype, parse/validate against that DTD # 3. if no doctype, check for an xmlns= attribute on the first element # 4. if there is an xmlns= attribute, check for XML well-formedness -# 5. if there is no xmlns= attribute, validate as HTML using the doctype -# inferred by the check_for_doctype function +# 5. if there is no xmlns= attribute, and no DOCTYPE, punt. # # @@ -262,11 +261,9 @@ if (defined $q->param('doctype') and not $q->param('doctype') =~ /Inline/i) { } # -# Try to extract or guess the DOCTYPE for HTML and XHTML files. -if ($File->{Type} eq 'html' or $File->{Type} eq 'xhtml' - or $q->param('uploaded_file') ) { - $File->{DOCTYPE} = &get_doctype($File->{Content}); -} +# Try to extract a DOCTYPE or xmlns. +$File = &preparse($File); + # # Set document type to XHTML if the DOCTYPE was for XHTML. @@ -453,13 +450,14 @@ if ($File->{Type} eq 'xhtml') { $ENV{SP_CHARSET_FIXED} = 'YES'; $ENV{SP_ENCODING} = 'XML'; $xmlflags = '-wxml '; -} elsif ($File->{Type} eq 'xml') { +} elsif ($File->{Type} eq 'xml' or $File->{Namespace}) { # no doctype, with xmlns attr on 1st element $File->{Type} = 'xml'; # @@ probably a better way to do this $catalog = $sgmlstuff . 
'/sp-1.3/pubtext/xml.soc'; $ENV{SP_CHARSET_FIXED} = 'YES'; $ENV{SP_ENCODING} = 'XML'; $xmlflags = '-wxml'; + $xmlflags .= ' -wno-valid' unless $File->{DOCTYPE}; } my $command = "$sp -f$temp -E0 $xmlflags -c $catalog"; @@ -1475,3 +1473,34 @@ EOF print "</div>\n"; } + +# +# Pre-parse the document with HTML::Parser (BODY/body elements ignored): record in $File the root element name (Root), the DOCTYPE's PUBLIC identifier (DOCTYPE) and the xmlns value of a start tag matching Root (Namespace); returns $File. +sub preparse { + my $File = shift; + + my $dtd = sub {return if $File->{Root}; ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+"([^\"]+)".*>)si}; +# my $dtd = sub {print "DTD: ", shift(), "\n"}; +# my $pi = sub {print "PI: ", shift(), "\n"}; + my $start = sub { + my $tag = shift; + my $attr = shift; + if ($File->{Root}) { + return unless $tag eq $File->{Root}; + } else { + $File->{Root} = $tag; + } + if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}}; + }; + + my $p = HTML::Parser->new(api_version => 3); + $p->xml_mode(TRUE); + $p->ignore_elements('BODY'); + $p->ignore_elements('body'); + $p->handler(declaration => $dtd, 'text'); +# $p->handler(process => $pi, 'text'); + $p->handler(start => $start, 'tag,attr'); + $p->parse(join "\n", @{$File->{Content}}); + + return $File; +} |