summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x httpd/cgi-bin/check 65
1 files changed, 44 insertions, 21 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 8a3ba5b..55b106a 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -8,7 +8,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.61 2000-02-01 23:17:01 gerald Exp $
+# $Id: check,v 1.62 2000-02-11 11:07:31 gerald Exp $
#
# We need Perl 5.004.
@@ -73,9 +73,9 @@ my $element_ref = 'http://www.htmlhelp.com/reference/html40/';
#
# Strings
-$VERSION = q$Revision: 1.61 $;
+$VERSION = q$Revision: 1.62 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
-$DATE = q$Date: 2000-02-01 23:17:01 $;
+$DATE = q$Date: 2000-02-11 11:07:31 $;
$MAINTAINER = 'gerald@w3.org';
my $notice = ''; # "<p><strong>Note: This service will be ...</strong>";
@@ -249,9 +249,10 @@ EOF
#
# 1. check if there's a doctype
# 2. if there is a doctype, parse/validate against that DTD
-# 3. if no doctype, check for xml well-formedness
-# 4. if xml is well-formed, check and report xmlns= attribute (anything else?)
-# 5. if xml is not well-formed, report errors
+# 3. if no doctype, check for an xmlns= attribute on the first element
+# 4. if there is an xmlns= attribute, check for XML well-formedness
+# 5. if there is no xmlns= attribute, validate as HTML using the doctype
+# inferred by the check_for_doctype function
#
#
@@ -261,9 +262,9 @@ if ($File->{Type} eq 'html' or $File->{Type} eq 'xhtml') {
}
#
-# Set document type to XHTML if the DOCTYPE was for XHTML. This happens when
-# a XHTML file is served as text/html (damn fool idea, if you ask me! -link).
-if ($doctype =~ /xhtml/i) {
+# Set document type to XHTML if the DOCTYPE was for XHTML.
+# This happens when an XHTML file is served as text/html
+if ($doctype =~ /xhtml/i && $guessed_doctype != 2) {
$File->{Type} = 'xhtml';
}
@@ -342,7 +343,7 @@ if ($File->{Type} eq 'xhtml') {
$ENV{SP_CHARSET_FIXED} = 'YES';
$ENV{SP_ENCODING} = 'UTF-8';
$decl = $xhtmldecl;
-} elsif ($guessed_doctype) { # no doctype was present; parse as xml/xhtml
+} elsif ($guessed_doctype == 2) { # no doctype, with xmlns attr on 1st element
$File->{Type} = 'xml'; # @@ probably a better way to do this
$ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/sp-1.3/pubtext/xml.soc';
$ENV{SGML_SEARCH_PATH} = $sgmlstuff . '/sp-1.3/pubtext/';
@@ -362,6 +363,7 @@ my $command = "$codeconv $sp -E0 $xmlflags $catalog $decl";
open CHECKER, "|$command - >$temp.esis 2>$temp"
or die "open(|$command - >$temp.esis 2>$temp) returned: $!\n";
+print CHECKER $doctype, "\n" if $guessed_doctype == 1;
for (@{$File->{Content}}) {print CHECKER $_, "\n"}
close CHECKER;
@@ -411,6 +413,10 @@ if ($File->{Type} eq 'xhtml') {
}
$version = $pub_ids->{$fpi} || 'unknown';
+if ($guessed_doctype == 1) {
+ push( @fake_errors, "$sp:<OSFD>0:2:1:E: Missing DOCTYPE declaration at start of document (<a href=\"http://www.htmlhelp.org/tools/validator/doctype.html\">explanation...</a>)\n" );
+}
+
print ' ' x 4, q(<li>Character encoding: ), $File->{Charset};
if ($File->{HTTP_Charset} ne $File->{META_Charset}
and $File->{META_Charset} ne ''
@@ -476,7 +482,7 @@ EOHD
}
-if ( $? ) {
+if ( $? || ($guessed_doctype == 1) ) {
print "<ul>\n";
for ((@fake_errors,@errors)) {
next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/;
@@ -507,6 +513,7 @@ if ( $? ) {
&output_doctype_spiel;
last;
}
+ $line-- if ( $guessed_doctype == 1 );
my $newline = $File->{Content}->[$line - 1];
# make sure there are no ^P's or ^Q's in the file, since we need to use
@@ -859,6 +866,11 @@ if ( $q->param('ss') ) {
EOF
print "<pre>\n";
+ if ( $guessed_doctype == 1 ) {
+ my $gd = $doctype . "\n";
+ $gd =~ s/&/&amp;/go; $gd =~ s/</&lt;/go;
+ printf "%4d: %s", 0, $gd;
+ }
$line = 1;
for (@{$File->{Content}}) {
s/&/&amp;/go; s/</&lt;/go;
@@ -1090,9 +1102,15 @@ sub build_jump_links {
}
#
-# Check if the document has a doctype; if it doesn't, try to guess an
-# appropriate one given the elements used. Returns 2 values. First value is 0
-# if there was a DOCTYPE and 1 otherwise. The Second value is the doctype.
+# Check if the document has a doctype; if it doesn't, try to guess
+# an appropriate one given the elements used. Returns 2 values.
+# The first value is:
+# 0 if there was a DOCTYPE,
+# 1 if there was no doctype and no xmlns= attribute
+# on the first element in the document, or
+# 2 if there was no doctype and there IS an xmlns= attribute
+# on the first element
+# The second value is the doctype or namespace, if any.
sub check_for_doctype {
my $file = shift; # a reference to @file, for efficiency
@@ -1100,7 +1118,12 @@ sub check_for_doctype {
my $line = $file->[$count];
# does an HTML element precede the doctype on the same line?
- last if $line =~ /<[a-z].*<!doctype/i;
+ if ( $line =~ /<[a-z].*<!doctype/i ) {
+ if ( $line =~ /<[a-z]+ xmlns=['"]([^ '"]*)/i ) {# look for an xmlns attr
+ return 2, $1;
+ }
+ last;
+ }
if ($line =~ /<!doctype/i) { # found a doctype
my $dttext = join '', @{$file}[$count .. $count + 5];
@@ -1112,17 +1135,17 @@ sub check_for_doctype {
$line =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go;
# Strip comments, so the next line doesn't find commented-out markup etc.
# (this doesn't handle multi-line comments, unfortunately)
- last if $line =~ /<[a-z]/i; # found an element
+ if ( $line =~ /<[a-z]/i ) { # found an element
+ if ( $line =~ /<[a-z]+ xmlns=['"]([^ '"]*)/i ) {# look for an xmlns attr
+ return 2, $1;
+ }
+ last;
+ }
}
for (@{$file}[0 .. 20]) {
return 1, $html40f_doctype if /<frame/i;
}
- for (@{$file}) {
- return 1, $html40t_doctype if /<(table|body )/i;
- return 1, $html32_doctype if /<center>/i;
- return 1, $html32_doctype if /<[h0-9p]*\s*align\s*=\s*center>/i;
- }
return 1, $html40t_doctype; # no luck earlier; guess HTML 4.0 transitional
}