incorporates check_for_doctype patch from Terje Bless:

http://lists.w3.org/Archives/Public/www-validator/1999OctDec/0133
author: gerald <gerald@localhost> 1999-12-01 00:59:31 +0000
committer: gerald <gerald@localhost> 1999-12-01 00:59:31 +0000
commit: ce75fac7e297a9fc0ffcecd536e356a788386cab (patch)
tree: 2699194f94a39d04343f1519a5043e3e5b72d456
parent: aa1a1791a2fa6e7168a0ae62f6e6004ff267fa49 (diff)
download: markup-validator-ce75fac7e297a9fc0ffcecd536e356a788386cab.zip
markup-validator-ce75fac7e297a9fc0ffcecd536e356a788386cab.tar.gz
markup-validator-ce75fac7e297a9fc0ffcecd536e356a788386cab.tar.bz2
1 files changed, 33 insertions, 65 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 1818a55..f2d13e4 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -8,7 +8,7 @@
 # This source code is available under the license at:
 #     http://www.w3.org/Consortium/Legal/copyright-software
 #
-# $Id: check,v 1.47 1999-11-15 04:25:52 gerald Exp $
+# $Id: check,v 1.48 1999-12-01 00:59:31 gerald Exp $
 
 #
 # Load modules
@@ -29,8 +29,8 @@ use constant UNDEF => undef;
 # Constant definitions
 #############################################################################
 
-my $cvsrevision	= '$Revision: 1.47 $';
-my $cvsdate	= '$Date: 1999-11-15 04:25:52 $';
+my $cvsrevision	= '$Revision: 1.48 $';
+my $cvsdate	= '$Date: 1999-12-01 00:59:31 $';
 
 my $logfile	= "/var/log/httpd/val-svc";
 
@@ -1116,78 +1116,46 @@ sub build_jump_links {
     return $text;
 
 }
+#
+# Check if the document has a doctype; if it doesn't, try to guess an
+# appropriate one given the elements used. Returns 2 values: the first is 0
+# if a DOCTYPE was present and 1 otherwise; the second is the doctype.
 
 sub check_for_doctype {
-    # check if the document has a doctype; if it doesn't, try to
-    # guess an appropriate one given the elements used
-    #
-    # returns 2 values:
-    #
-    # first value: 0 or 1:
-    #     if 0, there was a doctype already present;
-    #     if 1, there wasn't a doctype
-    #
-    # second value:
-    #     the inferred doctype, if any
+  my $file = shift; # a reference to @file, for efficiency
 
-    my $fileref = shift;		# a reference to @file, for efficiency
-    my @file = @$fileref;		# dereference $fileref
+  foreach my $count (0 .. scalar @{$file}) {
+    my $line = $file->[$count];
 
-    foreach $count (0..$#file) {
-	$line = $file[$count];
-
-	# does an HTML element precede the doctype on the same line?
-	last if $line =~ /<[a-z].*<!doctype/i;
-
-	if ( $line =~ /<!doctype/i ) {	# found a doctype
-	    my $dttext = join( "", @file[$count..$count+5] );
-	    $dttext =~ s/\n//g;
-	    $dttext =~ s/.*doctype\s+html\s+public\s*["']//i;
-	    $dttext =~ s/["'].*//;	# strip everything except the FPI
-	        # @@ should make sure both quote chars were the same
-	    return 0, $dttext;
-	}
-
-	$line =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go;	# strip comments,
-	    # so the next line doesn't find commented-out markup etc.
-	    # (this doesn't handle multi-line comments, unfortunately)
-
-	last if ( $line =~ /<[a-z]/i );		# found an element
-
-    }
-
-    # do several loops of increasing lengths to avoid iterating over
-    # the whole file if possible.
-    #
-    # these heuristics could be improved a lot.
+    # does an HTML element precede the doctype on the same line?
+    last if $line =~ /<[a-z].*<!doctype/i;
 
-    foreach $line (@file[0..20]) {
-	return 1, $xhtmlt_doctype if $line =~ /xmlns\s*=/i;
+    if ($line =~ /<!doctype/i) { # found a doctype
+      my $dttext = join '', @{$file}[$count .. $count + 5];
+      $dttext =~ s/\n//g;
+      $dttext =~ s/.*?doctype\s+html\s+public\s*([\'\"])([^\1]+?)\1.*/$2/i;
+      return 0, $dttext;
     }
 
-    foreach $line (@file[0..20]) {
-	return 1, $html40f_doctype if $line =~ /<frame/i;
-    }
-
-    foreach $line (@file[0..20]) {
-	return 1, $html40t_doctype if $line =~ /<(table|body )/i;
-    }
-
-    # go through the whole file
-    foreach $line (@file) {
-	return 1, $html40t_doctype if $line =~ /<(table|body )/i;
-    }
-
-    foreach $line (@file) {
-	return 1, $html32_doctype if $line =~ /<center>/i;
-	return 1, $html32_doctype if $line =~ /<[h0-9p]*\s*align\s*=\s*center>/i;
-    }
-
-    # no luck earlier; guess HTML 4.0 transitional
-    return 1, $html40t_doctype;
+    $line =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go;
+    # Strip comments, so the next line doesn't find commented-out markup etc.
+    # (this doesn't handle multi-line comments, unfortunately)
+    last if $line =~ /<[a-z]/i; # found an element
+  }
 
+  for (@{$file}[0 .. 20]) {
+    return 1, $xhtmlt_doctype  if /xmlns\s*=/i;
+    return 1, $html40f_doctype if /<frame/i;
+  }
+  for (@{$file}) {
+    return 1, $html40t_doctype if /<(table|body )/i;
+    return 1, $html32_doctype  if /<center>/i;
+    return 1, $html32_doctype  if /<[h0-9p]*\s*align\s*=\s*center>/i;
+  }
+  return 1, $html40t_doctype; # no luck earlier; guess HTML 4.0 transitional
 }
 
+
 sub print_401_auth_required_message {
 
     my $resource = shift;
author	gerald <gerald@localhost>	1999-12-01 00:59:31 +0000
committer	gerald <gerald@localhost>	1999-12-01 00:59:31 +0000
commit	ce75fac7e297a9fc0ffcecd536e356a788386cab (patch)
tree	2699194f94a39d04343f1519a5043e3e5b72d456
parent	aa1a1791a2fa6e7168a0ae62f6e6004ff267fa49 (diff)
download	markup-validator-ce75fac7e297a9fc0ffcecd536e356a788386cab.zip markup-validator-ce75fac7e297a9fc0ffcecd536e356a788386cab.tar.gz markup-validator-ce75fac7e297a9fc0ffcecd536e356a788386cab.tar.bz2