summary refs log tree commit diff stats
diff options
context:
space:
mode:
author gerald <gerald@localhost> 1999-12-01 00:59:31 +0000
committer gerald <gerald@localhost> 1999-12-01 00:59:31 +0000
commit ce75fac7e297a9fc0ffcecd536e356a788386cab (patch)
tree 2699194f94a39d04343f1519a5043e3e5b72d456
parent aa1a1791a2fa6e7168a0ae62f6e6004ff267fa49 (diff)
download markup-validator-ce75fac7e297a9fc0ffcecd536e356a788386cab.zip
markup-validator-ce75fac7e297a9fc0ffcecd536e356a788386cab.tar.gz
markup-validator-ce75fac7e297a9fc0ffcecd536e356a788386cab.tar.bz2
incorporates check_for_doctype patch from Terje Bless:
http://lists.w3.org/Archives/Public/www-validator/1999OctDec/0133
-rwxr-xr-x httpd/cgi-bin/check 98
1 files changed, 33 insertions, 65 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 1818a55..f2d13e4 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -8,7 +8,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.47 1999-11-15 04:25:52 gerald Exp $
+# $Id: check,v 1.48 1999-12-01 00:59:31 gerald Exp $
#
# Load modules
@@ -29,8 +29,8 @@ use constant UNDEF => undef;
# Constant definitions
#############################################################################
-my $cvsrevision = '$Revision: 1.47 $';
-my $cvsdate = '$Date: 1999-11-15 04:25:52 $';
+my $cvsrevision = '$Revision: 1.48 $';
+my $cvsdate = '$Date: 1999-12-01 00:59:31 $';
my $logfile = "/var/log/httpd/val-svc";
@@ -1116,78 +1116,46 @@ sub build_jump_links {
return $text;
}
+#
+# Check if the document has a doctype; if it doesn't, try to guess an
+# appropriate one given the elements used. Returns 2 values. First value is 0
+# if there was a DOCTYPE and 1 otherwise. The Second value is the doctype.
sub check_for_doctype {
- # check if the document has a doctype; if it doesn't, try to
- # guess an appropriate one given the elements used
- #
- # returns 2 values:
- #
- # first value: 0 or 1:
- # if 0, there was a doctype already present;
- # if 1, there wasn't a doctype
- #
- # second value:
- # the inferred doctype, if any
+ my $file = shift; # a reference to @file, for efficiency
- my $fileref = shift; # a reference to @file, for efficiency
- my @file = @$fileref; # dereference $fileref
+ foreach my $count (0 .. scalar @{$file}) {
+ my $line = $file->[$count];
- foreach $count (0..$#file) {
- $line = $file[$count];
-
- # does an HTML element precede the doctype on the same line?
- last if $line =~ /<[a-z].*<!doctype/i;
-
- if ( $line =~ /<!doctype/i ) { # found a doctype
- my $dttext = join( "", @file[$count..$count+5] );
- $dttext =~ s/\n//g;
- $dttext =~ s/.*doctype\s+html\s+public\s*["']//i;
- $dttext =~ s/["'].*//; # strip everything except the FPI
- # @@ should make sure both quote chars were the same
- return 0, $dttext;
- }
-
- $line =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go; # strip comments,
- # so the next line doesn't find commented-out markup etc.
- # (this doesn't handle multi-line comments, unfortunately)
-
- last if ( $line =~ /<[a-z]/i ); # found an element
-
- }
-
- # do several loops of increasing lengths to avoid iterating over
- # the whole file if possible.
- #
- # these heuristics could be improved a lot.
+ # does an HTML element precede the doctype on the same line?
+ last if $line =~ /<[a-z].*<!doctype/i;
- foreach $line (@file[0..20]) {
- return 1, $xhtmlt_doctype if $line =~ /xmlns\s*=/i;
+ if ($line =~ /<!doctype/i) { # found a doctype
+ my $dttext = join '', @{$file}[$count .. $count + 5];
+ $dttext =~ s/\n//g;
+ $dttext =~ s/.*?doctype\s+html\s+public\s*([\'\"])([^\1]+?)\1.*/$2/i;
+ return 0, $dttext;
}
- foreach $line (@file[0..20]) {
- return 1, $html40f_doctype if $line =~ /<frame/i;
- }
-
- foreach $line (@file[0..20]) {
- return 1, $html40t_doctype if $line =~ /<(table|body )/i;
- }
-
- # go through the whole file
- foreach $line (@file) {
- return 1, $html40t_doctype if $line =~ /<(table|body )/i;
- }
-
- foreach $line (@file) {
- return 1, $html32_doctype if $line =~ /<center>/i;
- return 1, $html32_doctype if $line =~ /<[h0-9p]*\s*align\s*=\s*center>/i;
- }
-
- # no luck earlier; guess HTML 4.0 transitional
- return 1, $html40t_doctype;
+ $line =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go;
+ # Strip comments, so the next line doesn't find commented-out markup etc.
+ # (this doesn't handle multi-line comments, unfortunately)
+ last if $line =~ /<[a-z]/i; # found an element
+ }
+ for (@{$file}[0 .. 20]) {
+ return 1, $xhtmlt_doctype if /xmlns\s*=/i;
+ return 1, $html40f_doctype if /<frame/i;
+ }
+ for (@{$file}) {
+ return 1, $html40t_doctype if /<(table|body )/i;
+ return 1, $html32_doctype if /<center>/i;
+ return 1, $html32_doctype if /<[h0-9p]*\s*align\s*=\s*center>/i;
+ }
+ return 1, $html40t_doctype; # no luck earlier; guess HTML 4.0 transitional
}
+
sub print_401_auth_required_message {
my $resource = shift;