summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlink <link@localhost>2001-07-16 07:56:34 +0000
committerlink <link@localhost>2001-07-16 07:56:34 +0000
commit049a9d41f497b6240ff6da1b534d81b21ebf2f97 (patch)
tree3b4d7faa3e76808953575dfe0e2aa31745351606
parent7d5d8070e217e9540a047ebd4f70a8dba77ac853 (diff)
downloadmarkup-validator-049a9d41f497b6240ff6da1b534d81b21ebf2f97.zip
markup-validator-049a9d41f497b6240ff6da1b534d81b21ebf2f97.tar.gz
markup-validator-049a9d41f497b6240ff6da1b534d81b21ebf2f97.tar.bz2
Refactor error parsing and reporting. Promote Non-SGML Numeric Character
References from Warnings to Errors; these will now make a document invalid!
-rwxr-xr-xhttpd/cgi-bin/check176
1 files changed, 86 insertions, 90 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 6756215..188dd24 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -9,7 +9,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.139 2001-07-08 11:59:50 duerst Exp $
+# $Id: check,v 1.140 2001-07-16 07:56:34 link Exp $
#
# We need Perl 5.004.
@@ -80,9 +80,9 @@ my $element_ref = 'http://www.htmlhelp.com/reference/html40/';
#
# Strings
-$VERSION = q$Revision: 1.139 $;
+$VERSION = q$Revision: 1.140 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
-$DATE = q$Date: 2001-07-08 11:59:50 $;
+$DATE = q$Date: 2001-07-16 07:56:34 $;
$MAINTAINER = 'gerald@w3.org';
$NOTICE = ''; # "<p><strong>Note: This service will be ...</strong>";
@@ -502,9 +502,7 @@ open CHECKER, "|$command - >$temp.esis"
for (@{$File->{Content}}) {print CHECKER $_, "\n"};
close CHECKER;
-open ERRORS, "<$temp" or &internal_error("open($temp) returned: $!");
-$File->{Errors} = [<ERRORS>];
-close ERRORS or warn "close($temp) returned: $!\n";
+$File = &parse_errors($File, $temp); # Parse error output.
$File->{ESIS} = [];
my $elements_found = 0;
@@ -609,8 +607,12 @@ EOHD
EOHD
}
-if ($?) {&process_errors($File)}
-else {&report_valid($File)};
+if (scalar @{$File->{Errors}}) {
+ $q->param('ss'. TRUE);
+ &report_errors($File)
+} else {
+ &report_valid($File)
+}
&weblint() if $q->param('weblint');
&outline($File) if $q->param('outline');
@@ -1113,103 +1115,96 @@ sub supress_doctype {
}
#
-# Process errors reported by SP and produce a report.
-sub process_errors {
+# Parse errors reported by SP.
+sub parse_errors ($$) {
my $File = shift;
- my($line, $col, $type, $msg);
- print "<ul>\n";
- for (@{$File->{Errors}}) {
+ my $file = shift;
+
+ $File->{Errors} = []; # Initialize to an (empty) anonymous array ref.
+
+ open ERRORS, "<$file" or &internal_error("open($file) returned: $!");
+ for (<ERRORS>) {
+ my $err;
next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/;
- next if / numbers exceeding 65535 not supported$/;
- next if /^$sp:\(invalid location\):W: URL Redirected to/;
- s/^$sp:<OSFD>//g;
- unless (($line, $col, $type, $msg) = (/^[^:]*:([0-9]+):([0-9]+):([A-Z]?):? (.*)/)) {
- print "Uh oh! I got the following unknown error:\n\n $_\n\n";
- print "Please make sure you specified the DOCTYPE properly!\n\n";
- &output_doctype_spiel;
- last;
- }
- if ($msg =~ /^cannot generate system identifier for entity /) {
- print "<p><b>Fatal error</b>! $msg\n\n";
- print "<p>I couldn't parse this document, because it " .
- "uses a public\n identifier that's not in my <a\n " .
- " href=\"sgml-lib/catalog\">catalog</a>!\n </p>\n";
- &output_doctype_spiel;
- last;
- }
- if ($msg =~ /unrecognized ({{)?DOCTYPE(}})?/i) {
- print "<p><b>Fatal error</b>! $msg\n\n";
- print "<p>I couldn't parse this document, because it " .
- "uses a public\n identifier that's not in my <a\n " .
- " href=\"sgml-lib/catalog\">catalog</a>!\n </p>\n";
- &output_doctype_spiel;
- last;
+ next if /numbers exceeding 65535 not supported/;
+ next if /URL Redirected to/;
+ my @errs = split /:/;
+ $err->{src} = $errs[1];
+ $err->{line} = $errs[2];
+ $err->{char} = $errs[3];
+ $err->{type} = $errs[4];
+ if ($err->{type} eq 'W' or $err->{type} eq 'E') {
+ $err->{msg} = $errs[5];
+ } else {
+ $err->{type} = 'I';
+ $err->{msg} = $errs[4];
}
- if ($msg =~ /^cannot open /) {
- print "<p>Fatal error! $msg\n\n";
- print "<p>I couldn't parse this document, because it " .
- "makes reference to\n a system-specific file instead of " .
- "using a well-known public identifier\n to specify the " .
- "level of HTML being used.\n </p>\n";
+ push @{$File->{Errors}}, $err;
+ }
+ close ERRORS or &internal_error("close($file) returned: $!\n");
+
+ return $File;
+}
+
+#
+# Generate a HTML report of detected errors.
+sub report_errors ($) {
+ my $File = shift;
+ foreach my $err (@{$File->{Errors}}) {
+
+ # An unknown FPI and no SI.
+ if ( $err->{msg} =~ m(cannot generate system identifier for entity)
+ or $err->{msg} =~ m(unrecognized ({{)?DOCTYPE(}})?)i) {
+ print <<" .EOF.";
+ <p><strong>Fatal Error</strong>: $err->{msg}</p>
+ <p>
+ I could not parse this document, because it uses a public identifier that
+ is not in my <a href="/sgml-lib/catalog">catalog</a>.
+ </p>
+ .EOF.
&output_doctype_spiel;
last;
}
- if ($msg =~ /^cannot find /) {
- print "<p>Fatal error! $msg\n\n";
- print "<p>I couldn't parse this document, because it " .
- "makes reference to\n a system-specific file instead of " .
- "using a well-known public identifier\n to specify the " .
- "level of HTML being used.\n </p>\n";
+
+ # No or unknown FPI and a relative SI.
+ if ($err->{msg} =~ m(cannot (open|find))) {
+ print <<" .EOF.";
+ <p><strong>Fatal Error: $err->{msg}</p>
+ <p>
+ I could not parse this document, because it makes reference to a
+ system-specific file instead of using a well-known public identifier to
+ specify the type of markup being used.
+ </p>
+ .EOF.
&output_doctype_spiel;
last;
}
- my $newline = $File->{Content}->[$line - 1];
- # make sure there are no ^P's or ^Q's in the file, since we need to use
- # them to represent '<' and '>' temporarily. We'll just change them to
- # literal P's and Q's for a lack of anything better to do with them.
- my $lt = "\020";
- my $gt = "\021";
- $newline =~ s/\020/P/go;
- $newline =~ s/\021/Q/g;
+ my($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $err->{char});
- my $orig_col = $col;
- ($newline, $col) = &truncate_line($newline, $col);
+ # Strip curlies from lq-nsgmls output.
+ $err->{msg} =~ s/[{}]//g;
+ # Find index into the %frag hash for the "explanation..." links.
+ $err->{idx} = $err->{msg};
+ $err->{idx} =~ s/"[^\"]+"/FOO/g;
+ $err->{idx} =~ s/[^A-Za-z ]//g;
- # temporarily strip curlies from lq-nsgmls output.
- # @@ should link HTML elements using $elem_db instead.
- $msg =~ s/[{}]//g;
+ $line = &ent($line); # Entity encode.
+ $line =~ s/\t/ /g; # Collapse TABs.
- # figure out the index into the %frag associative array for the
- # "explanation..." links to the KGV FAQ.
- my $msgindex = $msg;
- $msgindex =~ s/"[^\"]+"/FOO/g;
- $msgindex =~ s/[^A-Za-z ]//g;
+ print qq( <li>Line <a href="#line-$err->{line}">$err->{line}</a>, column $col:\n);
- $newline = &ent($newline);
- $newline =~ s/$lt/</g;
- $newline =~ s/$gt/>/g;
- $newline =~ s/\t/ /g;
- $newline =~ s/\r//g;
+ print "<pre> <code class=input>$line</code>\n";
+ print " " x ($col + 2); # 2 is the number of spaces before <code> above
+ print " " x 4 if $col != $err->{char}; # only for truncated lines
+ print "<span class=markup>^</span></pre>\n<p>\n";
- print " <li>";
- print qq{<a href="#line-$line">} if $q->param('ss');
- print "Line $line";
- print "</a>" if $q->param('ss');
- print ", column $orig_col:\n";
+ print qq{<span class="error">Error: $err->{msg}</span>};
- print "<pre> <code class=input>$newline</code>\n";
- print " " x ($col + 2); # 2 is the number of spaces before <code> above
- print " " x 4 if $col != $orig_col; # only for truncated lines
- print "<span class=markup>^</span></pre>\n";
- print "<p>\n";
-
- print qq{<span class=error>Error: $msg</span>};
-
- if (defined $frag->{$msgindex}) {
- print qq{ (<a href="$faqerrloc#$frag->{$msgindex}">explanation...</a>)};
+ if (defined $frag->{$err->{idx}}) {
+ print qq{ (<a href="$faqerrloc#$frag->{$err->{idx}}">explanation...</a>)};
}
print "</p>\n";
@@ -1218,15 +1213,16 @@ sub process_errors {
print "</ul>\n";
print "<hr>\n";
if ($version eq 'unknown') {
- print "\n <p>\n Sorry, I can't validate this document.\n </p>\n";
+ print "<p>Sorry, I can't validate this document.</p>";
} elsif ($File->{Type} eq 'xml') {
- print "\n <p>\n Sorry, this document is not well-formed XML.\n </p>\n";
+ print "<p>Sorry, this document is not well-formed XML.</p>";
} else {
- print "\n <p>\n Sorry, this document does not validate as $version.\n </p>\n\n";
+ print "<p>Sorry, this document does not validate as $version.</p>";
&output_css_validator_blurb($q->param('uri'));
}
}
+
#
# Output "This page is Valid" report.
sub report_valid {