summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: ville <ville@localhost> 2010-06-13 21:45:40 +0000
committer: ville <ville@localhost> 2010-06-13 21:45:40 +0000
commit: d0b215afb86f2352523062bec5db14c4f67bf100 (patch)
tree: acd6b580717d8235d6934edd6efcfe56785ea940
parent: 4abf4d222b46cab9d6695945ea5e67389bc47766 (diff)
download: markup-validator-d0b215afb86f2352523062bec5db14c4f67bf100.zip
markup-validator-d0b215afb86f2352523062bec5db14c4f67bf100.tar.gz
markup-validator-d0b215afb86f2352523062bec5db14c4f67bf100.tar.bz2
Bring back support for non-structured XML::LibXML errors.
Support was removed in rev 1.769, but according to the docs, XML::LibXML 1.70 may still throw non-structured errors.
-rwxr-xr-x httpd/cgi-bin/check 172
1 files changed, 148 insertions, 24 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index eca0ce3..9208ef2 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.778 2010-06-10 22:19:55 ville Exp $
+# $Id: check,v 1.779 2010-06-13 21:45:40 ville Exp $
#
# We need Perl 5.8.0+.
@@ -192,7 +192,7 @@ EOF
#
# Strings
- $VERSION = q$Revision: 1.778 $;
+ $VERSION = q$Revision: 1.779 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
# Read friendly error message file
@@ -652,35 +652,152 @@ if (&is_xml($File)) {
eval { $xmlparser->parse_string($xml_string); };
$xml_string = undef;
+ my $xml_parse_errors_line = undef;
+ my @xmlwf_error_list;
- my $err_obj = $@;
- while ($err_obj) {
- my $err;
- $err->{src} = '...'; # do this with show_open_entities()?
- $err->{line} = $err_obj->line();
- $err->{char} = $err_obj->column();
- $err->{num} = "libxml2-" . $err_obj->code();
- $err->{type} = "E";
- $err->{msg} = $err_obj->message();
-
- $err_obj = $err_obj->_prev();
-
- # The validator will sometimes fail to dereference entities
- # files; we're filtering the resulting bogus error for
- # non-standalone documents. @@@TODO: is this still needed?
- if (!$standalone &&
- $err->{msg} =~ /Entity '\w+' not defined/)
- {
- $err = undef;
- next;
+ if (ref($@)) {
+
+ # handle a structured error (XML::LibXML::Error object)
+
+ my $err_obj = $@;
+ my $num_xmlwf_error = 0;
+ while ($err_obj) {
+ my $err;
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $err_obj->line();
+ $err->{char} = $err_obj->column();
+ $err->{num} = "libxml2-" . $err_obj->code();
+ $err->{type} = "E";
+ $err->{msg} = $err_obj->message();
+
+ $err_obj = $err_obj->_prev();
+
+ # The validator will sometimes fail to dereference entities
+ # files; we're filtering the resulting bogus error for
+ # non-standalone documents. @@@TODO: is this still needed?
+ if (!$standalone &&
+ $err->{msg} =~ /Entity '\w+' not defined/)
+ {
+ $err = undef;
+ next;
+ }
+
+ unshift(@xmlwf_error_list, $err);
+ $num_xmlwf_error++;
}
+ }
+ elsif ($@) {
+ my $xmlwf_errors = $@;
+ my $xmlwf_error_line = undef;
+ my $xmlwf_error_col = undef;
+ my $xmlwf_error_msg = undef;
+ my $got_error_message = undef;
+ my $got_quoted_line = undef;
+ my $num_xmlwf_error = 0;
+ foreach my $msg_line (split "\n", $xmlwf_errors) {
+
+ $msg_line =~ s{[^\x0d\x0a](:\d+:)}{\n$1}g;
+ $msg_line =~ s{[^\x0d\x0a]+[\x0d\x0a]$}{};
+
+ # first we get the actual error message
+ if (!$got_error_message &&
+ $msg_line =~ /^(:\d+:)( parser error : .*)/)
+ {
+ $xmlwf_error_line = $1;
+ $xmlwf_error_msg = $2;
+ $xmlwf_error_line =~ s/:(\d+):/$1/;
+ $xmlwf_error_msg =~ s/ parser error :/XML Parsing Error: /;
+ $got_error_message = 1;
+ }
+
+ # then we skip the second line, which shows the context
+ # (we don't use that)
+ elsif ($got_error_message && !$got_quoted_line) {
+ $got_quoted_line = 1;
+ }
+
+ # we now take the third line, with the pointer to the error's
+ # column
+ elsif (($msg_line =~ /(\s+)\^/) and
+ $got_error_message and
+ $got_quoted_line)
+ {
+ $xmlwf_error_col = length($1);
+ }
+ # cleanup for a number of bugs for the column number
+ if (defined($xmlwf_error_col)) {
+ if (( my $l =
+ length($File->{Content}->[$xmlwf_error_line - 1])
+ ) < $xmlwf_error_col
+ )
+ {
+
+ # http://bugzilla.gnome.org/show_bug.cgi?id=434196
+ #warn("Warning: reported error column larger than line length " .
+ # "($xmlwf_error_col > $l) in $File->{URI} line " .
+ # "$xmlwf_error_line, libxml2 bug? Resetting to line length.");
+ $xmlwf_error_col = $l;
+ }
+ elsif ($xmlwf_error_col == 79) {
+
+ # working around an apparent odd limitation of libxml
+ # which only gives context for lines up to 80 chars
+ # http://www.w3.org/Bugs/Public/show_bug.cgi?id=4420
+ # http://bugzilla.gnome.org/show_bug.cgi?id=424017
+ $xmlwf_error_col = "> 80";
+
+ # a non-integer column number will trigger the proper behavior
+ # in report_error
+ }
+ }
+
+ # when we have all the info (one full error message), proceed
+ # and move on to the next error
+ if ((defined $xmlwf_error_line) and
+ (defined $xmlwf_error_col) and
+ (defined $xmlwf_error_msg))
+ {
+
+ # Reinitializing for the next batch of 3 lines
+ $got_error_message = undef;
+ $got_quoted_line = undef;
+
+ # formatting the error message for output
+ my $err;
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $xmlwf_error_line;
+ $err->{char} = $xmlwf_error_col;
+ $err->{num} = 'xmlwf';
+ $err->{type} = "E";
+ $err->{msg} = $xmlwf_error_msg;
+
+ # The validator will sometimes fail to dereference entities
+ # files; we're filtering the resulting bogus error for
+ # non-standalone documents. @@@TODO: is this still needed?
+ if (!$standalone &&
+ $err->{msg} =~ /Entity '\w+' not defined/)
+ {
+ $xmlwf_error_line = undef;
+ $xmlwf_error_col = undef;
+ $xmlwf_error_msg = undef;
+ next;
+ }
+ push(@xmlwf_error_list, $err);
+ $xmlwf_error_line = undef;
+ $xmlwf_error_col = undef;
+ $xmlwf_error_msg = undef;
+ $num_xmlwf_error++;
+
+ }
+ }
+ }
+ foreach my $errmsg (@xmlwf_error_list) {
$File->{'Is Valid'} = FALSE;
- unshift(@{$File->{WF_Errors}}, $err);
+ push @{$File->{WF_Errors}}, $errmsg;
}
}
}
-
if (($File->{DOCTYPE} eq "HTML5") or ($File->{DOCTYPE} eq "XHTML5")) {
if ($CFG->{External}->{HTML5}) {
$File = &html5_validate($File);
@@ -867,6 +984,13 @@ if ($File->{Opt}->{Output} eq 'json') {
for my $key (qw(msg expl)) {
$msg->{$key} = $json->encode($msg->{$key}) if $msg->{$key};
}
+
+ # Drop non-numeric char indicators from output, e.g.
+ # "> 80" for some XML parse errors (see the non-structured
+ # XML::LibXML code branch in XML preparsing above).
+ if ($msg->{char} && $msg->{char} !~ /^\d+$/) {
+ delete($msg->{char});
+ }
}
}
}