summaryrefslogtreecommitdiffstats
path: root/httpd/cgi-bin/check
diff options
context:
space:
mode:
Diffstat (limited to 'httpd/cgi-bin/check')
-rwxr-xr-xhttpd/cgi-bin/check324
1 files changed, 209 insertions, 115 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index afbad1e..c0cdcb5 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.592 2008-08-14 18:04:34 ot Exp $
+# $Id: check,v 1.593 2008-08-15 13:28:09 ot Exp $
#
# Disable buffering on STDOUT!
@@ -118,6 +118,9 @@ BEGIN {
Paths => {
Base => ($ENV{W3C_VALIDATOR_HOME} || '/usr/local/validator'),
},
+ External => {
+ HTML5 => FALSE,
+ },
},
);
my %cfg = Config::General->new(%config_opts)->getall();
@@ -188,7 +191,7 @@ Directory not readable (permission denied): @_r
#
# Strings
- $VERSION = q$Revision: 1.592 $;
+ $VERSION = q$Revision: 1.593 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
#
@@ -662,20 +665,24 @@ if ($File->{Charset}->{Use} eq 'utf-8' &&
#
# Override DOCTYPE if user asked for it.
-if ($File->{Opt}->{DOCTYPE}
- and not $File->{Opt}->{DOCTYPE} =~ /(Inline|detect)/i) {
- $File = &override_doctype($File);
+if ($File->{Opt}->{DOCTYPE}) {
+ if ($File->{Opt}->{DOCTYPE} eq "HTML5") {
+ $File->{DOCTYPE} = "HTML5";
+ $File->{Version} = $File->{DOCTYPE};
+ }
+ elsif (not $File->{Opt}->{DOCTYPE} =~ /(Inline|detect)/i) {
+ $File = &override_doctype($File);
+ }
}
-#
-# Try to extract a DOCTYPE or xmlns.
-$File = &preparse_doctype($File);
-
-#
-# Determine the parse mode (XML or SGML).
-##set_parse_mode($File, $CFG) if $File->{DOCTYPE};
+if ($File->{Opt}->{DOCTYPE} eq "HTML5") {
+
+}
+else {
+ # Try to extract a DOCTYPE or xmlns.
+ $File = &preparse_doctype($File);
+}
set_parse_mode($File, $CFG);
-
#
# Sanity check Charset information and add any warnings necessary.
$File = &charset_conflicts($File);
@@ -692,124 +699,213 @@ $File->{Errors} = [];
# ditto, we should try using W3C::Validator::EventHandler,
# but it's badly linked to opensp at the moment
if (&is_xml($File)) {
-
- my $xmlparser = XML::LibXML->new();
- $xmlparser->line_numbers(1);
- $xmlparser->validation(0);
- $xmlparser->load_ext_dtd(0);
- # [NOT] loading the XML catalog for entities resolution as it seems to cause a lot of unnecessary DTD/entities fetching (requires >= 1.53 if enabled)
- #$xmlparser->load_catalog( File::Spec->catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc') );
- my $xml_string = join"\n",@{$File->{Content}};
- # the XML parser will check the value of encoding attribute in XML declaration
- # so we have to amend it to reflect transcoding. see Bug 4867
- $xml_string =~ s/(<\?xml.*)
-(encoding[\x20|\x09|\x0D|\x0A]*=[\x20|\x09|\x0D|\x0A]*(?:"[A-Za-z][a-zA-Z0-9_-]+"|'[A-Za-z][a-zA-Z0-9_-]+'))
-(.*\?>)/$1encoding="utf-8"$3/sx;
- eval {
- $xmlparser->parse_string($xml_string);
- };
- $xml_string = undef;
- my $xml_parse_errors_line = undef;
- my @xmlwf_error_list;
- if ($@) {
-
- my $xmlwf_errors = $@;
- my $xmlwf_error_line = undef;
- my $xmlwf_error_col = undef;
- my $xmlwf_error_msg = undef;
- my $got_error_message = 0;
- my $got_quoted_line = 0;
- my $num_xmlwf_error = 0;
- foreach my $msg_line (split "\n", $xmlwf_errors){
-
- $msg_line =~ s{[^\x0d\x0a](:\d+:)}{\n$1}g;
- $msg_line =~ s{[^\x0d\x0a]+[\x0d\x0a]$}{};
-
- # first we get the actual error message
- if (($got_error_message eq 0) and ($msg_line =~ /^(:\d+:)( parser error : .*)/ )) {
- $xmlwf_error_line = $1;
- $xmlwf_error_msg = $2;
- $xmlwf_error_line =~ s/:(\d+):/$1/;
- $xmlwf_error_msg =~ s/ parser error :/XML Parsing Error: /;
- $got_error_message = 1;
- }
- # then we skip the second line, which shows the context (we don't use that)
- elsif (($got_error_message eq 1) and ($got_quoted_line eq 0)) {
- $got_quoted_line = 1;
- }
- # we now take the third line, with the pointer to the error's column
- elsif (($msg_line =~ /(\s+)\^/) and ($got_error_message eq 1) and ($got_quoted_line eq 1)) {
- $xmlwf_error_col = length($1);
- }
-
- # cleanup for a number of bugs for the column number
- if (defined($xmlwf_error_col)) {
- if ((my $l = length($File->{Content}->[$xmlwf_error_line-1])) < $xmlwf_error_col) {
- # http://bugzilla.gnome.org/show_bug.cgi?id=434196
- #warn("Warning: reported error column larger than line length " .
- # "($xmlwf_error_col > $l) in $File->{URI} line " .
- # "$xmlwf_error_line, libxml2 bug? Resetting to line length.");
- $xmlwf_error_col = $l;
+ if ($File->{DOCTYPE} eq "HTML5")
+ {
+ $File->{DOCTYPE} = "XHTML5";
+ $File->{Version} = "XHTML5";
+ }
+ else {
+ my $xmlparser = XML::LibXML->new();
+ $xmlparser->line_numbers(1);
+ $xmlparser->validation(0);
+ $xmlparser->load_ext_dtd(0);
+ # [NOT] loading the XML catalog for entities resolution as it seems to cause a lot of unnecessary DTD/entities fetching (requires >= 1.53 if enabled)
+ #$xmlparser->load_catalog( File::Spec->catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc') );
+ my $xml_string = join"\n",@{$File->{Content}};
+ # the XML parser will check the value of encoding attribute in XML declaration
+ # so we have to amend it to reflect transcoding. see Bug 4867
+ $xml_string =~ s/(<\?xml.*)
+ (encoding[\x20|\x09|\x0D|\x0A]*=[\x20|\x09|\x0D|\x0A]*(?:"[A-Za-z][a-zA-Z0-9_-]+"|'[A-Za-z][a-zA-Z0-9_-]+'))
+ (.*\?>)/$1encoding="utf-8"$3/sx;
+ eval {
+ $xmlparser->parse_string($xml_string);
+ };
+ $xml_string = undef;
+ my $xml_parse_errors_line = undef;
+ my @xmlwf_error_list;
+ if ($@) {
+
+ my $xmlwf_errors = $@;
+ my $xmlwf_error_line = undef;
+ my $xmlwf_error_col = undef;
+ my $xmlwf_error_msg = undef;
+ my $got_error_message = 0;
+ my $got_quoted_line = 0;
+ my $num_xmlwf_error = 0;
+ foreach my $msg_line (split "\n", $xmlwf_errors){
+
+ $msg_line =~ s{[^\x0d\x0a](:\d+:)}{\n$1}g;
+ $msg_line =~ s{[^\x0d\x0a]+[\x0d\x0a]$}{};
+
+ # first we get the actual error message
+ if (($got_error_message eq 0) and ($msg_line =~ /^(:\d+:)( parser error : .*)/ )) {
+ $xmlwf_error_line = $1;
+ $xmlwf_error_msg = $2;
+ $xmlwf_error_line =~ s/:(\d+):/$1/;
+ $xmlwf_error_msg =~ s/ parser error :/XML Parsing Error: /;
+ $got_error_message = 1;
}
- elsif ($xmlwf_error_col == 79) {
- # working around an apparent odd limitation of libxml
- # which only gives context for lines up to 80 chars
- # http://www.w3.org/Bugs/Public/show_bug.cgi?id=4420
- # http://bugzilla.gnome.org/show_bug.cgi?id=424017
- $xmlwf_error_col = "> 80";
- # non-int line number will trigger the proper behavior in report_error
+ # then we skip the second line, which shows the context (we don't use that)
+ elsif (($got_error_message eq 1) and ($got_quoted_line eq 0)) {
+ $got_quoted_line = 1;
+ }
+ # we now take the third line, with the pointer to the error's column
+ elsif (($msg_line =~ /(\s+)\^/) and ($got_error_message eq 1) and ($got_quoted_line eq 1)) {
+ $xmlwf_error_col = length($1);
}
- }
- # when we have all the info (one full error message), proceed and move on to the next error
- if ((defined $xmlwf_error_line) and (defined $xmlwf_error_col) and (defined $xmlwf_error_msg)){
- # Reinitializing for the next batch of 3 lines
- $got_error_message = 0;
- $got_quoted_line = 0;
-
- # formatting the error message for output
- my $err;
- $err->{src} = '...'; # do this with show_open_entities()?
- $err->{line} = $xmlwf_error_line;
- $err->{char} = $xmlwf_error_col;
- $err->{num} = 'xmlwf';
- $err->{type} = "E";
- $err->{msg} = $xmlwf_error_msg;
+ # cleanup for a number of bugs for the column number
+ if (defined($xmlwf_error_col)) {
+ if ((my $l = length($File->{Content}->[$xmlwf_error_line-1])) < $xmlwf_error_col) {
+ # http://bugzilla.gnome.org/show_bug.cgi?id=434196
+ #warn("Warning: reported error column larger than line length " .
+ # "($xmlwf_error_col > $l) in $File->{URI} line " .
+ # "$xmlwf_error_line, libxml2 bug? Resetting to line length.");
+ $xmlwf_error_col = $l;
+ }
+ elsif ($xmlwf_error_col == 79) {
+ # working around an apparent odd limitation of libxml
+ # which only gives context for lines up to 80 chars
+ # http://www.w3.org/Bugs/Public/show_bug.cgi?id=4420
+ # http://bugzilla.gnome.org/show_bug.cgi?id=424017
+ $xmlwf_error_col = "> 80";
+ # non-int line number will trigger the proper behavior in report_error
+ }
+ }
- # The validator will sometimes fail to dereference entities files
- # we're filtering the bogus resulting error
- if ($err->{msg} =~ /Entity '\w+' not defined/) {
+ # when we have all the info (one full error message), proceed and move on to the next error
+ if ((defined $xmlwf_error_line) and (defined $xmlwf_error_col) and (defined $xmlwf_error_msg)){
+ # Reinitializing for the next batch of 3 lines
+ $got_error_message = 0;
+ $got_quoted_line = 0;
+
+ # formatting the error message for output
+ my $err;
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $xmlwf_error_line;
+ $err->{char} = $xmlwf_error_col;
+ $err->{num} = 'xmlwf';
+ $err->{type} = "E";
+ $err->{msg} = $xmlwf_error_msg;
+
+ # The validator will sometimes fail to dereference entities files
+ # we're filtering the bogus resulting error
+ if ($err->{msg} =~ /Entity '\w+' not defined/) {
+ $xmlwf_error_line = undef;
+ $xmlwf_error_col = undef;
+ $xmlwf_error_msg = undef;
+ next;
+ }
+ push (@xmlwf_error_list, $err);
$xmlwf_error_line = undef;
$xmlwf_error_col = undef;
$xmlwf_error_msg = undef;
- next;
- }
- push (@xmlwf_error_list, $err);
- $xmlwf_error_line = undef;
- $xmlwf_error_col = undef;
- $xmlwf_error_msg = undef;
- $num_xmlwf_error++;
+ $num_xmlwf_error++;
+ }
+ }
+ foreach my $errmsg (@xmlwf_error_list){
+ $File->{'Is Valid'} = FALSE;
+ push @{$File->{WF_Errors}}, $errmsg;
}
- }
- foreach my $errmsg (@xmlwf_error_list){
- $File->{'Is Valid'} = FALSE;
- push @{$File->{WF_Errors}}, $errmsg;
}
}
-
}
-#
-# Abandon all hope ye who enter here...
-$File = &parse($File);
-sub parse (\$) {
+if (($File->{DOCTYPE} eq "HTML5") or ($File->{DOCTYPE} eq "XHTML5")) {
+ if ($CFG->{External}->{HTML5}) {
+ $File = &html5_validate($File);
+ }
+ else {
+ $File->{'Error Flagged'} = TRUE;
+ $File->{Templates}->{Error}->param(fatal_no_checker => TRUE);
+ $File->{Templates}->{Error}->param(fatal_missing_checker => "HTML5 Validator");
+ }
+}
+else {
+ $File = &dtd_validate($File);
+}
+&abort_if_error_flagged($File, 0);
+
+sub html5_validate (\$) {
my $File = shift;
+ my $ua = new W3C::Validator::UserAgent ($CFG, $File);
+ my $html5_parser = "";
+ if ($File->{Mode} eq 'XML') {
+ $html5_parser = "xml";
+ }
+ $ua->env_proxy();
+ $ua->agent($File->{Opt}->{'User Agent'});
+ $ua->parse_head(0); # Don't parse the http-equiv stuff.
+ eval { require HTTP::Request::Common;};
+ if ($@) {
+ warn "HTTP::Request::Common needs to be installed to check HTML5 content";
+ return $File;
+ }
+ use HTTP::Request::Common;
+ # telling caches in the middle we want a fresh copy (Bug 4998)
+ $ua->default_header(Cache_control=> "max-age=0");
+
+ my $res = $ua->request(POST "$CFG->{External}->{HTML5}", Content_Type => 'form-data',
+ Content => [out => "xml", parser=>$html5_parser, content => $File->{Bytes}]);
+ if (! $res->is_success()) {
+ $File->{'Error Flagged'} = TRUE;
+ $File->{Templates}->{Error}->param(fatal_no_checker => TRUE);
+ $File->{Templates}->{Error}->param(fatal_missing_checker => "HTML5 Validator");
+ }
+ else {
+ my $content = $res->can('decoded_content') ?
+ $res->decoded_content(charset => 'none') : $res->content;
+ # and now we parse according to http://wiki.whatwg.org/wiki/Validator.nu_XML_Output
+ # I wish we could use XML::LibXML::Reader here. but SHAME on those major
+ # unix distributions still shipping with libxml2 2.6.16… 4 years after its release
+ my $xml_reader = XML::LibXML->new();
+ my $xmlDOM = $xml_reader->parse_string( $content);
+ my @nodelist = $xmlDOM->getElementsByTagName("messages");
+ my $messages_node = $nodelist[0];
+ my @message_nodes = $messages_node->childNodes;
+ # @@ TODO locator attributes
+ foreach my $message_node (@message_nodes) {
+ my $message_type = $message_node->localname;
+ my $err;
+ my ($html5_error_line, $html5_error_col, $html5_error_msg);
+ if ($message_type eq "error") {
+ $err->{type} = "E";
+ }
+ elsif ($message_type eq "info") {
+ $err->{type} = "I";
+ if ($message_node->hasAttributes()) {
+ my @attributelist = $message_node->attributes();
+ foreach my $attribute (@attributelist) {
+ #@@ TODO parse attributes, find out if it is a warning
+ }
+ }
+ }
+ my @child_nodes = $message_node->childNodes;
+ foreach my $child_node (@child_nodes) {
+ if ($child_node->localname eq "message") {
+ $html5_error_msg = $child_node->toString();
+ $html5_error_msg =~ s,</?message>,,gi;
+ }
+ }
+ # formatting the error message for output
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $html5_error_line;
+ $err->{char} = $html5_error_col;
+ $err->{num} = 'html5';
+ $err->{msg} = $html5_error_msg;
+ push @{$File->{Errors}}, $err;
+ # @@ TODO message explanation / elaboration
+ }
+ }
+return $File;
+}
- # TODO switch parser on the fly
+sub dtd_validate (\$) {
+ my $File = shift;
my $opensp = SGML::Parser::OpenSP->new();
my $parser_name = "SGML::Parser::OpenSP";
#
@@ -1160,8 +1256,6 @@ sub fin_template ($$) {
if (! $File->{Doctype} and ($File->{Version} eq 'unknown' or $File->{Version} eq 'SGML' or (!$File->{Version}))) {
- # @@TODO@@ we should try falling back on other version
- # info, such as the ones stored in Version_ESIS
my $default_doctype = ($File->{Mode} eq 'XML' ?
$File->{"Default DOCTYPE"}->{"XHTML"} : $File->{"Default DOCTYPE"}->{"HTML"});
$T->param(file_version => "$default_doctype");