summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rwxr-xr-x httpd/cgi-bin/check 137
1 files changed, 83 insertions, 54 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 7bea8b5..143196e 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.602 2008-08-26 14:59:13 ot Exp $
+# $Id: check,v 1.603 2008-08-26 16:09:17 ot Exp $
#
# Disable buffering on STDOUT!
@@ -191,7 +191,7 @@ Directory not readable (permission denied): @_r
#
# Strings
- $VERSION = q$Revision: 1.602 $;
+ $VERSION = q$Revision: 1.603 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
#
@@ -263,7 +263,7 @@ $File->{Charset}->{Override} = ''; # From CGI/user override.
#
# Misc simple types.
-$File->{Mode} = 'SGML'; # Default parse mode is SGML.
+$File->{Mode} = 'DTD+SGML'; # Default parse mode is DTD validation in SGML mode.
# By default, perform validation (we may perform only xml-wf in some cases)
$File->{XMLWF_ONLY} = FALSE;
@@ -833,9 +833,12 @@ sub html5_validate (\$) {
my $File = shift;
my $ua = new W3C::Validator::UserAgent ($CFG, $File);
my $html5_parser = "";
- if ($File->{Mode} eq 'XML') {
+ if ($File->{Mode} =~ /XML/) {
$html5_parser = "xml";
}
+
+ $File->{ParserName} = "validator.nu";
+ $File->{ParserOpts} = "";
$ua->env_proxy();
$ua->agent($File->{Opt}->{'User Agent'});
$ua->parse_head(0); # Don't parse the http-equiv stuff.
@@ -934,6 +937,10 @@ sub dtd_validate (\$) {
# default parsing options
my @spopt = qw(valid non-sgml-char-ref no-duplicate);
+ $File->{ParserName} = $parser_name;
+ $File->{ParserOpts} = join " ", @spopt;
+
+
#
# Switch to XML semantics if file is XML.
if (&is_xml($File)) {
@@ -966,39 +973,6 @@ sub dtd_validate (\$) {
# so restricted file reading would defunct the Validator.
$opensp->restrict_file_reading(1) unless $^O eq 'MSWin32';
- #
- # Set debug info for HTML report.
- $File->{Templates}->{Result}->param(opt_debug => $DEBUG);
- $File->{Templates}->{Result}->param(debug =>
- [
- map({name => $_, value => $ENV{$_}},
- qw(no_proxy http_proxy https_proxy ftp_proxy FTP_PASSIVE)),
- { name => 'Content-Encoding', value => $File->{ContentEnc} },
- { name => 'Content-Language', value => $File->{ContentLang} },
- { name => 'Content-Location', value => $File->{ContentLoc} },
- { name => 'Transfer-Encoding', value => $File->{TransferEnc} },
- { name => 'Parse Mode', value => $File->{Mode} },
- { name => 'Parse Mode Factor', value => $File->{ModeChoice} },
- { name => 'Parser', value => $parser_name },
- { name => 'Parser Options', value => join " ", @spopt },
- ],
- );
- $File->{Templates}->{SOAP}->param(opt_debug => $DEBUG);
- $File->{Templates}->{SOAP}->param(debug =>
- [
- map({name => $_, value => $ENV{$_}},
- qw(no_proxy http_proxy https_proxy ftp_proxy FTP_PASSIVE)),
- { name => 'Content-Encoding', value => $File->{ContentEnc} },
- { name => 'Content-Language', value => $File->{ContentLang} },
- { name => 'Content-Location', value => $File->{ContentLoc} },
- { name => 'Transfer-Encoding', value => $File->{TransferEnc} },
- { name => 'Parse Mode', value => $File->{Mode} },
- { name => 'Parse Mode Factor', value => $File->{ModeChoice} },
- { name => 'Parser', value => $parser_name },
- { name => 'Parser Options', value => join " ", @spopt },
-
- ],
- );
my $h; # event handler
if ($File->{Opt}->{'Outline'}) {
@@ -1273,9 +1247,29 @@ sub fin_template ($$) {
my $File = shift;
my $T = shift;
+
+
+ #
+ # Set debug info for HTML report.
+ $T->param(opt_debug => $DEBUG);
+ $T->param(debug =>
+ [
+ map({name => $_, value => $ENV{$_}},
+ qw(no_proxy http_proxy https_proxy ftp_proxy FTP_PASSIVE)),
+ { name => 'Content-Encoding', value => $File->{ContentEnc} },
+ { name => 'Content-Language', value => $File->{ContentLang} },
+ { name => 'Content-Location', value => $File->{ContentLoc} },
+ { name => 'Transfer-Encoding', value => $File->{TransferEnc} },
+ { name => 'Parse Mode', value => $File->{Mode} },
+ { name => 'Parse Mode Factor', value => $File->{ModeChoice} },
+ { name => 'Parser', value => $File->{ParserName} },
+ { name => 'Parser Options', value => $File->{ParserOpts} },
+ ],
+ );
+
if (! $File->{Doctype} and ($File->{Version} eq 'unknown' or $File->{Version} eq 'SGML' or (!$File->{Version}))) {
- my $default_doctype = ($File->{Mode} eq 'XML' ?
+ my $default_doctype = ($File->{Mode} =~ /XML/ ?
$File->{"Default DOCTYPE"}->{"XHTML"} : $File->{"Default DOCTYPE"}->{"HTML"});
$T->param(file_version => "$default_doctype");
}
@@ -2133,7 +2127,7 @@ sub preparse_doctype {
# if content-type has shown we should pre-parse with XML mode, use that
# otherwise (mostly text/html cases) use default mode
- $p->xml_mode(TRUE) if ($File->{Mode} eq 'XML');
+ $p->xml_mode(TRUE) if ($File->{Mode} =~ /XML/);
$p->ignore_elements('BODY');
$p->ignore_elements('body');
$p->handler(declaration => $dtd, 'text');
@@ -2339,6 +2333,7 @@ sub prepCGI {
# * HTTP Content-Type
# * Doctype Declaration
# * XML Declaration
+# * XML namespaces
sub set_parse_mode {
my $File = shift;
my $CFG = shift;
@@ -2346,6 +2341,8 @@ sub set_parse_mode {
$File->{ModeChoice} = '';
my $parseModeFromDoctype = $CFG->{Types}->{$fpi}->{'Parse Mode'} || 'TBD';
+ # $File->{Mode} may have been set in parse_content_type
+ # and it would come from the Media Type
my $parseModeFromMimeType = $File->{Mode};
my $begincontent = join "\x20",@{$File->{Content}}; # for the sake of xml decl detection,
# the 10 first lines should be safe
@@ -2390,8 +2387,8 @@ sub set_parse_mode {
if (($parseModeFromDoctype eq 'TBD') and ($parseModeFromXMLDecl eq 'TBD') and ($parseModeFromMimeType eq 'TBD') and ($parseModeFromNamespace eq 'TBD')) {
# if all factors are useless to give us a parse mode
- # => we use SGML as a default
- $File->{Mode} = 'SGML';
+ # => we use SGML-based DTD validation as a default
+ $File->{Mode} = 'DTD+SGML';
$File->{ModeChoice} = 'Fallback';
# and send warning about the fallback
&add_warning('W06', {
@@ -2414,28 +2411,60 @@ sub set_parse_mode {
}
# mime type has precedence, we stick to it
$File->{ModeChoice} = 'Mime';
+ if ($parseModeFromDoctype eq "HTML5") {
+ $File->{Mode} = 'HTML5+'.$File->{Mode};
+ } else {
+ $File->{Mode} = 'DTD+'.$File->{Mode};
+ }
return;
}
elsif ($parseModeFromDoctype ne 'TBD') {
# the mime type is ambiguous (hence we didn't stop at the previous test)
# but by now we're sure that the document type is a good indication
# so we use that.
- $File->{Mode} = $parseModeFromDoctype;
+ if ($parseModeFromDoctype eq "HTML5") {
+ if ($parseModeFromXMLDecl eq "XML" or $parseModeFromNamespace eq "XML") {
+ $File->{Mode} = "HTML5+XML";
+ }
+ else {
+ $File->{Mode} = "HTML5";
+ }
+ }
+ else { # not HTML5
+ $File->{Mode} = "DTD+".$parseModeFromDoctype;
+ }
$File->{ModeChoice} = 'Doctype';
return;
}
elsif ($parseModeFromXMLDecl ne 'TBD') {
# the mime type is ambiguous (hence we didn't stop at the previous test)
- # but by now we're sure that the document type is a good indication
+ # and so was the doctype
+ # but we found an XML declaration
# so we use that.
- $File->{Mode} = $parseModeFromXMLDecl;
+ if ($File->{Mode} eq "") {
+ $File->{Mode} = "DTD+".$parseModeFromXMLDecl;
+ }
+ elsif ($File->{Mode} =~ /\+/ ) {
+ $File->{Mode} =~ s/\+.*/\+$parseModeFromXMLDecl/;
+ }
+ else {
+ $File->{Mode} = $File->{Mode}."+".$parseModeFromXMLDecl;
+ }
$File->{ModeChoice} = 'XMLDecl';
return;
}
else {
- # this is the last case. We know that all three modes are not TBD,
- # yet both mime type and doctype tests have failed => we are saved by the XML declaration
- $File->{Mode} = $parseModeFromNamespace;
+ # this is the last case. We know that not all modes are TBD,
+ # yet the mime type, doctype AND XML declaration tests have failed => we are saved by the presence of namespaces
+ if ($File->{Mode} eq "") {
+ $File->{Mode} = "DTD+".$parseModeFromNamespace;
+ }
+ elsif ($File->{Mode} =~ /\+/ ) {
+ $File->{Mode} =~ s/\+.*/\+$parseModeFromNamespace/;
+ }
+ else {
+ $File->{Mode} = $File->{Mode}."+".$parseModeFromNamespace;
+ }
$File->{ModeChoice} = 'Namespace';
}
}
@@ -2443,7 +2472,7 @@ sub set_parse_mode {
#
# Utility sub to tell if mode "is" XML.
-sub is_xml {shift->{Mode} eq 'XML'};
+sub is_xml {shift->{Mode} =~ /XML/};
#
# Check charset conflicts and add any warnings necessary.
@@ -2768,7 +2797,7 @@ sub start_element
my $has_xmlns = FALSE;
my $xmlns_value = undef;
- if ( ($self->{_file}->{Mode} eq 'XML')){
+ if ( ($self->{_file}->{Mode} =~ /XML/)){
# if in XML mode, find namespace used for each element
foreach my $attr (keys %{$element->{Attributes}}) {
if ($element->{Attributes}->{$attr}->{Name} eq "xmlns") {
@@ -2868,7 +2897,7 @@ sub error
# our parser OpenSP is not quite XML-aware, or XML Namespaces Aware,
# so we filter out a few errors for now
- if ($File->{Mode} eq 'XML') {
+ if ($File->{Mode} =~ /XML/) {
if ($err->{num} eq '108' and $err->{msg} =~ m{ "xmlns:\S+"}) {
# the error is about a missing xmlns: attribute definition"
return ; # this is not an error, 'cause we said so
@@ -2880,7 +2909,7 @@ sub error
# if root element is not html and mode is xml...
{
# since parsing was done without validation, result can only be "well-formed"
- if ($File->{Mode} eq 'XML' and lc($File->{Root}) ne 'html') {
+ if ($File->{Mode} =~ /XML/ and lc($File->{Root}) ne 'html') {
$File->{XMLWF_ONLY} = TRUE;
W3C::Validator::MarkupValidator::add_warning('W09xml', {});
return; # don't report this as an error, just proceed
@@ -2895,7 +2924,7 @@ sub error
# hoping to get the DTDs fixed, see http://lists.w3.org/Archives/Public/www-html-editor/2007AprJun/0010.html
return; # don't report this, just proceed
}
- if (($err->{num} eq '344') and ($File->{Namespace}) and ($File->{Mode} eq 'XML') ) {
+ if (($err->{num} eq '344') and ($File->{Namespace}) and ($File->{Mode} =~ /XML/) ) {
# we are in XML mode, we have a namespace, but no doctype.
# the validator will already have said "no doctype, falling back to default" above
# no need to report this.
@@ -2933,12 +2962,12 @@ sub error
# No DOCTYPE found! We are falling back to vanilla DTD
if ($err->{msg} =~ m(prolog can\'t be omitted)) {
if (lc($File->{Root}) eq 'html') {
- my $dtd = ($File->{Mode} eq 'XML' ?
+ my $dtd = ($File->{Mode} =~ /XML/ ?
$File->{"Default DOCTYPE"}->{"XHTML"} : $File->{"Default DOCTYPE"}->{"HTML"} );
W3C::Validator::MarkupValidator::add_warning('W09', {W09_dtd => $dtd});
}
else { # not html root element, we are not using fallback
- if ($File->{Mode} ne 'XML') {
+ if ($File->{Mode} !~ /XML/) { # must be "!~": "! $x =~ /re/" parses as "(! $x) =~ /re/", which can never match
$File->{'Is Valid'} = FALSE;
W3C::Validator::MarkupValidator::add_warning('W09nohtml', {});
}