summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorville <ville@localhost>2009-11-23 22:15:19 +0000
committerville <ville@localhost>2009-11-23 22:15:19 +0000
commit5b60b407ff2794593e3ccffd7533d5e2449e08c8 (patch)
tree2eca829ccbc61f847db1be6f4d6cfa934055b073
parent675a1c5355fc61459257de958e7090837e36507a (diff)
downloadmarkup-validator-5b60b407ff2794593e3ccffd7533d5e2449e08c8.zip
markup-validator-5b60b407ff2794593e3ccffd7533d5e2449e08c8.tar.gz
markup-validator-5b60b407ff2794593e3ccffd7533d5e2449e08c8.tar.bz2
Run perltidy on perl sources.
-rwxr-xr-xhttpd/cgi-bin/check5439
-rwxr-xr-xhttpd/cgi-bin/sendfeedback.pl154
-rw-r--r--misc/bundle/Makefile.PL70
-rwxr-xr-xmisc/docs_errors.pl183
-rwxr-xr-xmisc/spmpp.pl18
5 files changed, 3124 insertions, 2740 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 94cb85d..b0bf649 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.719 2009-11-16 19:50:26 ville Exp $
+# $Id: check,v 1.720 2009-11-23 22:15:18 ville Exp $
#
# Disable buffering on STDOUT!
$| = 1;
@@ -33,8 +33,8 @@ use strict;
use warnings;
use utf8;
-
package W3C::Validator::MarkupValidator;
+
#
# Modules. See also the BEGIN block further down below.
#
@@ -44,26 +44,26 @@ package W3C::Validator::MarkupValidator;
# polluting our namespace.
#
-use CGI 2.81 qw(-newstyle_urls -private_tempfiles redirect);
-use CGI::Carp qw(carp croak fatalsToBrowser);
-use Config::General 2.32 qw(); # Need 2.32 for <msg 0>, rt.cpan.org#17852
-use Encode qw();
-use Encode::Alias qw();
-use Encode::HanExtra qw(); # for some chinese character encodings,
- # e.g gb18030
-use File::Spec::Functions qw(catfile rel2abs tmpdir);
-use HTML::Encoding 0.52 qw();
-use HTML::Parser 3.24 qw(); # Need 3.24 for $p->parse($code_ref)
-use HTML::Template 2.6 qw(); # Need 2.6 for path param, other things.
-use HTTP::Headers::Util qw();
-use HTTP::Message 1.52 qw(); # Need 1.52 for decoded_content()
-use HTTP::Request qw();
-use HTTP::Headers::Auth qw(); # Needs to be imported after other HTTP::*.
-use JSON 2.00 qw();
+use CGI 2.81 qw(-newstyle_urls -private_tempfiles redirect);
+use CGI::Carp qw(carp croak fatalsToBrowser);
+use Config::General 2.32 qw(); # Need 2.32 for <msg 0>, rt.cpan.org#17852
+use Encode qw();
+use Encode::Alias qw();
+use Encode::HanExtra qw(); # for some chinese character encodings,
+ # e.g gb18030
+use File::Spec::Functions qw(catfile rel2abs tmpdir);
+use HTML::Encoding 0.52 qw();
+use HTML::Parser 3.24 qw(); # Need 3.24 for $p->parse($code_ref)
+use HTML::Template 2.6 qw(); # Need 2.6 for path param, other things.
+use HTTP::Headers::Util qw();
+use HTTP::Message 1.52 qw(); # Need 1.52 for decoded_content()
+use HTTP::Request qw();
+use HTTP::Headers::Auth qw(); # Needs to be imported after other HTTP::*.
+use JSON 2.00 qw();
use SGML::Parser::OpenSP 0.991 qw();
-use URI qw();
-use URI::Escape qw(uri_escape);
-use XML::LibXML qw();
+use URI qw();
+use URI::Escape qw(uri_escape);
+use XML::LibXML qw();
###############################################################################
#### Constant definitions. ####################################################
@@ -76,160 +76,166 @@ use constant FALSE => 0;
#
# Tentative Validation Severities.
-use constant T_WARN => 4; # 0000 0100
-use constant T_ERROR => 8; # 0000 1000
+use constant T_WARN => 4; # 0000 0100
+use constant T_ERROR => 8; # 0000 1000
#
# Output flags for error processing
-use constant O_SOURCE => 1; # 0000 0001
-use constant O_CHARSET => 2; # 0000 0010
-use constant O_DOCTYPE => 4; # 0000 0100
-use constant O_NONE => 8; # 0000 1000
+use constant O_SOURCE => 1; # 0000 0001
+use constant O_CHARSET => 2; # 0000 0010
+use constant O_DOCTYPE => 4; # 0000 0100
+use constant O_NONE => 8; # 0000 1000
#
# Define global variables.
use vars qw($DEBUG $CFG %RSRC $VERSION);
use constant IS_MODPERL2 =>
- (exists($ENV{MOD_PERL_API_VERSION}) && $ENV{MOD_PERL_API_VERSION} >= 2);
+ (exists($ENV{MOD_PERL_API_VERSION}) && $ENV{MOD_PERL_API_VERSION} >= 2);
#
# Things inside BEGIN don't happen on every request in persistent environments
# (such as mod_perl); so let's do the globals, eg. read config, here.
BEGIN {
- # Launder data for -T; -AutoLaunder doesn't catch this one.
- if (exists $ENV{W3C_VALIDATOR_HOME}) {
- $ENV{W3C_VALIDATOR_HOME} =~ /^(.*)$/;
- $ENV{W3C_VALIDATOR_HOME} = $1;
- }
-
- #
- # Read Config Files.
- eval {
- my %config_opts = (
- -ConfigFile => ($ENV{W3C_VALIDATOR_CFG} || '/etc/w3c/validator.conf'),
- -MergeDuplicateOptions => TRUE,
- -MergeDuplicateBlocks => TRUE,
- -SplitPolicy => 'equalsign',
- -UseApacheInclude => TRUE,
- -IncludeRelative => TRUE,
- -InterPolateVars => TRUE,
- -AutoLaunder => TRUE,
- -AutoTrue => TRUE,
- -DefaultConfig => {
- Protocols => {Allow => 'http,https'},
- Paths => {
- Base => ($ENV{W3C_VALIDATOR_HOME} || '/usr/local/validator'),
- Cache => '',
- },
- External => {
- HTML5 => FALSE,
- },
- },
- );
- my %cfg = Config::General->new(%config_opts)->getall();
- $CFG = \%cfg;
- };
- if ($@) {
- die <<".EOF.";
+
+ # Launder data for -T; -AutoLaunder doesn't catch this one.
+ if (exists $ENV{W3C_VALIDATOR_HOME}) {
+ $ENV{W3C_VALIDATOR_HOME} =~ /^(.*)$/;
+ $ENV{W3C_VALIDATOR_HOME} = $1;
+ }
+
+ #
+ # Read Config Files.
+ eval {
+ my %config_opts = (
+ -ConfigFile =>
+ ($ENV{W3C_VALIDATOR_CFG} || '/etc/w3c/validator.conf'),
+ -MergeDuplicateOptions => TRUE,
+ -MergeDuplicateBlocks => TRUE,
+ -SplitPolicy => 'equalsign',
+ -UseApacheInclude => TRUE,
+ -IncludeRelative => TRUE,
+ -InterPolateVars => TRUE,
+ -AutoLaunder => TRUE,
+ -AutoTrue => TRUE,
+ -DefaultConfig => {
+ Protocols => {Allow => 'http,https'},
+ Paths => {
+ Base =>
+ ($ENV{W3C_VALIDATOR_HOME} || '/usr/local/validator'),
+ Cache => '',
+ },
+ External => {HTML5 => FALSE,},
+ },
+ );
+ my %cfg = Config::General->new(%config_opts)->getall();
+ $CFG = \%cfg;
+ };
+ if ($@) {
+ die <<".EOF.";
Could not read configuration. Set the W3C_VALIDATOR_CFG environment variable
or copy conf/* to /etc/w3c/. Make sure that the configuration file and all
included files are readable by the web server user. The error was:\n'$@'
.EOF.
- }
-
- #
- # Check a filesystem path for existance and "readability".
- sub pathcheck (@) {
- my %paths = map { $_ => [-d $_, -r _] } @_;
- my @_d = grep {not $paths{$_}->[0]} keys %paths;
- my @_r = grep {not $paths{$_}->[1]} keys %paths;
- return TRUE if (scalar(@_d) + scalar(@_r) == 0);
- die <<".EOF." if scalar @_d;
+ }
+
+ #
+ # Check a filesystem path for existance and "readability".
+ sub pathcheck (@)
+ {
+ my %paths = map { $_ => [-d $_, -r _] } @_;
+ my @_d = grep { not $paths{$_}->[0] } keys %paths;
+ my @_r = grep { not $paths{$_}->[1] } keys %paths;
+ return TRUE if (scalar(@_d) + scalar(@_r) == 0);
+ die <<".EOF." if scalar @_d;
Does not exist or is not a directory: @_d
.EOF.
- die <<".EOF." if scalar @_r;
+ die <<".EOF." if scalar @_r;
Directory not readable (permission denied): @_r
.EOF.
- }
-
- #
- # Check paths in config...
- # @@FIXME: This does not do a very good job error-message-wise if a path is
- # @@FIXME: missing...;
- {
- my @dirs = ();
- push @dirs, $CFG->{Paths}->{Base};
- push @dirs, $CFG->{Paths}->{Templates};
- push @dirs, $CFG->{Paths}->{SGML}->{Library};
- &pathcheck(@dirs);
- }
-
- #
- # Split allowed protocols into a list.
- if (my $allowed = delete($CFG->{Protocols}->{Allow})) {
- $CFG->{Protocols}->{Allow} = [split(/\s*,\s*/, $allowed)];
- }
-
- # Split available languages into a list
- if (my $langs = delete($CFG->{Languages})) {
- $CFG->{Languages} = [split(/\s+/, $langs)];
- } else {
- # Default to english
- $CFG->{Languages} = ["en"];
- }
-
- { # Make types config indexed by FPI.
- my $_types = {};
- map { $_types->{$CFG->{Types}->{$_}->{PubID}} = $CFG->{Types}->{$_} }
- keys %{$CFG->{Types}};
- $CFG->{Types} = $_types;
- }
-
- #
- # Change strings to internal constants in MIME type mapping.
- for (keys %{$CFG->{MIME}}) {
- $CFG->{MIME}->{$_} = 'TBD' unless $CFG->{MIME}->{$_} eq 'SGML'
- or $CFG->{MIME}->{$_} eq 'XML';
- }
-
- #
- # Register Encode aliases.
- while (my ($key, $value) = each %{$CFG->{Charsets}}) {
- Encode::Alias::define_alias($key, $1) if ($value =~ /^[AX] (\S+)/);
- }
-
- #
- # Set debug flag.
- if ($CFG->{'Allow Debug'}) {
- $DEBUG = TRUE if $ENV{W3C_VALIDATOR_DEBUG} || $CFG->{'Enable Debug'};
- } else {
- $DEBUG = FALSE;
- }
-
- #
- # Strings
- $VERSION = q$Revision: 1.719 $;
- $VERSION =~ s/Revision: ([\d\.]+) /$1/;
-
- #
- # Use passive FTP by default.
- $ENV{FTP_PASSIVE} = 1 unless exists($ENV{FTP_PASSIVE});
-
- # Read friendly error message file
- # 'en_US' should be replaced by $lang for lang-neg
- %RSRC = Config::General->new(
- -MergeDuplicateBlocks => 1,
- -ConfigFile => catfile($CFG->{Paths}->{Templates}, 'en_US',
- 'error_messages.cfg'),
- )->getall();
-
- eval {
- local $SIG{__DIE__};
- require Encode::JIS2K; # for optional extra Japanese encodings
- };
-
-} # end of BEGIN block.
+ }
+
+ #
+ # Check paths in config...
+ # @@FIXME: This does not do a very good job error-message-wise if a path is
+ # @@FIXME: missing...;
+ {
+ my @dirs = ();
+ push @dirs, $CFG->{Paths}->{Base};
+ push @dirs, $CFG->{Paths}->{Templates};
+ push @dirs, $CFG->{Paths}->{SGML}->{Library};
+ &pathcheck(@dirs);
+ }
+
+ #
+ # Split allowed protocols into a list.
+ if (my $allowed = delete($CFG->{Protocols}->{Allow})) {
+ $CFG->{Protocols}->{Allow} = [split(/\s*,\s*/, $allowed)];
+ }
+
+ # Split available languages into a list
+ if (my $langs = delete($CFG->{Languages})) {
+ $CFG->{Languages} = [split(/\s+/, $langs)];
+ }
+ else {
+
+ # Default to english
+ $CFG->{Languages} = ["en"];
+ }
+
+ { # Make types config indexed by FPI.
+ my $_types = {};
+ map { $_types->{$CFG->{Types}->{$_}->{PubID}} = $CFG->{Types}->{$_} }
+ keys %{$CFG->{Types}};
+ $CFG->{Types} = $_types;
+ }
+
+ #
+ # Change strings to internal constants in MIME type mapping.
+ for (keys %{$CFG->{MIME}}) {
+ $CFG->{MIME}->{$_} = 'TBD'
+ unless $CFG->{MIME}->{$_} eq 'SGML' or
+ $CFG->{MIME}->{$_} eq 'XML';
+ }
+
+ #
+ # Register Encode aliases.
+ while (my ($key, $value) = each %{$CFG->{Charsets}}) {
+ Encode::Alias::define_alias($key, $1) if ($value =~ /^[AX] (\S+)/);
+ }
+
+ #
+ # Set debug flag.
+ if ($CFG->{'Allow Debug'}) {
+ $DEBUG = TRUE if $ENV{W3C_VALIDATOR_DEBUG} || $CFG->{'Enable Debug'};
+ }
+ else {
+ $DEBUG = FALSE;
+ }
+
+ #
+ # Strings
+ $VERSION = q$Revision: 1.720 $;
+ $VERSION =~ s/Revision: ([\d\.]+) /$1/;
+
+ #
+ # Use passive FTP by default.
+ $ENV{FTP_PASSIVE} = 1 unless exists($ENV{FTP_PASSIVE});
+
+ # Read friendly error message file
+ # 'en_US' should be replaced by $lang for lang-neg
+ %RSRC = Config::General->new(
+ -MergeDuplicateBlocks => 1,
+ -ConfigFile =>
+ catfile($CFG->{Paths}->{Templates}, 'en_US', 'error_messages.cfg'),
+ )->getall();
+
+ eval {
+ local $SIG{__DIE__};
+ require Encode::JIS2K; # for optional extra Japanese encodings
+ };
+
+} # end of BEGIN block.
#
# Get rid of (possibly insecure) $PATH.
@@ -274,18 +280,20 @@ $File->{Charset}->{Override} = ''; # From CGI/user override.
#
# Misc simple types.
-$File->{Mode} = 'DTD+SGML'; # Default parse mode is DTD validation in SGML mode.
+$File->{Mode} =
+ 'DTD+SGML'; # Default parse mode is DTD validation in SGML mode.
# By default, perform validation (we may perform only xml-wf in some cases)
$File->{XMLWF_ONLY} = FALSE;
+
#
# Listrefs.
-$File->{Warnings} = []; # Warnings...
-$File->{Namespaces} = []; # Other (non-root) Namespaces.
+$File->{Warnings} = []; # Warnings...
+$File->{Namespaces} = []; # Other (non-root) Namespaces.
# By default, doctype-less documents can not be valid
-$File->{"DOCTYPEless OK"} = FALSE;
-$File->{"Default DOCTYPE"}->{"HTML"} = 'HTML 4.01 Transitional';
+$File->{"DOCTYPEless OK"} = FALSE;
+$File->{"Default DOCTYPE"}->{"HTML"} = 'HTML 4.01 Transitional';
$File->{"Default DOCTYPE"}->{"XHTML"} = 'XHTML 1.0 Transitional';
###############################################################################
@@ -299,42 +307,46 @@ $File->{"Default DOCTYPE"}->{"XHTML"} = 'XHTML 1.0 Transitional';
my $lang = $q->param('lang') || '';
my @localizations;
foreach my $lang_available (@{$CFG->{Languages}}) {
- if ($lang eq $lang_available) {
- # Requested language (from parameters) is available, just use it
- undef @localizations;
- last;
- }
- push @localizations,
- [$lang_available, 1, 'text/html', undef, 'utf-8', $lang_available, undef];
+ if ($lang eq $lang_available) {
+
+ # Requested language (from parameters) is available, just use it
+ undef @localizations;
+ last;
+ }
+ push @localizations,
+ [
+ $lang_available, 1, 'text/html', undef,
+ 'utf-8', $lang_available, undef
+ ];
}
# If language is not chosen yet, use HTTP-based negotiation
if (@localizations) {
- require HTTP::Negotiate;
- $lang = HTTP::Negotiate::choose(\@localizations);
+ require HTTP::Negotiate;
+ $lang = HTTP::Negotiate::choose(\@localizations);
}
# HTTP::Negotiate::choose may return undef e.g if sent Accept-Language: en;q=0
$lang ||= 'en_US';
if ($lang eq "en") {
- $lang = 'en_US'; # legacy
+ $lang = 'en_US'; # legacy
}
$File->{Template_Defaults} = {
- die_on_bad_params => FALSE,
- loop_context_vars => TRUE,
- global_vars => TRUE,
- path => [ catfile($CFG->{Paths}->{Templates}, $lang) ],
- filter => sub { my $ref = shift; ${$ref} = Encode::decode_utf8(${$ref}); },
+ die_on_bad_params => FALSE,
+ loop_context_vars => TRUE,
+ global_vars => TRUE,
+ path => [catfile($CFG->{Paths}->{Templates}, $lang)],
+ filter => sub { my $ref = shift; ${$ref} = Encode::decode_utf8(${$ref}); },
};
if (IS_MODPERL2()) {
- $File->{Template_Defaults}->{cache} = TRUE;
+ $File->{Template_Defaults}->{cache} = TRUE;
}
elsif ($CFG->{Paths}->{Cache}) {
- $File->{Template_Defaults}->{file_cache} = TRUE;
- $File->{Template_Defaults}->{file_cache_dir} =
- rel2abs($CFG->{Paths}->{Cache}, tmpdir());
+ $File->{Template_Defaults}->{file_cache} = TRUE;
+ $File->{Template_Defaults}->{file_cache_dir} =
+ rel2abs($CFG->{Paths}->{Cache}, tmpdir());
}
undef $lang;
@@ -349,32 +361,35 @@ $q = &prepCGI($File, $q);
#
# Set session switches.
-$File->{Opt}->{Outline} = $q->param('outline') ? TRUE : FALSE;
-$File->{Opt}->{'Show Source'} = $q->param('ss') ? TRUE : FALSE;
-$File->{Opt}->{'Show Tidy'} = $q->param('st') ? TRUE : FALSE;
-$File->{Opt}->{Verbose} = $q->param('verbose') ? TRUE : FALSE;
-$File->{Opt}->{'Group Errors'} = $q->param('group') ? TRUE : FALSE;
-$File->{Opt}->{Debug} = $q->param('debug') ? TRUE : FALSE;
-$File->{Opt}->{No200} = $q->param('No200') ? TRUE : FALSE;
-$File->{Opt}->{Prefill} = $q->param('prefill') ? TRUE : FALSE;
+$File->{Opt}->{Outline} = $q->param('outline') ? TRUE : FALSE;
+$File->{Opt}->{'Show Source'} = $q->param('ss') ? TRUE : FALSE;
+$File->{Opt}->{'Show Tidy'} = $q->param('st') ? TRUE : FALSE;
+$File->{Opt}->{Verbose} = $q->param('verbose') ? TRUE : FALSE;
+$File->{Opt}->{'Group Errors'} = $q->param('group') ? TRUE : FALSE;
+$File->{Opt}->{Debug} = $q->param('debug') ? TRUE : FALSE;
+$File->{Opt}->{No200} = $q->param('No200') ? TRUE : FALSE;
+$File->{Opt}->{Prefill} = $q->param('prefill') ? TRUE : FALSE;
$File->{Opt}->{'Prefill Doctype'} = $q->param('prefill_doctype') || 'html401';
-$File->{Opt}->{Charset} = lc($q->param('charset') || '');
-$File->{Opt}->{DOCTYPE} = $q->param('doctype') || '';
-$File->{Opt}->{Output} = $q->param('output') || 'html';
-
-$File->{Opt}->{'User Agent'} = $q->param('user-agent') && $q->param('user-agent') ne 1 ? $q->param('user-agent') : "W3C_Validator/$VERSION";
+$File->{Opt}->{Charset} = lc($q->param('charset') || '');
+$File->{Opt}->{DOCTYPE} = $q->param('doctype') || '';
+$File->{Opt}->{Output} = $q->param('output') || 'html';
+
+$File->{Opt}->{'User Agent'} =
+ $q->param('user-agent') &&
+ $q->param('user-agent') ne 1 ? $q->param('user-agent') :
+ "W3C_Validator/$VERSION";
$File->{Opt}->{'User Agent'} =~ tr/\x00-\x09\x0b\x0c-\x1f//d;
if ($File->{Opt}->{'User Agent'} eq 'mobileok') {
- $File->{Opt}->{'User Agent'} = 'W3C-mobileOK/DDC-1.0 (see http://www.w3.org/2006/07/mobileok-ddc)';
+ $File->{Opt}->{'User Agent'} =
+ 'W3C-mobileOK/DDC-1.0 (see http://www.w3.org/2006/07/mobileok-ddc)';
}
-
$File->{Opt}->{'Accept Header'} = $q->param('accept') || '';
$File->{Opt}->{'Accept-Language Header'} = $q->param('accept-language') || '';
$File->{Opt}->{'Accept-Charset Header'} = $q->param('accept-charset') || '';
$File->{Opt}->{$_} =~ tr/\x00-\x09\x0b\x0c-\x1f//d
- for ('Accept Header', 'Accept-Language Header', 'Accept-Charset Header');
+ for ('Accept Header', 'Accept-Language Header', 'Accept-Charset Header');
#
# "Fallback" info for Character Encoding (fbc), Content-Type (fbt),
@@ -389,10 +404,11 @@ $File->{Opt}->{FB}->{DOCTYPE} = $q->param('fbd') ? TRUE : FALSE;
# regardless of whether it's "0" or "1" (on or off), but only if config
# allows the debugging options.
if ($CFG->{'Allow Debug'}) {
- $DEBUG = $q->param('debug') if defined $q->param('debug');
- $File->{Opt}->{Verbose} = TRUE if $DEBUG;
-} else {
- $DEBUG = FALSE; # The default.
+ $DEBUG = $q->param('debug') if defined $q->param('debug');
+ $File->{Opt}->{Verbose} = TRUE if $DEBUG;
+}
+else {
+ $DEBUG = FALSE; # The default.
}
$File->{Opt}->{Debug} = $DEBUG;
@@ -401,11 +417,13 @@ $File->{Opt}->{Debug} = $DEBUG;
#
# Get the file and metadata.
if ($q->param('uploaded_file')) {
- $File = &handle_file($q, $File);
-} elsif ($q->param('fragment')) {
- $File = &handle_frag($q, $File);
-} elsif ($q->param('uri')) {
- $File = &handle_uri($q, $File);
+ $File = &handle_file($q, $File);
+}
+elsif ($q->param('fragment')) {
+ $File = &handle_frag($q, $File);
+}
+elsif ($q->param('uri')) {
+ $File = &handle_uri($q, $File);
}
#
@@ -430,79 +448,98 @@ $File = find_encodings($File);
#
# Decide on a charset to use (first part)
#
-if ($File->{Charset}->{HTTP}) { # HTTP, if given, is authoritative.
- $File->{Charset}->{Use} = $File->{Charset}->{HTTP};
-} elsif ($File->{ContentType} =~ m(^text/([-.a-zA-Z0-9]\+)?xml$)) {
- # Act as if $http_charset was 'us-ascii'. (MIME rules)
- $File->{Charset}->{Use} = 'us-ascii';
-
- &add_warning('W01', {
- W01_upload => $File->{'Is Upload'},
- W01_agent => $File->{Server},
- W01_ct => $File->{ContentType},
- });
-
-} elsif ($File->{Charset}->{XML}) {
- $File->{Charset}->{Use} = $File->{Charset}->{XML};
-} elsif ($File->{Charset}->{Auto} =~ /^utf-16[bl]e$/ && $File->{BOM} == 2) {
- $File->{Charset}->{Use} = 'utf-16';
-} elsif ($File->{ContentType} =~ m(^application/([-.a-zA-Z0-9]+\+)?xml$)) {
- $File->{Charset}->{Use} = "utf-8";
-} elsif (&is_xml($File) and not $File->{ContentType} =~ m(^text/)) {
- $File->{Charset}->{Use} = 'utf-8'; # UTF-8 (image/svg+xml etc.)
+if ($File->{Charset}->{HTTP}) { # HTTP, if given, is authoritative.
+ $File->{Charset}->{Use} = $File->{Charset}->{HTTP};
+}
+elsif ($File->{ContentType} =~ m(^text/([-.a-zA-Z0-9]\+)?xml$)) {
+
+ # Act as if $http_charset was 'us-ascii'. (MIME rules)
+ $File->{Charset}->{Use} = 'us-ascii';
+
+ &add_warning(
+ 'W01',
+ { W01_upload => $File->{'Is Upload'},
+ W01_agent => $File->{Server},
+ W01_ct => $File->{ContentType},
+ }
+ );
+
+}
+elsif ($File->{Charset}->{XML}) {
+ $File->{Charset}->{Use} = $File->{Charset}->{XML};
+}
+elsif ($File->{Charset}->{Auto} =~ /^utf-16[bl]e$/ && $File->{BOM} == 2) {
+ $File->{Charset}->{Use} = 'utf-16';
+}
+elsif ($File->{ContentType} =~ m(^application/([-.a-zA-Z0-9]+\+)?xml$)) {
+ $File->{Charset}->{Use} = "utf-8";
+}
+elsif (&is_xml($File) and not $File->{ContentType} =~ m(^text/)) {
+ $File->{Charset}->{Use} = 'utf-8'; # UTF-8 (image/svg+xml etc.)
}
$File->{Charset}->{Use} ||= $File->{Charset}->{META};
#
# Handle any Fallback or Override for the charset.
if (charset_not_equal($File->{Opt}->{Charset}, '(detect automatically)')) {
- # charset=foo was given to the CGI and it wasn't "autodetect" or empty.
- #
- # Extract the user-requested charset from CGI param.
- my ($override, undef) = split(/\s/, $File->{Opt}->{Charset}, 2);
- $File->{Charset}->{Override} = lc($override);
-
- if ($File->{Opt}->{FB}->{Charset}) { # charset fallback mode
- unless ($File->{Charset}->{Use}) { # no charset detected, actual fallback
- &add_warning('W02', {W02_charset => $File->{Charset}->{Override}});
- $File->{Tentative} |= T_ERROR; # Tag it as Invalid.
- $File->{Charset}->{Use} = $File->{Charset}->{Override};
- }
- }
- else { # charset "hard override" mode
- if (! $File->{Charset}->{Use}) { # overriding "nothing"
- &add_warning('W04', { W04_charset => $File->{Charset}->{Override},
- W04_override => TRUE});
- $File->{Tentative} |= T_ERROR;
- $File->{Charset}->{Use} = $File->{Charset}->{Override};
- }
- elsif ($File->{Charset}->{Override} ne $File->{Charset}->{Use}) {
- # Actually overriding something; warn about override.
- &add_warning('W03', { W03_use => $File->{Charset}->{Use},
- W03_opt => $File->{Charset}->{Override}});
- $File->{Tentative} |= T_ERROR;
- $File->{Charset}->{Use} = $File->{Charset}->{Override};
- }
- }
-}
-
-if ($File->{'Direct Input'}) { #explain why UTF-8 is forced
+
+ # charset=foo was given to the CGI and it wasn't "autodetect" or empty.
+ #
+ # Extract the user-requested charset from CGI param.
+ my ($override, undef) = split(/\s/, $File->{Opt}->{Charset}, 2);
+ $File->{Charset}->{Override} = lc($override);
+
+ if ($File->{Opt}->{FB}->{Charset}) { # charset fallback mode
+ unless ($File->{Charset}->{Use})
+ { # no charset detected, actual fallback
+ &add_warning('W02', {W02_charset => $File->{Charset}->{Override}});
+ $File->{Tentative} |= T_ERROR; # Tag it as Invalid.
+ $File->{Charset}->{Use} = $File->{Charset}->{Override};
+ }
+ }
+ else { # charset "hard override" mode
+ if (!$File->{Charset}->{Use}) { # overriding "nothing"
+ &add_warning(
+ 'W04',
+ { W04_charset => $File->{Charset}->{Override},
+ W04_override => TRUE
+ }
+ );
+ $File->{Tentative} |= T_ERROR;
+ $File->{Charset}->{Use} = $File->{Charset}->{Override};
+ }
+ elsif ($File->{Charset}->{Override} ne $File->{Charset}->{Use}) {
+
+ # Actually overriding something; warn about override.
+ &add_warning(
+ 'W03',
+ { W03_use => $File->{Charset}->{Use},
+ W03_opt => $File->{Charset}->{Override}
+ }
+ );
+ $File->{Tentative} |= T_ERROR;
+ $File->{Charset}->{Use} = $File->{Charset}->{Override};
+ }
+ }
+}
+
+if ($File->{'Direct Input'}) { #explain why UTF-8 is forced
&add_warning('W28', {});
}
-unless ($File->{Charset}->{XML} || $File->{Charset}->{META}){ #suggest character encoding info within doc
+unless ($File->{Charset}->{XML} || $File->{Charset}->{META})
+{ #suggest character encoding info within doc
&add_warning('W27', {});
}
-
#
# Abort if an error was flagged while finding the encoding.
-&abort_if_error_flagged($File, O_CHARSET|O_DOCTYPE);
+&abort_if_error_flagged($File, O_CHARSET | O_DOCTYPE);
$File->{Charset}->{Default} = FALSE;
-unless ($File->{Charset}->{Use}) { # No charset given...
- $File->{Charset}->{Use} = 'utf-8';
- $File->{Charset}->{Default} = TRUE;
- $File->{Tentative} |= T_ERROR; # Can never be valid.
+unless ($File->{Charset}->{Use}) { # No charset given...
+ $File->{Charset}->{Use} = 'utf-8';
+ $File->{Charset}->{Default} = TRUE;
+ $File->{Tentative} |= T_ERROR; # Can never be valid.
&add_warning('W04', {W04_charset => "UTF-8"});
}
@@ -511,15 +548,16 @@ $File = transcode($File);
# Try guessing if it didn't work out
if ($File->{ContentType} eq 'text/html' && $File->{Charset}->{Default}) {
- my $also_tried = 'UTF-8';
- for my $cs (qw(windows-1252 iso-8859-1)) {
- last unless $File->{'Error Flagged'};
- $File->{'Error Flagged'} = FALSE; # reset
- $File->{Charset}->{Use} = $cs;
- &add_warning('W04', { W04_charset => $cs, W04_also_tried => $also_tried });
- $File = transcode($File);
- $also_tried .= ", $cs";
- }
+ my $also_tried = 'UTF-8';
+ for my $cs (qw(windows-1252 iso-8859-1)) {
+ last unless $File->{'Error Flagged'};
+ $File->{'Error Flagged'} = FALSE; # reset
+ $File->{Charset}->{Use} = $cs;
+ &add_warning('W04',
+ {W04_charset => $cs, W04_also_tried => $also_tried});
+ $File = transcode($File);
+ $also_tried .= ", $cs";
+ }
}
# if it still does not work, we abandon hope here
@@ -528,8 +566,9 @@ if ($File->{ContentType} eq 'text/html' && $File->{Charset}->{Default}) {
#
# Add a warning if doc is UTF-8 and contains a BOM.
if ($File->{Charset}->{Use} eq 'utf-8' &&
- $File->{Content}->[0] =~ m(^\x{FEFF})) {
- &add_warning('W21', {});
+ $File->{Content}->[0] =~ m(^\x{FEFF}))
+{
+ &add_warning('W21', {});
}
#
@@ -547,30 +586,30 @@ if ($File->{Charset}->{Use} eq 'utf-8' &&
#
# Override DOCTYPE if user asked for it.
if ($File->{Opt}->{DOCTYPE}) {
- if ($File->{Opt}->{DOCTYPE} !~ /(Inline|detect)/i) {
- $File = &override_doctype($File);
- }
- else {
- # Get rid of inline|detect for easy truth value checking later
- $File->{Opt}->{DOCTYPE} = '';
- }
+ if ($File->{Opt}->{DOCTYPE} !~ /(Inline|detect)/i) {
+ $File = &override_doctype($File);
+ }
+ else {
+
+ # Get rid of inline|detect for easy truth value checking later
+ $File->{Opt}->{DOCTYPE} = '';
+ }
}
# Try to extract a DOCTYPE or xmlns.
$File = &preparse_doctype($File);
if ($File->{Opt}->{DOCTYPE} eq "HTML5") {
- $File->{DOCTYPE} = "HTML5";
- $File->{Version} = $File->{DOCTYPE};
+ $File->{DOCTYPE} = "HTML5";
+ $File->{Version} = $File->{DOCTYPE};
}
set_parse_mode($File, $CFG);
+
#
# Sanity check Charset information and add any warnings necessary.
$File = &charset_conflicts($File);
-
-
# before we start the parsing, clean slate
$File->{'Is Valid'} = TRUE;
$File->{Errors} = [];
@@ -581,312 +620,369 @@ $File->{Errors} = [];
# ditto, we should try using W3C::Validator::EventHandler,
# but it's badly linked to opensp at the moment
if (&is_xml($File)) {
- if ($File->{DOCTYPE} eq "HTML5")
- {
- # $File->{DOCTYPE} = "XHTML5";
- # $File->{Version} = "XHTML5";
- }
- else {
- my $xmlparser = XML::LibXML->new();
- $xmlparser->line_numbers(1);
- $xmlparser->validation(0);
- $xmlparser->load_ext_dtd(0);
- # [NOT] loading the XML catalog for entities resolution as it seems to cause a lot of unnecessary DTD/entities fetching (requires >= 1.53 if enabled)
- #$xmlparser->load_catalog(catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc'));
- my $xml_string = join"\n",@{$File->{Content}};
- # the XML parser will check the value of encoding attribute in XML declaration
- # so we have to amend it to reflect transcoding. see Bug 4867
- $xml_string =~ s/(<\?xml.*)
+ if ($File->{DOCTYPE} eq "HTML5") {
+
+ # $File->{DOCTYPE} = "XHTML5";
+ # $File->{Version} = "XHTML5";
+ }
+ else {
+ my $xmlparser = XML::LibXML->new();
+ $xmlparser->line_numbers(1);
+ $xmlparser->validation(0);
+ $xmlparser->load_ext_dtd(0);
+
+ # [NOT] loading the XML catalog for entities resolution as it seems to cause a lot of unnecessary DTD/entities fetching (requires >= 1.53 if enabled)
+ #$xmlparser->load_catalog(catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc'));
+ my $xml_string = join "\n", @{$File->{Content}};
+
+ # the XML parser will check the value of encoding attribute in XML declaration
+ # so we have to amend it to reflect transcoding. see Bug 4867
+ $xml_string =~ s/(<\?xml.*)
(encoding[\x20|\x09|\x0D|\x0A]*=[\x20|\x09|\x0D|\x0A]*(?:"[A-Za-z][a-zA-Z0-9_-]+"|'[A-Za-z][a-zA-Z0-9_-]+'))
(.*\?>)/$1encoding="utf-8"$3/sx;
- eval {
- $xmlparser->parse_string($xml_string);
- };
- $xml_string = undef;
- my $xml_parse_errors_line = undef;
- my @xmlwf_error_list;
- my @xmlwf_obj_error_list;
- if (ref($@)) {
- # handle a structured error (XML::LibXML::Error object)
- # (lib XML::LibXML > 0.66, but will work MUCH better > 0.69 )
- push (@xmlwf_obj_error_list, $@);
- my $err_obj = $@;
- while($err_obj->_prev()) {
- $err_obj = $err_obj->_prev();
- unshift(@xmlwf_obj_error_list, $err_obj);
- }
- my $num_xmlwf_error = 0;
- foreach my $err_obj (@xmlwf_obj_error_list){
- my $err;
- $err->{src} = '...'; # do this with show_open_entities()?
- $err->{line} = $err_obj->line();
- # -> column() is available in XML::LibXML >= 1.69_2
- $err->{char} = eval { $err_obj->column() };
- $err->{num} = "libxml2-".$err_obj->code();
- $err->{type} = "E";
- $err->{msg} = $err_obj->message();
- # The validator will sometimes fail to dereference entities files
- # we're filtering the bogus resulting error
- if ($err->{msg} =~ /Entity '\w+' not defined/) {
- $err = undef;
- next;
+ eval { $xmlparser->parse_string($xml_string); };
+ $xml_string = undef;
+ my $xml_parse_errors_line = undef;
+ my @xmlwf_error_list;
+ my @xmlwf_obj_error_list;
+
+ if (ref($@)) {
+
+ # handle a structured error (XML::LibXML::Error object)
+ # (lib XML::LibXML > 0.66, but will work MUCH better > 0.69 )
+ push(@xmlwf_obj_error_list, $@);
+ my $err_obj = $@;
+ while ($err_obj->_prev()) {
+ $err_obj = $err_obj->_prev();
+ unshift(@xmlwf_obj_error_list, $err_obj);
+ }
+ my $num_xmlwf_error = 0;
+ foreach my $err_obj (@xmlwf_obj_error_list) {
+ my $err;
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $err_obj->line();
+
+ # -> column() is available in XML::LibXML >= 1.69_2
+ $err->{char} = eval { $err_obj->column() };
+ $err->{num} = "libxml2-" . $err_obj->code();
+ $err->{type} = "E";
+ $err->{msg} = $err_obj->message();
+
+ # The validator will sometimes fail to dereference entities files
+ # we're filtering the bogus resulting error
+ if ($err->{msg} =~ /Entity '\w+' not defined/) {
+ $err = undef;
+ next;
+ }
+ push(@xmlwf_error_list, $err);
+ $num_xmlwf_error++;
}
- push (@xmlwf_error_list, $err);
- $num_xmlwf_error++;
- }
- }
- elsif ($@) {
- my $xmlwf_errors = $@;
- my $xmlwf_error_line = undef;
- my $xmlwf_error_col = undef;
- my $xmlwf_error_msg = undef;
- my $got_error_message = 0;
- my $got_quoted_line = 0;
- my $num_xmlwf_error = 0;
- foreach my $msg_line (split "\n", $xmlwf_errors){
-
- $msg_line =~ s{[^\x0d\x0a](:\d+:)}{\n$1}g;
- $msg_line =~ s{[^\x0d\x0a]+[\x0d\x0a]$}{};
-
- # first we get the actual error message
- if (($got_error_message eq 0) and ($msg_line =~ /^(:\d+:)( parser error : .*)/ )) {
- $xmlwf_error_line = $1;
- $xmlwf_error_msg = $2;
- $xmlwf_error_line =~ s/:(\d+):/$1/;
- $xmlwf_error_msg =~ s/ parser error :/XML Parsing Error: /;
- $got_error_message = 1;
- }
- # then we skip the second line, which shows the context (we don't use that)
- elsif (($got_error_message eq 1) and ($got_quoted_line eq 0)) {
- $got_quoted_line = 1;
- }
- # we now take the third line, with the pointer to the error's column
- elsif (($msg_line =~ /(\s+)\^/) and ($got_error_message eq 1) and ($got_quoted_line eq 1)) {
- $xmlwf_error_col = length($1);
- }
-
- # cleanup for a number of bugs for the column number
- if (defined($xmlwf_error_col)) {
- if ((my $l = length($File->{Content}->[$xmlwf_error_line-1])) < $xmlwf_error_col) {
- # http://bugzilla.gnome.org/show_bug.cgi?id=434196
- #warn("Warning: reported error column larger than line length " .
- # "($xmlwf_error_col > $l) in $File->{URI} line " .
- # "$xmlwf_error_line, libxml2 bug? Resetting to line length.");
- $xmlwf_error_col = $l;
- }
- elsif ($xmlwf_error_col == 79) {
- # working around an apparent odd limitation of libxml
- # which only gives context for lines up to 80 chars
- # http://www.w3.org/Bugs/Public/show_bug.cgi?id=4420
- # http://bugzilla.gnome.org/show_bug.cgi?id=424017
- $xmlwf_error_col = "> 80";
- # non-int line number will trigger the proper behavior in report_error
- }
- }
-
- # when we have all the info (one full error message), proceed and move on to the next error
- if ((defined $xmlwf_error_line) and (defined $xmlwf_error_col) and (defined $xmlwf_error_msg)){
- # Reinitializing for the next batch of 3 lines
- $got_error_message = 0;
- $got_quoted_line = 0;
-
- # formatting the error message for output
- my $err;
- $err->{src} = '...'; # do this with show_open_entities()?
- $err->{line} = $xmlwf_error_line;
- $err->{char} = $xmlwf_error_col;
- $err->{num} = 'xmlwf';
- $err->{type} = "E";
- $err->{msg} = $xmlwf_error_msg;
-
- # The validator will sometimes fail to dereference entities files
- # we're filtering the bogus resulting error
- if ($err->{msg} =~ /Entity '\w+' not defined/) {
- $xmlwf_error_line = undef;
- $xmlwf_error_col = undef;
- $xmlwf_error_msg = undef;
- next;
- }
- push (@xmlwf_error_list, $err);
- $xmlwf_error_line = undef;
- $xmlwf_error_col = undef;
- $xmlwf_error_msg = undef;
- $num_xmlwf_error++;
-
}
- }
- }
- foreach my $errmsg (@xmlwf_error_list){
- $File->{'Is Valid'} = FALSE;
- push @{$File->{WF_Errors}}, $errmsg;
- }
- }
-}
+ elsif ($@) {
+ my $xmlwf_errors = $@;
+ my $xmlwf_error_line = undef;
+ my $xmlwf_error_col = undef;
+ my $xmlwf_error_msg = undef;
+ my $got_error_message = 0;
+ my $got_quoted_line = 0;
+ my $num_xmlwf_error = 0;
+ foreach my $msg_line (split "\n", $xmlwf_errors) {
+
+ $msg_line =~ s{[^\x0d\x0a](:\d+:)}{\n$1}g;
+ $msg_line =~ s{[^\x0d\x0a]+[\x0d\x0a]$}{};
+
+ # first we get the actual error message
+ if (($got_error_message eq 0) and
+ ($msg_line =~ /^(:\d+:)( parser error : .*)/))
+ {
+ $xmlwf_error_line = $1;
+ $xmlwf_error_msg = $2;
+ $xmlwf_error_line =~ s/:(\d+):/$1/;
+ $xmlwf_error_msg =~ s/ parser error :/XML Parsing Error: /;
+ $got_error_message = 1;
+ }
+
+ # then we skip the second line, which shows the context (we don't use that)
+ elsif (($got_error_message eq 1) and ($got_quoted_line eq 0)) {
+ $got_quoted_line = 1;
+ }
+
+ # we now take the third line, with the pointer to the error's column
+ elsif (($msg_line =~ /(\s+)\^/) and
+ ($got_error_message eq 1) and
+ ($got_quoted_line eq 1))
+ {
+ $xmlwf_error_col = length($1);
+ }
+ # cleanup for a number of bugs for the column number
+ if (defined($xmlwf_error_col)) {
+ if (( my $l =
+ length($File->{Content}->[$xmlwf_error_line - 1])
+ ) < $xmlwf_error_col
+ )
+ {
+
+ # http://bugzilla.gnome.org/show_bug.cgi?id=434196
+ #warn("Warning: reported error column larger than line length " .
+ # "($xmlwf_error_col > $l) in $File->{URI} line " .
+ # "$xmlwf_error_line, libxml2 bug? Resetting to line length.");
+ $xmlwf_error_col = $l;
+ }
+ elsif ($xmlwf_error_col == 79) {
+
+ # working around an apparent odd limitation of libxml
+ # which only gives context for lines up to 80 chars
+ # http://www.w3.org/Bugs/Public/show_bug.cgi?id=4420
+ # http://bugzilla.gnome.org/show_bug.cgi?id=424017
+ $xmlwf_error_col = "> 80";
+
+ # non-int line number will trigger the proper behavior in report_error
+ }
+ }
+ # when we have all the info (one full error message), proceed and move on to the next error
+ if ((defined $xmlwf_error_line) and
+ (defined $xmlwf_error_col) and
+ (defined $xmlwf_error_msg))
+ {
+
+ # Reinitializing for the next batch of 3 lines
+ $got_error_message = 0;
+ $got_quoted_line = 0;
+
+ # formatting the error message for output
+ my $err;
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $xmlwf_error_line;
+ $err->{char} = $xmlwf_error_col;
+ $err->{num} = 'xmlwf';
+ $err->{type} = "E";
+ $err->{msg} = $xmlwf_error_msg;
+
+ # The validator will sometimes fail to dereference entities files
+ # we're filtering the bogus resulting error
+ if ($err->{msg} =~ /Entity '\w+' not defined/) {
+ $xmlwf_error_line = undef;
+ $xmlwf_error_col = undef;
+ $xmlwf_error_msg = undef;
+ next;
+ }
+ push(@xmlwf_error_list, $err);
+ $xmlwf_error_line = undef;
+ $xmlwf_error_col = undef;
+ $xmlwf_error_msg = undef;
+ $num_xmlwf_error++;
+ }
+ }
+ }
+ foreach my $errmsg (@xmlwf_error_list) {
+ $File->{'Is Valid'} = FALSE;
+ push @{$File->{WF_Errors}}, $errmsg;
+ }
+ }
+}
if (($File->{DOCTYPE} eq "HTML5") or ($File->{DOCTYPE} eq "XHTML5")) {
- if ($CFG->{External}->{HTML5}) {
- $File = &html5_validate($File);
- &add_warning('W00', {
- W00_experimental_name => "HTML5 Conformance Checker",
- W00_experimental_URI => "feedback.html"
- });
- }
- else {
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_no_checker => TRUE,
- fatal_missing_checker => 'HTML5 Validator',
- );
- }
+ if ($CFG->{External}->{HTML5}) {
+ $File = &html5_validate($File);
+ &add_warning(
+ 'W00',
+ { W00_experimental_name => "HTML5 Conformance Checker",
+ W00_experimental_URI => "feedback.html"
+ }
+ );
+ }
+ else {
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')->param(
+ fatal_no_checker => TRUE,
+ fatal_missing_checker => 'HTML5 Validator',
+ );
+ }
}
-elsif(($File->{DOCTYPE} eq '') and (($File->{Root} eq "svg") or @{$File->{Namespaces}} >1)){
+elsif (($File->{DOCTYPE} eq '') and
+ (($File->{Root} eq "svg") or @{$File->{Namespaces}} > 1))
+{
+
# we send doctypeless SVG, or any doctypeless XML document with multiple namespaces found, to a different engine
# WARNING this is experimental.
if ($CFG->{External}->{CompoundXML}) {
- $File = &compoundxml_validate($File);
- &add_warning('W00', {
- W00_experimental_name => "validator.nu Conformance Checker",
- W00_experimental_URI => "feedback.html"
- });
+ $File = &compoundxml_validate($File);
+ &add_warning(
+ 'W00',
+ { W00_experimental_name => "validator.nu Conformance Checker",
+ W00_experimental_URI => "feedback.html"
+ }
+ );
}
}
else {
- $File = &dtd_validate($File);
+ $File = &dtd_validate($File);
}
&abort_if_error_flagged($File, 0);
#
# Force "XML" if type is an XML type and an FPI was not found.
# Otherwise set the type to be the FPI.
-if (&is_xml($File) and not $File->{DOCTYPE} and lc($File->{Root}) ne 'html') {
- $File->{Version} = 'XML';
-} else {
- $File->{Version} ||= $File->{DOCTYPE};
+if (&is_xml($File) and not $File->{DOCTYPE} and lc($File->{Root}) ne 'html') {
+ $File->{Version} = 'XML';
+}
+else {
+ $File->{Version} ||= $File->{DOCTYPE};
}
#
# Get the pretty text version of the FPI if a mapping exists.
if (my $prettyver = $CFG->{Types}->{$File->{Version}}->{Display}) {
- $File->{Version} = $prettyver;
+ $File->{Version} = $prettyver;
}
#
# check the received mime type against Allowed mime types
-if ($File->{ContentType}){
- my @allowedMediaType =
- split(/\s+/, $CFG->{Types}->{$File->{DOCTYPE}}->{Types}->{Allowed} || '');
- my $usedCTisAllowed;
- if (scalar @allowedMediaType) {
- $usedCTisAllowed = FALSE;
- foreach (@allowedMediaType) { $usedCTisAllowed = TRUE if ($_ eq $File->{ContentType}); }
- }
- else {
- # wedon't know what media type is recommended, so better shut up
- $usedCTisAllowed = TRUE;
- }
- if(! $usedCTisAllowed ){
- &add_warning('W23', {
- W23_type => $File->{ContentType},
- W23_type_pref => $CFG->{Types}->{$File->{DOCTYPE}}->{Types}->{Preferred},
- w23_doctype => $File->{Version}
- });
- }
+if ($File->{ContentType}) {
+ my @allowedMediaType =
+ split(/\s+/,
+ $CFG->{Types}->{$File->{DOCTYPE}}->{Types}->{Allowed} || '');
+ my $usedCTisAllowed;
+ if (scalar @allowedMediaType) {
+ $usedCTisAllowed = FALSE;
+ foreach (@allowedMediaType) {
+ $usedCTisAllowed = TRUE if ($_ eq $File->{ContentType});
+ }
+ }
+ else {
+
+ # wedon't know what media type is recommended, so better shut up
+ $usedCTisAllowed = TRUE;
+ }
+ if (!$usedCTisAllowed) {
+ &add_warning(
+ 'W23',
+ { W23_type => $File->{ContentType},
+ W23_type_pref =>
+ $CFG->{Types}->{$File->{DOCTYPE}}->{Types}->{Preferred},
+ w23_doctype => $File->{Version}
+ }
+ );
+ }
}
#
# Warn about unknown, incorrect, or missing Namespaces.
if ($File->{Namespace}) {
- my $ns = $CFG->{Types}->{$File->{Version}}->{Namespace} || FALSE;
-
- if (&is_xml($File)) {
- if ($ns eq $File->{Namespace}) {
- &add_warning('W10', {
- W10_ns => $File->{Namespace},
- W10_type => $File->{Type},
- });
+ my $ns = $CFG->{Types}->{$File->{Version}}->{Namespace} || FALSE;
+
+ if (&is_xml($File)) {
+ if ($ns eq $File->{Namespace}) {
+ &add_warning(
+ 'W10',
+ { W10_ns => $File->{Namespace},
+ W10_type => $File->{Type},
+ }
+ );
+ }
+ }
+ else {
+ &add_warning(
+ 'W11',
+ { W11_ns => $File->{Namespace},
+ w11_doctype => $File->{DOCTYPE}
+ }
+ );
+ }
+}
+else {
+ if (&is_xml($File) and $CFG->{Types}->{$File->{Version}}->{Namespace}) {
+ &add_warning('W12', {});
}
- } else {
- &add_warning('W11', {W11_ns => $File->{Namespace},
- w11_doctype => $File->{DOCTYPE}});
- }
-} else {
- if (&is_xml($File) and $CFG->{Types}->{$File->{Version}}->{Namespace}) {
- &add_warning('W12', {});
- }
}
-
## if invalid content, AND if requested, pass through tidy
if (!$File->{'Is Valid'} && $File->{Opt}->{'Show Tidy'}) {
- eval {
- local $SIG{__DIE__};
- require HTML::Tidy;
- my $tidy = HTML::Tidy->new({config_file => $CFG->{Paths}->{TidyConf}});
- my $cleaned = $tidy->clean(join("\n", @{$File->{Content}}));
- $cleaned = Encode::decode_utf8($cleaned);
- $File->{Tidy} = $cleaned;
- };
- if ($@) {
- (my $errmsg = $@) =~ s/ at .*//s;
- &add_warning('W29', { W29_msg => $errmsg });
- }
+ eval {
+ local $SIG{__DIE__};
+ require HTML::Tidy;
+ my $tidy = HTML::Tidy->new({config_file => $CFG->{Paths}->{TidyConf}});
+ my $cleaned = $tidy->clean(join("\n", @{$File->{Content}}));
+ $cleaned = Encode::decode_utf8($cleaned);
+ $File->{Tidy} = $cleaned;
+ };
+ if ($@) {
+ (my $errmsg = $@) =~ s/ at .*//s;
+ &add_warning('W29', {W29_msg => $errmsg});
+ }
}
my $template;
if ($File->{Opt}->{Output} eq 'earl') {
- $template = &get_template($File, 'earl_xml.tmpl');
-} elsif ($File->{Opt}->{Output} eq 'n3') {
- $template = &get_template($File, 'earl_n3.tmpl');
-} elsif ($File->{Opt}->{Output} eq 'json') {
- $template = &get_template($File, 'json_output.tmpl');
-} elsif ($File->{Opt}->{Output} eq 'ucn') {
- $template = &get_template($File, 'ucn_output.tmpl');
-} elsif ($File->{Opt}->{Output} eq 'soap12') {
- if ($CFG->{'Enable SOAP'} != 1) { # API disabled - ideally this should have been sent before performing validation...
- print CGI::header(-status => 503, -content_language => "en",
- -type => "text/html", -charset => "utf-8"
- );
- $template = &get_template($File, 'soap_disabled.tmpl');
- } elsif ($File->{'Error Flagged'}) { # should send SOAP fault message
- $template = &get_template($File, 'soap_fault.tmpl');
- # we fill the soap fault template
- #with the variables that had been passed to the HTML fatal error template
- my $errtmpl = &get_template($File, 'fatal-error.tmpl');
- foreach my $fparam ($errtmpl->param()) {
- $template->param($fparam => $errtmpl->param($fparam));
- }
- } else {
- $template = &get_template($File, 'soap_output.tmpl');
- }
-} else {
- $template = &get_template($File, 'result.tmpl');
+ $template = &get_template($File, 'earl_xml.tmpl');
+}
+elsif ($File->{Opt}->{Output} eq 'n3') {
+ $template = &get_template($File, 'earl_n3.tmpl');
+}
+elsif ($File->{Opt}->{Output} eq 'json') {
+ $template = &get_template($File, 'json_output.tmpl');
+}
+elsif ($File->{Opt}->{Output} eq 'ucn') {
+ $template = &get_template($File, 'ucn_output.tmpl');
+}
+elsif ($File->{Opt}->{Output} eq 'soap12') {
+ if ($CFG->{'Enable SOAP'} != 1)
+ { # API disabled - ideally this should have been sent before performing validation...
+ print CGI::header(
+ -status => 503,
+ -content_language => "en",
+ -type => "text/html",
+ -charset => "utf-8"
+ );
+ $template = &get_template($File, 'soap_disabled.tmpl');
+ }
+ elsif ($File->{'Error Flagged'}) { # should send SOAP fault message
+ $template = &get_template($File, 'soap_fault.tmpl');
+
+ # we fill the soap fault template
+ #with the variables that had been passed to the HTML fatal error template
+ my $errtmpl = &get_template($File, 'fatal-error.tmpl');
+ foreach my $fparam ($errtmpl->param()) {
+ $template->param($fparam => $errtmpl->param($fparam));
+ }
+ }
+ else {
+ $template = &get_template($File, 'soap_output.tmpl');
+ }
+}
+else {
+ $template = &get_template($File, 'result.tmpl');
}
&prep_template($File, $template);
&fin_template($File, $template);
$template->param(file_warnings => $File->{Warnings});
-$template->param(tidy_output => $File->{Tidy});
-$template->param(file_source => &source($File))
- if ($template->param('opt_show_source') or ($File->{'Is Upload'}) or ($File->{'Direct Input'}));
+$template->param(tidy_output => $File->{Tidy});
+$template->param(file_source => &source($File))
+ if ($template->param('opt_show_source') or
+ ($File->{'Is Upload'}) or
+ ($File->{'Direct Input'}));
if ($File->{Opt}->{Output} eq 'json') {
- # No JSON escaping in HTML::Template (and "JS" is not the right thing here)
- my $json = JSON->new();
- $json->allow_nonref(TRUE);
- for my $msgs ($template->param("file_errors"),
- $template->param("file_warnings")) {
- next unless $msgs;
- for my $msg (@$msgs) {
- for my $key (qw(msg expl)) {
- $msg->{$key} = $json->encode($msg->{$key}) if $msg->{$key};
- }
+
+ # No JSON escaping in HTML::Template (and "JS" is not the right thing here)
+ my $json = JSON->new();
+ $json->allow_nonref(TRUE);
+ for my $msgs ($template->param("file_errors"),
+ $template->param("file_warnings"))
+ {
+ next unless $msgs;
+ for my $msg (@$msgs) {
+ for my $key (qw(msg expl)) {
+ $msg->{$key} = $json->encode($msg->{$key}) if $msg->{$key};
+ }
+ }
}
- }
}
# transcode output from perl's internal to utf-8 and output
@@ -897,935 +993,1005 @@ print Encode::encode('UTF-8', $template->output);
undef $File;
exit;
-
#############################################################################
# Subroutine definitions
#############################################################################
-sub get_template ($$) {
- my ($File, $fname) = @_;
- if (!$File->{_Templates}->{$fname}) {
- my $tmpl =
- HTML::Template->new(%{$File->{Template_Defaults}}, filename => $fname);
- $tmpl->param(cfg_home_page => $CFG->{'Home Page'});
- $File->{_Templates}->{$fname} = $tmpl;
- }
- return $File->{_Templates}->{$fname};
+sub get_template ($$)
+{
+ my ($File, $fname) = @_;
+ if (!$File->{_Templates}->{$fname}) {
+ my $tmpl = HTML::Template->new(%{$File->{Template_Defaults}},
+ filename => $fname);
+ $tmpl->param(cfg_home_page => $CFG->{'Home Page'});
+ $File->{_Templates}->{$fname} = $tmpl;
+ }
+ return $File->{_Templates}->{$fname};
}
# TODO: need to bring in fixes from html5_validate() here
-sub compoundxml_validate (\$) {
- my $File = shift;
- my $ua = new W3C::Validator::UserAgent ($CFG, $File);
+sub compoundxml_validate (\$)
+{
+ my $File = shift;
+ my $ua = new W3C::Validator::UserAgent($CFG, $File);
+
+ $File->{ParserName} = "validator.nu";
+ $File->{ParserOpts} = "";
- $File->{ParserName} = "validator.nu";
- $File->{ParserOpts} = "";
+ my $url = URI->new($CFG->{External}->{CompoundXML});
+ $url->query_form(out => "xml");
- my $url = URI->new($CFG->{External}->{CompoundXML});
- $url->query_form(out => "xml");
+ my $req = HTTP::Request->new(POST => $url);
- my $req = HTTP::Request->new(POST => $url);
+ if ($File->{Opt}->{DOCTYPE} || $File->{Charset}->{Override}) {
- if ($File->{Opt}->{DOCTYPE} || $File->{Charset}->{Override}) {
- # Doctype or charset overridden, need to use $File->{Content} in UTF-8
- # because $File->{Bytes} is not affected by the overrides. This will
- # most likely be a source of errors about internal/actual charset
- # differences as long as our transcoding process does not "fix" the
- # charset info in XML declaration and meta http-equiv (any others?).
- if($File->{'Direct Input'}) { # sane default when using html5 validator by direct input
- $req->content_type("application/xml; charset=UTF-8");
+ # Doctype or charset overridden, need to use $File->{Content} in UTF-8
+ # because $File->{Bytes} is not affected by the overrides. This will
+ # most likely be a source of errors about internal/actual charset
+ # differences as long as our transcoding process does not "fix" the
+ # charset info in XML declaration and meta http-equiv (any others?).
+ if ($File->{'Direct Input'})
+ { # sane default when using html5 validator by direct input
+ $req->content_type("application/xml; charset=UTF-8");
+ }
+ else {
+ $req->content_type("$File->{ContentType}; charset=UTF-8");
+ }
+ $req->content(Encode::encode_utf8(join("\n", @{$File->{Content}})));
}
else {
- $req->content_type("$File->{ContentType}; charset=UTF-8");
- }
- $req->content(Encode::encode_utf8(join("\n", @{$File->{Content}})));
- }
- else {
- # Pass original bytes, Content-Type and charset as-is.
- # We trust that our and validator.nu's interpretation of line numbers
- # is the same (regardless of EOL chars used in the document).
-
- my @content_type = ($File->{ContentType} => undef);
- push(@content_type, charset => $File->{Charset}->{HTTP})
- if $File->{Charset}->{HTTP};
-
- $req->content_type(HTTP::Headers::Util::join_header_words(@content_type));
- $req->content_ref(\$File->{Bytes});
- }
-
- $req->content_language($File->{ContentLang}) if $File->{ContentLang};
- # Intentionally using direct header access instead of $req->last_modified
- $req->header('Last-Modified', $File->{Modified}) if $File->{Modified};
-
- # If not in debug mode, gzip the request (LWP >= 5.817)
- eval { $req->encode("gzip"); } unless $File->{Opt}->{Debug};
-
- my $res = $ua->request($req);
- if (! $res->is_success()) {
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_no_checker => TRUE,
- fatal_missing_checker => 'HTML5 Validator',
- );
- }
- else {
- my $content = &get_content($File, $res);
- return $File if $File->{'Error Flagged'};
- # and now we parse according to http://wiki.whatwg.org/wiki/Validator.nu_XML_Output
- # I wish we could use XML::LibXML::Reader here. but SHAME on those major
- # unix distributions still shipping with libxml2 2.6.16… 4 years after its release
- my $xml_reader = XML::LibXML->new();
- my $xmlDOM;
- eval { $xmlDOM = $xml_reader->parse_string( $content);};
- if ($@) {
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_no_checker => TRUE,
- fatal_missing_checker => 'HTML5 Validator',
- );
- return $File;
- }
- my @nodelist = $xmlDOM->getElementsByTagName("messages");
- my $messages_node = $nodelist[0];
- my @message_nodes = $messages_node->childNodes;
- foreach my $message_node (@message_nodes) {
- my $message_type = $message_node->localname;
- my $err;
- my ($xml_error_line, $xml_error_col, $xml_error_msg, $xml_error_expl);
- if ($message_type eq "error") {
- $err->{type} = "E";
- $File->{'Is Valid'} = FALSE;
- }
- elsif ($message_type eq "info") {
- $err->{type} = "I"; # by default - we find warnings in the type attribute (below)
- }
- if ($message_node->hasAttributes()) {
- my @attributelist = $message_node->attributes();
- foreach my $attribute (@attributelist) {
- if($attribute->name eq "type"){
- if (($attribute->getValue() eq "warning") and ($message_type eq "info")) {
- $err->{type} = "W";
+ # Pass original bytes, Content-Type and charset as-is.
+ # We trust that our and validator.nu's interpretation of line numbers
+ # is the same (regardless of EOL chars used in the document).
+
+ my @content_type = ($File->{ContentType} => undef);
+ push(@content_type, charset => $File->{Charset}->{HTTP})
+ if $File->{Charset}->{HTTP};
+
+ $req->content_type(
+ HTTP::Headers::Util::join_header_words(@content_type));
+ $req->content_ref(\$File->{Bytes});
+ }
+
+ $req->content_language($File->{ContentLang}) if $File->{ContentLang};
+
+ # Intentionally using direct header access instead of $req->last_modified
+ $req->header('Last-Modified', $File->{Modified}) if $File->{Modified};
+
+ # If not in debug mode, gzip the request (LWP >= 5.817)
+ eval { $req->encode("gzip"); } unless $File->{Opt}->{Debug};
+
+ my $res = $ua->request($req);
+ if (!$res->is_success()) {
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')->param(
+ fatal_no_checker => TRUE,
+ fatal_missing_checker => 'HTML5 Validator',
+ );
+ }
+ else {
+ my $content = &get_content($File, $res);
+ return $File if $File->{'Error Flagged'};
+
+ # and now we parse according to http://wiki.whatwg.org/wiki/Validator.nu_XML_Output
+ # I wish we could use XML::LibXML::Reader here. but SHAME on those major
+ # unix distributions still shipping with libxml2 2.6.16… 4 years after its release
+ my $xml_reader = XML::LibXML->new();
+ my $xmlDOM;
+ eval { $xmlDOM = $xml_reader->parse_string($content); };
+ if ($@) {
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')->param(
+ fatal_no_checker => TRUE,
+ fatal_missing_checker => 'HTML5 Validator',
+ );
+ return $File;
+ }
+ my @nodelist = $xmlDOM->getElementsByTagName("messages");
+ my $messages_node = $nodelist[0];
+ my @message_nodes = $messages_node->childNodes;
+ foreach my $message_node (@message_nodes) {
+ my $message_type = $message_node->localname;
+ my $err;
+ my ($xml_error_line, $xml_error_col,
+ $xml_error_msg, $xml_error_expl
+ );
+ if ($message_type eq "error") {
+ $err->{type} = "E";
+ $File->{'Is Valid'} = FALSE;
+ }
+ elsif ($message_type eq "info") {
+ $err->{type} = "I"
+ ; # by default - we find warnings in the type attribute (below)
+ }
+ if ($message_node->hasAttributes()) {
+ my @attributelist = $message_node->attributes();
+ foreach my $attribute (@attributelist) {
+ if ($attribute->name eq "type") {
+ if (($attribute->getValue() eq "warning") and
+ ($message_type eq "info"))
+ {
+ $err->{type} = "W";
+ }
+
+ }
+ if ($attribute->name eq "last-column") {
+ $xml_error_col = $attribute->getValue();
+ }
+ if ($attribute->name eq "last-line") {
+ $xml_error_line = $attribute->getValue();
+ }
+
+ }
+ }
+ my @child_nodes = $message_node->childNodes;
+ foreach my $child_node (@child_nodes) {
+ if ($child_node->localname eq "message") {
+ $xml_error_msg = $child_node->toString();
+ $xml_error_msg =~ s,</?[^>]*>,,gsi;
+ }
+ if ($child_node->localname eq "elaboration") {
+ $xml_error_expl = $child_node->toString();
+ $xml_error_expl =~ s,</?elaboration>,,gi;
+ $xml_error_expl =
+ "\n<div class=\"ve xml\">$xml_error_expl</div>\n";
+ }
+ }
+
+ # formatting the error message for output
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $xml_error_line;
+ $err->{char} = $xml_error_col;
+ $err->{num} = 'validator.nu';
+ $err->{msg} = $xml_error_msg;
+ $err->{expl} = $xml_error_expl;
+
+ if ($err->{msg} =~
+ /Using the preset for (.*) based on the root namespace/)
+ {
+ $File->{DOCTYPE} = $1;
}
+ else {
+ push @{$File->{Errors}}, $err;
+ }
+
+ # @@ TODO message explanation / elaboration
+ }
+ }
+ return $File;
+}
+
+sub html5_validate (\$)
+{
+ my $File = shift;
+ my $ua = new W3C::Validator::UserAgent($CFG, $File);
+
+ $File->{ParserName} = "validator.nu";
+ $File->{ParserOpts} = "";
+
+ my $url = URI->new($CFG->{External}->{HTML5});
+ $url->query_form(out => "xml");
+
+ my $req = HTTP::Request->new(POST => $url);
- }
- if($attribute->name eq "last-column") {
- $xml_error_col = $attribute->getValue();
- }
- if($attribute->name eq "last-line") {
- $xml_error_line = $attribute->getValue();
- }
-
- }
- }
- my @child_nodes = $message_node->childNodes;
- foreach my $child_node (@child_nodes) {
- if ($child_node->localname eq "message") {
- $xml_error_msg= $child_node->toString();
- $xml_error_msg =~ s,</?[^>]*>,,gsi;
- }
- if ($child_node->localname eq "elaboration") {
- $xml_error_expl = $child_node->toString();
- $xml_error_expl =~ s,</?elaboration>,,gi;
- $xml_error_expl = "\n<div class=\"ve xml\">$xml_error_expl</div>\n";
- }
- }
- # formatting the error message for output
- $err->{src} = '...'; # do this with show_open_entities()?
- $err->{line} = $xml_error_line;
- $err->{char} = $xml_error_col;
- $err->{num} = 'validator.nu';
- $err->{msg} = $xml_error_msg;
- $err->{expl} = $xml_error_expl;
-
- if ($err->{msg} =~ /Using the preset for (.*) based on the root namespace/ ) {
- $File->{DOCTYPE} = $1;
- }
- else {
- push @{$File->{Errors}}, $err;
- }
- # @@ TODO message explanation / elaboration
- }
- }
- return $File;
-}
-
-
-sub html5_validate (\$) {
- my $File = shift;
- my $ua = new W3C::Validator::UserAgent ($CFG, $File);
-
- $File->{ParserName} = "validator.nu";
- $File->{ParserOpts} = "";
-
- my $url = URI->new($CFG->{External}->{HTML5});
- $url->query_form(out => "xml");
-
- my $req = HTTP::Request->new(POST => $url);
-
- if ($File->{Opt}->{DOCTYPE} || $File->{Charset}->{Override}) {
- # Doctype or charset overridden, need to use $File->{Content} in UTF-8
- # because $File->{Bytes} is not affected by the overrides. This will
- # most likely be a source of errors about internal/actual charset
- # differences as long as our transcoding process does not "fix" the
- # charset info in XML declaration, meta http-equiv/charset and/or BOM
- # (any others?).
-
- my $ct = $File->{'Direct Input'} ? "text/html" : $File->{ContentType};
- $req->content_type("$ct; charset=UTF-8");
- $req->content(Encode::encode_utf8(join("\n", @{$File->{Content}})));
- }
- else {
- # Pass original bytes, Content-Type and charset as-is.
- # We trust that our and validator.nu's interpretation of line numbers
- # is the same later when displaying error contexts (regardless of EOL chars
- # used in the document).
-
- if ($File->{'Direct Input'}) {
- $req->content_type("text/html; charset=UTF-8");
+ if ($File->{Opt}->{DOCTYPE} || $File->{Charset}->{Override}) {
+
+ # Doctype or charset overridden, need to use $File->{Content} in UTF-8
+ # because $File->{Bytes} is not affected by the overrides. This will
+ # most likely be a source of errors about internal/actual charset
+ # differences as long as our transcoding process does not "fix" the
+ # charset info in XML declaration, meta http-equiv/charset and/or BOM
+ # (any others?).
+
+ my $ct = $File->{'Direct Input'} ? "text/html" : $File->{ContentType};
+ $req->content_type("$ct; charset=UTF-8");
+ $req->content(Encode::encode_utf8(join("\n", @{$File->{Content}})));
}
else {
- my @ct = ($File->{ContentType} => undef);
- push(@ct, charset => $File->{Charset}->{HTTP})
- if $File->{Charset}->{HTTP};
- $req->content_type(HTTP::Headers::Util::join_header_words(@ct));
- }
- $req->content_ref(\$File->{Bytes});
- }
-
- $req->content_language($File->{ContentLang}) if $File->{ContentLang};
- # Intentionally using direct header access instead of $req->last_modified
- # (the latter takes seconds since epoch, but $File->{Modified} is an already
- # formatted string).
- $req->header('Last-Modified', $File->{Modified}) if $File->{Modified};
-
- # If not in debug mode, gzip the request (LWP >= 5.817)
- eval { $req->encode("gzip"); } unless $File->{Opt}->{Debug};
-
- my $res = $ua->request($req);
- if (! $res->is_success()) {
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_no_checker => TRUE,
- fatal_missing_checker => 'HTML5 Validator',
- );
- }
- else {
- my $content = &get_content($File, $res);
- return $File if $File->{'Error Flagged'};
- # and now we parse according to http://wiki.whatwg.org/wiki/Validator.nu_XML_Output
- # I wish we could use XML::LibXML::Reader here. but SHAME on those major
- # unix distributions still shipping with libxml2 2.6.16… 4 years after its release
- my $xml_reader = XML::LibXML->new();
- my $xmlDOM;
- eval { $xmlDOM = $xml_reader->parse_string( $content);};
- if ($@) {
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_no_checker => TRUE,
- fatal_missing_checker => 'HTML5 Validator',
- );
- return $File;
- }
- my @nodelist = $xmlDOM->getElementsByTagName("messages");
- my $messages_node = $nodelist[0];
- my @message_nodes = $messages_node->childNodes;
- foreach my $message_node (@message_nodes) {
- my $message_type = $message_node->localname;
- my $err;
- my ($html5_error_line, $html5_error_col, $html5_error_msg, $html5_error_expl);
- # TODO: non-document errors should receive different/better treatment,
- # but this is better than hiding all problems for now (#6747)
- if ($message_type eq "error" || $message_type eq "non-document-error") {
- $err->{type} = "E";
- $File->{'Is Valid'} = FALSE;
- }
- elsif ($message_type eq "info") {
- $err->{type} = "I"; # by default - we find warnings in the type attribute (below)
- }
- if ($message_node->hasAttributes()) {
- my @attributelist = $message_node->attributes();
- foreach my $attribute (@attributelist) {
- if($attribute->name eq "type"){
- if (($attribute->getValue() eq "warning") and ($message_type eq "info")) {
- $err->{type} = "W";
+ # Pass original bytes, Content-Type and charset as-is.
+ # We trust that our and validator.nu's interpretation of line numbers
+ # is the same later when displaying error contexts (regardless of EOL chars
+ # used in the document).
+
+ if ($File->{'Direct Input'}) {
+ $req->content_type("text/html; charset=UTF-8");
+ }
+ else {
+ my @ct = ($File->{ContentType} => undef);
+ push(@ct, charset => $File->{Charset}->{HTTP})
+ if $File->{Charset}->{HTTP};
+ $req->content_type(HTTP::Headers::Util::join_header_words(@ct));
+ }
+ $req->content_ref(\$File->{Bytes});
+ }
+
+ $req->content_language($File->{ContentLang}) if $File->{ContentLang};
+
+ # Intentionally using direct header access instead of $req->last_modified
+ # (the latter takes seconds since epoch, but $File->{Modified} is an already
+ # formatted string).
+ $req->header('Last-Modified', $File->{Modified}) if $File->{Modified};
+
+ # If not in debug mode, gzip the request (LWP >= 5.817)
+ eval { $req->encode("gzip"); } unless $File->{Opt}->{Debug};
+
+ my $res = $ua->request($req);
+ if (!$res->is_success()) {
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')->param(
+ fatal_no_checker => TRUE,
+ fatal_missing_checker => 'HTML5 Validator',
+ );
+ }
+ else {
+ my $content = &get_content($File, $res);
+ return $File if $File->{'Error Flagged'};
+
+ # and now we parse according to http://wiki.whatwg.org/wiki/Validator.nu_XML_Output
+ # I wish we could use XML::LibXML::Reader here. but SHAME on those major
+ # unix distributions still shipping with libxml2 2.6.16… 4 years after its release
+ my $xml_reader = XML::LibXML->new();
+ my $xmlDOM;
+ eval { $xmlDOM = $xml_reader->parse_string($content); };
+ if ($@) {
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')->param(
+ fatal_no_checker => TRUE,
+ fatal_missing_checker => 'HTML5 Validator',
+ );
+ return $File;
+ }
+ my @nodelist = $xmlDOM->getElementsByTagName("messages");
+ my $messages_node = $nodelist[0];
+ my @message_nodes = $messages_node->childNodes;
+ foreach my $message_node (@message_nodes) {
+ my $message_type = $message_node->localname;
+ my $err;
+ my ($html5_error_line, $html5_error_col,
+ $html5_error_msg, $html5_error_expl
+ );
+
+ # TODO: non-document errors should receive different/better treatment,
+ # but this is better than hiding all problems for now (#6747)
+ if ($message_type eq "error" ||
+ $message_type eq "non-document-error")
+ {
+ $err->{type} = "E";
+ $File->{'Is Valid'} = FALSE;
}
+ elsif ($message_type eq "info") {
+ $err->{type} = "I"
+ ; # by default - we find warnings in the type attribute (below)
+ }
+ if ($message_node->hasAttributes()) {
+ my @attributelist = $message_node->attributes();
+ foreach my $attribute (@attributelist) {
+ if ($attribute->name eq "type") {
+ if (($attribute->getValue() eq "warning") and
+ ($message_type eq "info"))
+ {
+ $err->{type} = "W";
+ }
+
+ }
+ elsif ($attribute->name eq "last-column") {
+ $html5_error_col = $attribute->getValue();
+ }
+ elsif ($attribute->name eq "last-line") {
+ $html5_error_line = $attribute->getValue();
+ }
- }
- elsif ($attribute->name eq "last-column") {
- $html5_error_col = $attribute->getValue();
- }
- elsif ($attribute->name eq "last-line") {
- $html5_error_line = $attribute->getValue();
- }
-
- }
- }
- my @child_nodes = $message_node->childNodes;
- foreach my $child_node (@child_nodes) {
- if ($child_node->localname eq "message") {
- $html5_error_msg = $child_node->textContent();
- }
- elsif ($child_node->localname eq "elaboration") {
- $html5_error_expl = $child_node->toString();
- $html5_error_expl =~ s,</?elaboration>,,gi;
- $html5_error_expl = "\n<div class=\"ve html5\">$html5_error_expl</div>\n";
- }
- }
- # formatting the error message for output
- $err->{src} = '...'; # do this with show_open_entities()?
- $err->{line} = $html5_error_line;
- $err->{char} = $html5_error_col;
- $err->{num} = 'html5';
- $err->{msg} = $html5_error_msg;
- $err->{expl} = $html5_error_expl;
- push @{$File->{Errors}}, $err;
- # @@ TODO message explanation / elaboration
- }
- }
- return $File;
-}
-
-
-sub dtd_validate (\$) {
- my $File = shift;
- my $opensp = SGML::Parser::OpenSP->new();
- my $parser_name = "SGML::Parser::OpenSP";
- #
- # By default, use SGML catalog file and SGML Declaration.
- my $catalog = catfile($CFG->{Paths}->{SGML}->{Library}, 'sgml.soc');
-
- # default parsing options
- my @spopt = qw(valid non-sgml-char-ref no-duplicate);
-
- #
- # Switch to XML semantics if file is XML.
- if (&is_xml($File)) {
- $catalog = catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc');
- push(@spopt, 'xml');
- }
- else {
- # add warnings for shorttags
- push(@spopt, 'min-tag');
- }
-
- $File->{ParserName} = $parser_name;
- $File->{ParserOpts} = join(" ", @spopt);
-
- #
- # Parser configuration
- $opensp->search_dirs($CFG->{Paths}->{SGML}->{Library});
- $opensp->catalogs($catalog);
- $opensp->show_error_numbers(1);
- $opensp->warnings(@spopt);
-
- #
- # Restricted file reading is disabled on Win32 for the time
- # beeing since neither SGML::Parser::OpenSP nor check auto-
- # magically set search_dirs to include the temp directory
- # so restricted file reading would defunct the Validator.
- $opensp->restrict_file_reading(1) unless $^O eq 'MSWin32';
-
-
- my $h; # event handler
- if ($File->{Opt}->{Outline}) {
- $h = W3C::Validator::EventHandler::Outliner->new($opensp, $File, $CFG);
- }
- else {
- $h = W3C::Validator::EventHandler->new($opensp, $File, $CFG);
- }
-
- $opensp->handler($h);
- $opensp->parse_string(join"\n",@{$File->{Content}});
-
- # Make sure there are no circular references, otherwise the script
- # would leak memory until mod_perl unloads it which could take some
- # time. @@FIXME It's probably overly careful though.
- $opensp->handler(undef);
- undef $h->{_parser};
- undef $h->{_file};
- undef $h;
- undef $opensp;
-
- #
- # Set Version to be the FPI initially.
- $File->{Version} = $File->{DOCTYPE};
- return $File;
+ }
+ }
+ my @child_nodes = $message_node->childNodes;
+ foreach my $child_node (@child_nodes) {
+ if ($child_node->localname eq "message") {
+ $html5_error_msg = $child_node->textContent();
+ }
+ elsif ($child_node->localname eq "elaboration") {
+ $html5_error_expl = $child_node->toString();
+ $html5_error_expl =~ s,</?elaboration>,,gi;
+ $html5_error_expl =
+ "\n<div class=\"ve html5\">$html5_error_expl</div>\n";
+ }
+ }
+
+ # formatting the error message for output
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $html5_error_line;
+ $err->{char} = $html5_error_col;
+ $err->{num} = 'html5';
+ $err->{msg} = $html5_error_msg;
+ $err->{expl} = $html5_error_expl;
+ push @{$File->{Errors}}, $err;
+
+ # @@ TODO message explanation / elaboration
+ }
+ }
+ return $File;
+}
+
+sub dtd_validate (\$)
+{
+ my $File = shift;
+ my $opensp = SGML::Parser::OpenSP->new();
+ my $parser_name = "SGML::Parser::OpenSP";
+
+ #
+ # By default, use SGML catalog file and SGML Declaration.
+ my $catalog = catfile($CFG->{Paths}->{SGML}->{Library}, 'sgml.soc');
+
+ # default parsing options
+ my @spopt = qw(valid non-sgml-char-ref no-duplicate);
+
+ #
+ # Switch to XML semantics if file is XML.
+ if (&is_xml($File)) {
+ $catalog = catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc');
+ push(@spopt, 'xml');
+ }
+ else {
+
+ # add warnings for shorttags
+ push(@spopt, 'min-tag');
+ }
+
+ $File->{ParserName} = $parser_name;
+ $File->{ParserOpts} = join(" ", @spopt);
+
+ #
+ # Parser configuration
+ $opensp->search_dirs($CFG->{Paths}->{SGML}->{Library});
+ $opensp->catalogs($catalog);
+ $opensp->show_error_numbers(1);
+ $opensp->warnings(@spopt);
+
+ #
+ # Restricted file reading is disabled on Win32 for the time
+ # beeing since neither SGML::Parser::OpenSP nor check auto-
+ # magically set search_dirs to include the temp directory
+ # so restricted file reading would defunct the Validator.
+ $opensp->restrict_file_reading(1) unless $^O eq 'MSWin32';
+
+ my $h; # event handler
+ if ($File->{Opt}->{Outline}) {
+ $h = W3C::Validator::EventHandler::Outliner->new($opensp, $File, $CFG);
+ }
+ else {
+ $h = W3C::Validator::EventHandler->new($opensp, $File, $CFG);
+ }
+
+ $opensp->handler($h);
+ $opensp->parse_string(join "\n", @{$File->{Content}});
+
+ # Make sure there are no circular references, otherwise the script
+ # would leak memory until mod_perl unloads it which could take some
+ # time. @@FIXME It's probably overly careful though.
+ $opensp->handler(undef);
+ undef $h->{_parser};
+ undef $h->{_file};
+ undef $h;
+ undef $opensp;
+
+ #
+ # Set Version to be the FPI initially.
+ $File->{Version} = $File->{DOCTYPE};
+ return $File;
}
#
# Generate HTML report.
-sub prep_template ($$) {
- my $File = shift;
- my $T = shift;
-
- #
- # XML mode...
- $T->param(is_xml => &is_xml($File));
-
- #
- # Upload?
- $T->param(is_upload => $File->{'Is Upload'});
-
- #
- # Direct Input?
- $T->param(is_direct_input => $File->{'Direct Input'});
-
- #
- # The URI...
- $T->param(file_uri => $File->{URI});
- $T->param(file_uri_param => uri_escape($File->{URI}));
-
- #
- # Set URL for page title.
- $T->param(page_title_url => $File->{URI});
-
- #
- # Metadata...
- $T->param(file_modified => $File->{Modified});
- $T->param(file_server => $File->{Server});
- $T->param(file_size => $File->{Size});
- $T->param(file_contenttype => $File->{ContentType});
- $T->param(file_charset => $File->{Charset}->{Use});
- $T->param(file_doctype => $File->{DOCTYPE});
-
- #
- # Output options...
- $T->param(opt_show_source => $File->{Opt}->{'Show Source'});
- $T->param(opt_show_tidy => $File->{Opt}->{'Show Tidy'});
- $T->param(opt_show_outline => $File->{Opt}->{Outline});
- $T->param(opt_verbose => $File->{Opt}->{Verbose});
- $T->param(opt_group_errors => $File->{Opt}->{'Group Errors'});
- $T->param(opt_no200 => $File->{Opt}->{No200});
-
- #
- # Tip of the Day...
- my $tip = &get_tip();
- $T->param(tip_uri => $tip->[0]);
- $T->param(tip_slug => $tip->[1]);
-
- # Root Element
- $T->param(root_element => $File->{Root});
-
- # Namespaces...
- $T->param(file_namespace => $File->{Namespace});
- my %seen_ns = ();
- my @bulk_ns = @{$File->{Namespaces}};
- $File->{Namespaces} = []; # reinitialize the list of non-root namespaces
- # ... and then get a uniq version of it
- foreach my $single_namespace (@bulk_ns) {
- push(@{$File->{Namespaces}}, $single_namespace) unless (($single_namespace eq $File->{Namespace}) or $seen_ns{$single_namespace}++);
- }
- my @nss = map({uri => $_}, @{$File->{Namespaces}});
- $T->param(file_namespaces => \@nss) if @nss;
-
- if ($File->{Opt}->{DOCTYPE}) {
- my $over_doctype_param = "override doctype $File->{Opt}->{DOCTYPE}";
- $T->param($over_doctype_param => TRUE);
- }
-
- if ($File->{Opt}->{Charset}) {
- my $over_charset_param = "override charset $File->{Opt}->{Charset}";
- $T->param($over_charset_param => TRUE);
- }
-
- # Allow content-negotiation
- if ($File->{Opt}->{'Accept Header'}) {
- $T->param('accept' => $File->{Opt}->{'Accept Header'});
- }
- if ($File->{Opt}->{'Accept-Language Header'}) {
- $T->param('accept-language' => $File->{Opt}->{'Accept-Language Header'});
- }
- if ($File->{Opt}->{'Accept-Charset Header'}) {
- $T->param('accept-charset' => $File->{Opt}->{'Accept-Charset Header'});
- }
- if ($File->{Opt}->{'User Agent'}) {
- $T->param('user-agent' => $File->{Opt}->{'User Agent'});
- }
- if ($File->{'Error Flagged'}) {
- $T->param(fatal_error => TRUE);
- }
-}
-
-sub fin_template ($$) {
- my $File = shift;
- my $T = shift;
-
-
-
- #
- # Set debug info for HTML report.
- $T->param(opt_debug => $DEBUG);
- $T->param(debug =>
- [
- map({name => $_, value => $ENV{$_}},
- qw(no_proxy http_proxy https_proxy ftp_proxy FTP_PASSIVE)),
- { name => 'Content-Encoding', value => $File->{ContentEnc} },
- { name => 'Content-Language', value => $File->{ContentLang} },
- { name => 'Content-Location', value => $File->{ContentLoc} },
- { name => 'Transfer-Encoding', value => $File->{TransferEnc} },
- { name => 'Parse Mode', value => $File->{Mode} },
- { name => 'Parse Mode Factor', value => $File->{ModeChoice} },
- { name => 'Parser', value => $File->{ParserName} },
- { name => 'Parser Options', value => $File->{ParserOpts} },
- ],
- );
-
- if (! $File->{Doctype} and ($File->{Version} eq 'unknown' or $File->{Version} eq 'SGML' or (!$File->{Version}))) {
-
- my $default_doctype = (&is_xml($File) ?
- $File->{"Default DOCTYPE"}->{"XHTML"} : $File->{"Default DOCTYPE"}->{"HTML"});
- $T->param(file_version => "$default_doctype");
- }
- else {
- $T->param(file_version => $File->{Version});
- }
- my ($num_errors,$num_warnings, $num_info, $reported_errors) = &report_errors($File);
- if ($num_errors+$num_warnings > 0)
- {
- $T->param(has_errors => 1);
- }
- $T->param(valid_errors_num => $num_errors);
- $num_warnings += scalar @{$File->{Warnings}};
- $T->param(valid_warnings_num => $num_warnings);
- my $number_of_errors = ""; # textual form of $num_errors
- my $number_of_warnings = ""; # textual form of $num_errors
-
-# The following is a bit hack-ish, but will enable us to have some logic
-# for a human-readable display of the number, with cases for 0, 1, 2 and above
-# (the case of 2 appears to be useful for localization in some languages where the plural is different for 2, and above)
-
- if ($num_errors > 1) {
- $T->param(number_of_errors_is_0 => FALSE );
- $T->param(number_of_errors_is_1 => FALSE);
- if ($num_errors eq 2) {
- $T->param(number_of_errors_is_2 => TRUE);
+sub prep_template ($$)
+{
+ my $File = shift;
+ my $T = shift;
+
+ #
+ # XML mode...
+ $T->param(is_xml => &is_xml($File));
+
+ #
+ # Upload?
+ $T->param(is_upload => $File->{'Is Upload'});
+
+ #
+ # Direct Input?
+ $T->param(is_direct_input => $File->{'Direct Input'});
+
+ #
+ # The URI...
+ $T->param(file_uri => $File->{URI});
+ $T->param(file_uri_param => uri_escape($File->{URI}));
+
+ #
+ # Set URL for page title.
+ $T->param(page_title_url => $File->{URI});
+
+ #
+ # Metadata...
+ $T->param(file_modified => $File->{Modified});
+ $T->param(file_server => $File->{Server});
+ $T->param(file_size => $File->{Size});
+ $T->param(file_contenttype => $File->{ContentType});
+ $T->param(file_charset => $File->{Charset}->{Use});
+ $T->param(file_doctype => $File->{DOCTYPE});
+
+ #
+ # Output options...
+ $T->param(opt_show_source => $File->{Opt}->{'Show Source'});
+ $T->param(opt_show_tidy => $File->{Opt}->{'Show Tidy'});
+ $T->param(opt_show_outline => $File->{Opt}->{Outline});
+ $T->param(opt_verbose => $File->{Opt}->{Verbose});
+ $T->param(opt_group_errors => $File->{Opt}->{'Group Errors'});
+ $T->param(opt_no200 => $File->{Opt}->{No200});
+
+ #
+ # Tip of the Day...
+ my $tip = &get_tip();
+ $T->param(tip_uri => $tip->[0]);
+ $T->param(tip_slug => $tip->[1]);
+
+ # Root Element
+ $T->param(root_element => $File->{Root});
+
+ # Namespaces...
+ $T->param(file_namespace => $File->{Namespace});
+ my %seen_ns = ();
+ my @bulk_ns = @{$File->{Namespaces}};
+ $File->{Namespaces} = []; # reinitialize the list of non-root namespaces
+ # ... and then get a uniq version of it
+ foreach my $single_namespace (@bulk_ns) {
+ push(@{$File->{Namespaces}}, $single_namespace)
+ unless (($single_namespace eq $File->{Namespace}) or
+ $seen_ns{$single_namespace}++);
+ }
+ my @nss = map({uri => $_}, @{$File->{Namespaces}});
+ $T->param(file_namespaces => \@nss) if @nss;
+
+ if ($File->{Opt}->{DOCTYPE}) {
+ my $over_doctype_param = "override doctype $File->{Opt}->{DOCTYPE}";
+ $T->param($over_doctype_param => TRUE);
+ }
+
+ if ($File->{Opt}->{Charset}) {
+ my $over_charset_param = "override charset $File->{Opt}->{Charset}";
+ $T->param($over_charset_param => TRUE);
+ }
+
+ # Allow content-negotiation
+ if ($File->{Opt}->{'Accept Header'}) {
+ $T->param('accept' => $File->{Opt}->{'Accept Header'});
+ }
+ if ($File->{Opt}->{'Accept-Language Header'}) {
+ $T->param(
+ 'accept-language' => $File->{Opt}->{'Accept-Language Header'});
+ }
+ if ($File->{Opt}->{'Accept-Charset Header'}) {
+ $T->param('accept-charset' => $File->{Opt}->{'Accept-Charset Header'});
+ }
+ if ($File->{Opt}->{'User Agent'}) {
+ $T->param('user-agent' => $File->{Opt}->{'User Agent'});
+ }
+ if ($File->{'Error Flagged'}) {
+ $T->param(fatal_error => TRUE);
+ }
+}
+
+sub fin_template ($$)
+{
+ my $File = shift;
+ my $T = shift;
+
+ #
+ # Set debug info for HTML report.
+ $T->param(opt_debug => $DEBUG);
+ $T->param(
+ debug => [
+ map({name => $_, value => $ENV{$_}},
+ qw(no_proxy http_proxy https_proxy ftp_proxy FTP_PASSIVE)),
+ {name => 'Content-Encoding', value => $File->{ContentEnc}},
+ {name => 'Content-Language', value => $File->{ContentLang}},
+ {name => 'Content-Location', value => $File->{ContentLoc}},
+ {name => 'Transfer-Encoding', value => $File->{TransferEnc}},
+ {name => 'Parse Mode', value => $File->{Mode}},
+ {name => 'Parse Mode Factor', value => $File->{ModeChoice}},
+ {name => 'Parser', value => $File->{ParserName}},
+ {name => 'Parser Options', value => $File->{ParserOpts}},
+ ],
+ );
+
+ if (!$File->{Doctype} and
+ ($File->{Version} eq 'unknown' or
+ $File->{Version} eq 'SGML' or
+ (!$File->{Version}))
+ )
+ {
+
+ my $default_doctype =
+ (&is_xml($File) ? $File->{"Default DOCTYPE"}->{"XHTML"} :
+ $File->{"Default DOCTYPE"}->{"HTML"});
+ $T->param(file_version => "$default_doctype");
}
else {
- $T->param(number_of_errors_is_2 => FALSE );
- }
- $T->param(number_of_errors_is_plural => TRUE );
- }
- elsif ($num_errors eq 1) {
- $T->param(number_of_errors_is_0 => FALSE );
- $T->param(number_of_errors_is_1 => TRUE );
- $T->param(number_of_errors_is_2 => FALSE );
- $T->param(number_of_errors_is_plural => FALSE );
- }
- else { # 0
- $T->param(number_of_errors_is_0 => TRUE );
- $T->param(number_of_errors_is_1 => FALSE );
- $T->param(number_of_errors_is_2 => FALSE );
- $T->param(number_of_errors_is_plural => FALSE );
- }
-
- if ($num_warnings > 1) {
- $T->param(number_of_warnings_is_0 => FALSE );
- $T->param(number_of_warnings_is_1 => FALSE);
- if ($num_warnings eq 2) {
- $T->param(number_of_warnings_is_2 => TRUE);
+ $T->param(file_version => $File->{Version});
+ }
+ my ($num_errors, $num_warnings, $num_info, $reported_errors) =
+ &report_errors($File);
+ if ($num_errors + $num_warnings > 0) {
+ $T->param(has_errors => 1);
+ }
+ $T->param(valid_errors_num => $num_errors);
+ $num_warnings += scalar @{$File->{Warnings}};
+ $T->param(valid_warnings_num => $num_warnings);
+ my $number_of_errors = ""; # textual form of $num_errors
+ my $number_of_warnings = ""; # textual form of $num_errors
+
+ # The following is a bit hack-ish, but will enable us to have some logic
+ # for a human-readable display of the number, with cases for 0, 1, 2 and above
+ # (the case of 2 appears to be useful for localization in some languages where the plural is different for 2, and above)
+
+ if ($num_errors > 1) {
+ $T->param(number_of_errors_is_0 => FALSE);
+ $T->param(number_of_errors_is_1 => FALSE);
+ if ($num_errors eq 2) {
+ $T->param(number_of_errors_is_2 => TRUE);
+ }
+ else {
+ $T->param(number_of_errors_is_2 => FALSE);
+ }
+ $T->param(number_of_errors_is_plural => TRUE);
+ }
+ elsif ($num_errors eq 1) {
+ $T->param(number_of_errors_is_0 => FALSE);
+ $T->param(number_of_errors_is_1 => TRUE);
+ $T->param(number_of_errors_is_2 => FALSE);
+ $T->param(number_of_errors_is_plural => FALSE);
+ }
+ else { # 0
+ $T->param(number_of_errors_is_0 => TRUE);
+ $T->param(number_of_errors_is_1 => FALSE);
+ $T->param(number_of_errors_is_2 => FALSE);
+ $T->param(number_of_errors_is_plural => FALSE);
+ }
+
+ if ($num_warnings > 1) {
+ $T->param(number_of_warnings_is_0 => FALSE);
+ $T->param(number_of_warnings_is_1 => FALSE);
+ if ($num_warnings eq 2) {
+ $T->param(number_of_warnings_is_2 => TRUE);
+ }
+ else {
+ $T->param(number_of_warnings_is_2 => FALSE);
+ }
+ $T->param(number_of_warnings_is_plural => TRUE);
+ }
+ elsif ($num_warnings eq 1) {
+ $T->param(number_of_warnings_is_0 => FALSE);
+ $T->param(number_of_warnings_is_1 => TRUE);
+ $T->param(number_of_warnings_is_2 => FALSE);
+ $T->param(number_of_warnings_is_plural => FALSE);
+ }
+ else { # 0
+ $T->param(number_of_warnings_is_0 => TRUE);
+ $T->param(number_of_warnings_is_1 => FALSE);
+ $T->param(number_of_warnings_is_2 => FALSE);
+ $T->param(number_of_warnings_is_plural => FALSE);
+ }
+
+ $T->param(file_outline => $File->{heading_outline})
+ if $File->{Opt}->{Outline};
+
+ $T->param(file_errors => $reported_errors);
+ if ($File->{'Is Valid'}) {
+ $T->param(VALID => TRUE);
+ $T->param(valid_status => 'Valid');
+ &report_valid($File, $T);
}
else {
- $T->param(number_of_warnings_is_2 => FALSE);
- }
- $T->param(number_of_warnings_is_plural => TRUE );
- }
- elsif ($num_warnings eq 1) {
- $T->param(number_of_warnings_is_0 => FALSE );
- $T->param(number_of_warnings_is_1 => TRUE );
- $T->param(number_of_warnings_is_2 => FALSE );
- $T->param(number_of_warnings_is_plural => FALSE );
- }
- else { # 0
- $T->param(number_of_warnings_is_0 => TRUE );
- $T->param(number_of_warnings_is_1 => FALSE );
- $T->param(number_of_warnings_is_2 => FALSE );
- $T->param(number_of_warnings_is_plural => FALSE );
- }
-
- $T->param(file_outline => $File->{heading_outline})
- if $File->{Opt}->{Outline};
-
- $T->param(file_errors => $reported_errors);
- if ($File->{'Is Valid'}) {
- $T->param(VALID => TRUE);
- $T->param(valid_status => 'Valid');
- &report_valid($File, $T);
- } else {
- $T->param(VALID => FALSE);
- $T->param(valid_status => 'Invalid');
- }
+ $T->param(VALID => FALSE);
+ $T->param(valid_status => 'Invalid');
+ }
}
#
# Output "This page is Valid" report.
-sub report_valid {
- my $File = shift;
- my $T = shift;
-
- unless ($File->{Version} eq 'unknown' or defined $File->{Tentative}) {
-
- if (exists $CFG->{Types}->{$File->{DOCTYPE}}->{Badge}) {
- my $cfg = $CFG->{Types}->{$File->{DOCTYPE}};
- $T->param(
- badge_uri => $cfg->{Badge}->{URI},
- local_badge_uri => $cfg->{Badge}->{'Local URI'},
- badge_alt_uri => $cfg->{Badge}->{'Alt URI'},
- local_alt_badge_uri => $cfg->{Badge}->{'Local ALT URI'},
- badge_alt => $cfg->{Badge}->{Alt},
- badge_rdfa => $cfg->{Badge}->{RDFa},
- badge_h => $cfg->{Badge}->{Height},
- badge_w => $cfg->{Badge}->{Width},
- badge_tagc => $cfg->{'Parse Mode'} eq 'XML' ? ' /' : '',
- );
- }
- } elsif (defined $File->{Tentative}) {
- $T->param(is_tentative => TRUE);
- }
-
- if ($File->{XMLWF_ONLY}){
- $T->param(xmlwf_only => TRUE);
- }
- my $thispage = self_url_file($File);
- $T->param(file_thispage => $thispage);
+sub report_valid
+{
+ my $File = shift;
+ my $T = shift;
+
+ unless ($File->{Version} eq 'unknown' or defined $File->{Tentative}) {
+
+ if (exists $CFG->{Types}->{$File->{DOCTYPE}}->{Badge}) {
+ my $cfg = $CFG->{Types}->{$File->{DOCTYPE}};
+ $T->param(
+ badge_uri => $cfg->{Badge}->{URI},
+ local_badge_uri => $cfg->{Badge}->{'Local URI'},
+ badge_alt_uri => $cfg->{Badge}->{'Alt URI'},
+ local_alt_badge_uri => $cfg->{Badge}->{'Local ALT URI'},
+ badge_alt => $cfg->{Badge}->{Alt},
+ badge_rdfa => $cfg->{Badge}->{RDFa},
+ badge_h => $cfg->{Badge}->{Height},
+ badge_w => $cfg->{Badge}->{Width},
+ badge_tagc => $cfg->{'Parse Mode'} eq 'XML' ? ' /' : '',
+ );
+ }
+ }
+ elsif (defined $File->{Tentative}) {
+ $T->param(is_tentative => TRUE);
+ }
+
+ if ($File->{XMLWF_ONLY}) {
+ $T->param(xmlwf_only => TRUE);
+ }
+ my $thispage = self_url_file($File);
+ $T->param(file_thispage => $thispage);
}
#
# Add a warning message to the output.
-sub add_warning ($$) {
- my $WID = shift;
- my $params = shift;
+sub add_warning ($$)
+{
+ my $WID = shift;
+ my $params = shift;
- push @{$File->{Warnings}}, $WID;
+ push @{$File->{Warnings}}, $WID;
- my %tmplparams = (
- $WID => TRUE,
- have_warnings => TRUE,
- %$params,
- );
- for my $tmpl (qw(result fatal-error soap_output)) {
- &get_template($File, "$tmpl.tmpl")->param(%tmplparams);
- }
+ my %tmplparams = (
+ $WID => TRUE,
+ have_warnings => TRUE,
+ %$params,
+ );
+ for my $tmpl (qw(result fatal-error soap_output)) {
+ &get_template($File, "$tmpl.tmpl")->param(%tmplparams);
+ }
}
#
# Proxy authentication requests.
# Note: expects the third argument to be a hash ref (see HTTP::Headers::Auth).
-sub authenticate {
- my $File = shift;
- my $resource = shift;
- my $authHeader = shift || {};
+sub authenticate
+{
+ my $File = shift;
+ my $resource = shift;
+ my $authHeader = shift || {};
- my $realm = $resource;
- $realm =~ s([^\w\d.-]*){}g;
+ my $realm = $resource;
+ $realm =~ s([^\w\d.-]*){}g;
- for my $scheme (keys(%$authHeader)) {
- my $origrealm = $authHeader->{$scheme}->{realm};
- if (not defined $origrealm or lc($scheme) !~ /^(?:basic|digest)$/) {
- delete($authHeader->{$scheme});
- next;
+ for my $scheme (keys(%$authHeader)) {
+ my $origrealm = $authHeader->{$scheme}->{realm};
+ if (not defined $origrealm or lc($scheme) !~ /^(?:basic|digest)$/) {
+ delete($authHeader->{$scheme});
+ next;
+ }
+ $authHeader->{$scheme}->{realm} = "$realm-$origrealm";
}
- $authHeader->{$scheme}->{realm} = "$realm-$origrealm";
- }
- my $headers = HTTP::Headers->new(Connection => 'close');
- $headers->www_authenticate(%$authHeader);
- $headers = $headers->as_string();
- chomp($headers);
+ my $headers = HTTP::Headers->new(Connection => 'close');
+ $headers->www_authenticate(%$authHeader);
+ $headers = $headers->as_string();
+ chomp($headers);
- my $tmpl = &get_template($File, 'http_401_authrequired.tmpl');
- $tmpl->param(http_401_headers => $headers, http_401_url => $resource);
+ my $tmpl = &get_template($File, 'http_401_authrequired.tmpl');
+ $tmpl->param(http_401_headers => $headers, http_401_url => $resource);
- print Encode::encode('UTF-8', $tmpl->output);
- exit; # Further interaction will be a new HTTP request.
+ print Encode::encode('UTF-8', $tmpl->output);
+ exit; # Further interaction will be a new HTTP request.
}
#
# Fetch an URL and return the content and selected meta-info.
-sub handle_uri {
- my $q = shift; # The CGI object.
- my $File = shift; # The master datastructure.
+sub handle_uri
+{
+ my $q = shift; # The CGI object.
+ my $File = shift; # The master datastructure.
- my $uri = new URI (ref $q ? $q->param('uri') : $q)->canonical();
- $uri->fragment(undef);
+ my $uri = new URI(ref $q ? $q->param('uri') : $q)->canonical();
+ $uri->fragment(undef);
- my $ua = new W3C::Validator::UserAgent ($CFG, $File);
+ my $ua = new W3C::Validator::UserAgent($CFG, $File);
- unless ($ua->is_protocol_supported($uri)) {
- $File->{'Error Flagged'} = TRUE;
- my $tmpl = &get_template($File, 'fatal-error.tmpl');
- if (($uri->canonical() eq "1") )
- #if uri param is empty (also for empty direct or upload), it's been set to TRUE in sub prepCGI()
- {
- $tmpl->param(fatal_no_content => TRUE);
+ unless ($ua->is_protocol_supported($uri)) {
+ $File->{'Error Flagged'} = TRUE;
+ my $tmpl = &get_template($File, 'fatal-error.tmpl');
+ if (($uri->canonical() eq "1"))
+
+ #if uri param is empty (also for empty direct or upload), it's been set to TRUE in sub prepCGI()
+ {
+ $tmpl->param(fatal_no_content => TRUE);
+ }
+ else {
+ $tmpl->param(
+ fatal_uri_error => TRUE,
+ fatal_uri_scheme => $uri->scheme()
+ );
+ }
+ return $File;
}
- else {
- $tmpl->param(fatal_uri_error => TRUE, fatal_uri_scheme => $uri->scheme());
+
+ return $File unless $ua->uri_ok($uri);
+
+ my $req = new HTTP::Request(GET => $uri);
+
+ # if one wants to use the accept, accept-charset and accept-language params
+ # in order to trigger specific negotiation
+ if ($File->{Opt}->{'Accept Header'}) {
+ $req->header(Accept => $File->{Opt}->{'Accept Header'});
}
- return $File;
- }
-
- return $File unless $ua->uri_ok($uri);
-
- my $req = new HTTP::Request(GET => $uri);
-
- # if one wants to use the accept, accept-charset and accept-language params
- # in order to trigger specific negotiation
- if ($File->{Opt}->{'Accept Header'}) {
- $req->header(Accept => $File->{Opt}->{'Accept Header'});
- }
- if ($File->{Opt}->{'Accept-Language Header'}) {
- $req->header(Accept_Language => $File->{Opt}->{'Accept-Language Header'});
- }
- if ($File->{Opt}->{'Accept-Charset Header'}) {
- $req->header(Accept_Charset => $File->{Opt}->{'Accept-Charset Header'});
- }
-
- # All Apache configurations don't set HTTP_AUTHORIZATION for CGI scripts.
- # If we're under mod_perl, there is a way around it...
- eval {
- local $SIG{__DIE__};
- my $auth = Apache2::RequestUtil->request()->headers_in()->{Authorization};
- $ENV{HTTP_AUTHORIZATION} = $auth if $auth;
- } if (IS_MODPERL2() && !$ENV{HTTP_AUTHORIZATION});
-
- # If we got a Authorization header, the client is back at it after being
- # prompted for a password so we insert the header as is in the request.
- $req->headers->header(Authorization => $ENV{HTTP_AUTHORIZATION})
- if $ENV{HTTP_AUTHORIZATION};
-
- my $res = $ua->request($req);
-
- return $File if $File->{'Error Flagged'}; # Redirect IP rejected?
-
- unless ($res->code == 200 or $File->{Opt}->{'No200'}) {
- if ($res->code == 401) {
- my %auth = $res->www_authenticate(); # HTTP::Headers::Auth
- &authenticate($File, $res->request->uri, \%auth);
- } else {
- $File->{'Error Flagged'} = TRUE;
-
- my $no200url = undef;
- if (!$File->{Opt}->{No200}) {
- # $File->{URI} not set yet; setting it non-local has side effects
- local $File->{URI} = $uri->as_string;
- local $File->{Opt}->{No200} = TRUE;
- $no200url = &self_url_file($File);
- }
-
- my $warning = $res->header("Client-Warning");
- if ($warning && $warning =~ /Internal response/i) {
- # Response doc generated internally by LWP, no need to show that info
- # nor to provide error doc validation link to it.
- $warning = undef;
- $no200url = undef;
- }
-
- my $tmpl = &get_template($File, 'fatal-error.tmpl');
- $tmpl->param(
- fatal_http_error => TRUE,
- fatal_http_uri => $uri->as_string,
- fatal_http_code => $res->code,
- fatal_http_msg => $res->message,
- fatal_http_warn => $warning,
- fatal_http_no200 => $no200url,
- );
- $tmpl->param(fatal_http_dns => TRUE) if ($res->code == 500);
+ if ($File->{Opt}->{'Accept-Language Header'}) {
+ $req->header(
+ Accept_Language => $File->{Opt}->{'Accept-Language Header'});
+ }
+ if ($File->{Opt}->{'Accept-Charset Header'}) {
+ $req->header(
+ Accept_Charset => $File->{Opt}->{'Accept-Charset Header'});
}
- return $File;
- }
+ # All Apache configurations don't set HTTP_AUTHORIZATION for CGI scripts.
+ # If we're under mod_perl, there is a way around it...
+ eval {
+ local $SIG{__DIE__};
+ my $auth =
+ Apache2::RequestUtil->request()->headers_in()->{Authorization};
+ $ENV{HTTP_AUTHORIZATION} = $auth if $auth;
+ } if (IS_MODPERL2() && !$ENV{HTTP_AUTHORIZATION});
- #
- # Enforce Max Recursion level.
- &check_recursion($File, $res);
+ # If we got a Authorization header, the client is back at it after being
+ # prompted for a password so we insert the header as is in the request.
+ $req->headers->header(Authorization => $ENV{HTTP_AUTHORIZATION})
+ if $ENV{HTTP_AUTHORIZATION};
- my ($mode, $ct, $charset)
- = &parse_content_type(
- $File,
- scalar($res->header('Content-Type')),
- scalar($res->request->uri),
- );
+ my $res = $ua->request($req);
- my $content = &get_content($File, $res);
- return $File if $File->{'Error Flagged'};
+ return $File if $File->{'Error Flagged'}; # Redirect IP rejected?
- $File->{Bytes} = $content;
- $File->{Mode} = $mode;
- $File->{ContentType} = $ct;
- $File->{ContentEnc} = $res->content_encoding;
- $File->{ContentLang} = $res->content_language;
- $File->{ContentLoc} = $res->header('Content-Location');
- $File->{TransferEnc} = $res->header('Client-Transfer-Encoding');
- $File->{Charset}->{HTTP} = lc $charset;
- $File->{Modified} = $res->header('Last-Modified');
- $File->{Server} = scalar $res->server;
+ unless ($res->code == 200 or $File->{Opt}->{'No200'}) {
+ if ($res->code == 401) {
+ my %auth = $res->www_authenticate(); # HTTP::Headers::Auth
+ &authenticate($File, $res->request->uri, \%auth);
+ }
+ else {
+ $File->{'Error Flagged'} = TRUE;
- # TODO: Content-Length is not always set, so either this should
- # be renamed to 'Content-Length' or it should consider more than
- # the Content-Length header.
- $File->{Size} = scalar $res->content_length;
- $File->{URI} = scalar $res->request->uri->canonical;
- $File->{'Is Upload'} = FALSE;
- $File->{'Direct Input'} = FALSE;
+ my $no200url = undef;
+ if (!$File->{Opt}->{No200}) {
+ # $File->{URI} not set yet; setting it non-local has side effects
+ local $File->{URI} = $uri->as_string;
+ local $File->{Opt}->{No200} = TRUE;
+ $no200url = &self_url_file($File);
+ }
- return $File;
-}
+ my $warning = $res->header("Client-Warning");
+ if ($warning && $warning =~ /Internal response/i) {
-#
-# Handle uploaded file and return the content and selected meta-info.
-sub handle_file {
- my $q = shift; # The CGI object.
- my $File = shift; # The master datastructure.
+ # Response doc generated internally by LWP, no need to show that info
+ # nor to provide error doc validation link to it.
+ $warning = undef;
+ $no200url = undef;
+ }
- my $f = $q->param('uploaded_file');
- my $h = $q->uploadInfo($f);
- my $file;
+ my $tmpl = &get_template($File, 'fatal-error.tmpl');
+ $tmpl->param(
+ fatal_http_error => TRUE,
+ fatal_http_uri => $uri->as_string,
+ fatal_http_code => $res->code,
+ fatal_http_msg => $res->message,
+ fatal_http_warn => $warning,
+ fatal_http_no200 => $no200url,
+ );
+ $tmpl->param(fatal_http_dns => TRUE) if ($res->code == 500);
+ }
- local $/ = undef; # set line delimiter so that <> reads rest of file
- $file = <$f>;
+ return $File;
+ }
- my ($mode, $ct, $charset) = &parse_content_type($File, $h->{'Content-Type'});
+ #
+ # Enforce Max Recursion level.
+ &check_recursion($File, $res);
- $File->{Bytes} = $file;
- $File->{Mode} = $mode;
- $File->{ContentType} = $ct;
- $File->{Charset}->{HTTP} = lc $charset;
- $File->{Modified} = $q->http('Last-Modified');
- $File->{Server} = $q->http('User-Agent'); # Fake a "server". :-)
- $File->{Size} = $q->http('Content-Length');
- $File->{URI} = "$f";
- $File->{'Is Upload'} = TRUE;
- $File->{'Direct Input'} = FALSE;
+ my ($mode, $ct, $charset) = &parse_content_type(
+ $File,
+ scalar($res->header('Content-Type')),
+ scalar($res->request->uri),
+ );
- return $File;
+ my $content = &get_content($File, $res);
+ return $File if $File->{'Error Flagged'};
+
+ $File->{Bytes} = $content;
+ $File->{Mode} = $mode;
+ $File->{ContentType} = $ct;
+ $File->{ContentEnc} = $res->content_encoding;
+ $File->{ContentLang} = $res->content_language;
+ $File->{ContentLoc} = $res->header('Content-Location');
+ $File->{TransferEnc} = $res->header('Client-Transfer-Encoding');
+ $File->{Charset}->{HTTP} = lc $charset;
+ $File->{Modified} = $res->header('Last-Modified');
+ $File->{Server} = scalar $res->server;
+
+ # TODO: Content-Length is not always set, so either this should
+ # be renamed to 'Content-Length' or it should consider more than
+ # the Content-Length header.
+ $File->{Size} = scalar $res->content_length;
+ $File->{URI} = scalar $res->request->uri->canonical;
+ $File->{'Is Upload'} = FALSE;
+ $File->{'Direct Input'} = FALSE;
+
+ return $File;
}
#
# Handle uploaded file and return the content and selected meta-info.
-sub handle_frag {
- my $q = shift; # The CGI object.
- my $File = shift; # The master datastructure.
-
- $File->{Bytes} = $q->param('fragment');
- $File->{Mode} = 'TBD';
- $File->{Modified} = '';
- $File->{Server} = '';
- $File->{Size} = '';
- $File->{ContentType} = ''; # @@TODO?
- $File->{URI} = 'upload://Form Submission';
- $File->{'Is Upload'} = FALSE;
- $File->{'Direct Input'} = TRUE;
- $File->{Charset}->{HTTP} = "utf-8"; # by default, the form accepts utf-8 chars
-
- if ($File->{Opt}->{Prefill}) {
- # we surround the HTML fragment with some basic document structure
- my $prefill_Template;
- if ($File->{Opt}->{'Prefill Doctype'} eq 'html401') {
- $prefill_Template = &get_template($File, 'prefill_html401.tmpl');
- }
- else {
- $prefill_Template = &get_template($File, 'prefill_xhtml10.tmpl');
+sub handle_file
+{
+ my $q = shift; # The CGI object.
+ my $File = shift; # The master datastructure.
+
+ my $f = $q->param('uploaded_file');
+ my $h = $q->uploadInfo($f);
+ my $file;
+
+ local $/ = undef; # set line delimiter so that <> reads rest of file
+ $file = <$f>;
+
+ my ($mode, $ct, $charset) =
+ &parse_content_type($File, $h->{'Content-Type'});
+
+ $File->{Bytes} = $file;
+ $File->{Mode} = $mode;
+ $File->{ContentType} = $ct;
+ $File->{Charset}->{HTTP} = lc $charset;
+ $File->{Modified} = $q->http('Last-Modified');
+ $File->{Server} = $q->http('User-Agent'); # Fake a "server". :-)
+ $File->{Size} = $q->http('Content-Length');
+ $File->{URI} = "$f";
+ $File->{'Is Upload'} = TRUE;
+ $File->{'Direct Input'} = FALSE;
+
+ return $File;
+}
+
+#
+# Handle uploaded file and return the content and selected meta-info.
+sub handle_frag
+{
+ my $q = shift; # The CGI object.
+ my $File = shift; # The master datastructure.
+
+ $File->{Bytes} = $q->param('fragment');
+ $File->{Mode} = 'TBD';
+ $File->{Modified} = '';
+ $File->{Server} = '';
+ $File->{Size} = '';
+ $File->{ContentType} = ''; # @@TODO?
+ $File->{URI} = 'upload://Form Submission';
+ $File->{'Is Upload'} = FALSE;
+ $File->{'Direct Input'} = TRUE;
+ $File->{Charset}->{HTTP} =
+ "utf-8"; # by default, the form accepts utf-8 chars
+
+ if ($File->{Opt}->{Prefill}) {
+
+ # we surround the HTML fragment with some basic document structure
+ my $prefill_Template;
+ if ($File->{Opt}->{'Prefill Doctype'} eq 'html401') {
+ $prefill_Template = &get_template($File, 'prefill_html401.tmpl');
+ }
+ else {
+ $prefill_Template = &get_template($File, 'prefill_xhtml10.tmpl');
+ }
+ $prefill_Template->param(fragment => $File->{Bytes});
+ $File->{Bytes} = $prefill_Template->output();
+
+ # Let's force the view source so that the user knows what we've put around
+ # their code.
+ $File->{Opt}->{'Show Source'} = TRUE;
+
+ # Ignore doctype overrides (#5132).
+ $File->{Opt}->{DOCTYPE} = 'Inline';
}
- $prefill_Template->param(fragment => $File->{Bytes});
- $File->{Bytes} = $prefill_Template->output();
- # Let's force the view source so that the user knows what we've put around
- # their code.
- $File->{Opt}->{'Show Source'} = TRUE;
- # Ignore doctype overrides (#5132).
- $File->{Opt}->{DOCTYPE} = 'Inline';
- }
- return $File;
+ return $File;
}
#
# Parse a Content-Type and parameters. Return document type and charset.
-sub parse_content_type {
- my $File = shift;
- my $Content_Type = shift;
- my $url = shift;
- my $charset = '';
+sub parse_content_type
+{
+ my $File = shift;
+ my $Content_Type = shift;
+ my $url = shift;
+ my $charset = '';
- my ($ct) = lc($Content_Type) =~ /^\s*([^\s;]*)/g;
+ my ($ct) = lc($Content_Type) =~ /^\s*([^\s;]*)/g;
- my $mode = $CFG->{MIME}->{$ct} || $ct;
+ my $mode = $CFG->{MIME}->{$ct} || $ct;
- $charset = HTML::Encoding::encoding_from_content_type($Content_Type);
+ $charset = HTML::Encoding::encoding_from_content_type($Content_Type);
- if ($mode =~ m(/)) { # a "/" means it's unknown or we'd have a mode here.
- if ($ct eq 'text/css' and defined $url) {
- print redirect
- 'http://jigsaw.w3.org/css-validator/validator?uri='
- . uri_escape $url;
- exit;
- } elsif ($ct eq 'application/atom+xml' and defined $url) {
- print redirect
- 'http://validator.w3.org/feed/check.cgi?url='
- . uri_escape $url;
- exit;
- } elsif ($ct =~ m(^application/.+\+xml$)) {
- # unknown media types which should be XML - we give these a try
- $mode = "XML";
- } else {
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_mime_error => TRUE,
- fatal_mime_ct => $ct,
- );
+ if ($mode =~ m(/)) { # a "/" means it's unknown or we'd have a mode here.
+ if ($ct eq 'text/css' and defined $url) {
+ print redirect
+ 'http://jigsaw.w3.org/css-validator/validator?uri=' .
+ uri_escape $url;
+ exit;
+ }
+ elsif ($ct eq 'application/atom+xml' and defined $url) {
+ print redirect 'http://validator.w3.org/feed/check.cgi?url=' .
+ uri_escape $url;
+ exit;
+ }
+ elsif ($ct =~ m(^application/.+\+xml$)) {
+
+ # unknown media types which should be XML - we give these a try
+ $mode = "XML";
+ }
+ else {
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')->param(
+ fatal_mime_error => TRUE,
+ fatal_mime_ct => $ct,
+ );
+ }
}
- }
- return $mode, $ct, $charset;
+ return $mode, $ct, $charset;
}
#
# Get content with Content-Encodings decoded from a response.
-sub get_content ($$) {
- my $File = shift;
- my $res = shift;
-
- my $content;
- eval {
- $content = $res->decoded_content(charset => 'none', raise_error => 1);
- };
- if ($@) {
- (my $errmsg = $@) =~ s/ at .*//s;
- my $cenc = $res->header("Content-Encoding");
- my $uri = $res->request->uri;
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_decode_error => TRUE,
- fatal_decode_errmsg => $errmsg,
- fatal_decode_cenc => $cenc,
- # Include URI because it might be a subsystem (eg. HTML5 validator) one
- fatal_decode_uri => $uri,
- );
- }
+sub get_content ($$)
+{
+ my $File = shift;
+ my $res = shift;
+
+ my $content;
+ eval {
+ $content = $res->decoded_content(charset => 'none', raise_error => 1);
+ };
+ if ($@) {
+ (my $errmsg = $@) =~ s/ at .*//s;
+ my $cenc = $res->header("Content-Encoding");
+ my $uri = $res->request->uri;
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')->param(
+ fatal_decode_error => TRUE,
+ fatal_decode_errmsg => $errmsg,
+ fatal_decode_cenc => $cenc,
+
+ # Include URI because it might be a subsystem (eg. HTML5 validator) one
+ fatal_decode_uri => $uri,
+ );
+ }
- return $content;
+ return $content;
}
#
# Check recursion level and enforce Max Recursion limit.
-sub check_recursion ($$) {
- my $File = shift;
- my $res = shift;
+sub check_recursion ($$)
+{
+ my $File = shift;
+ my $res = shift;
- # Not looking at our own output.
- return unless defined $res->header('X-W3C-Validator-Recursion');
+ # Not looking at our own output.
+ return unless defined $res->header('X-W3C-Validator-Recursion');
- my $lvl = $res->header('X-W3C-Validator-Recursion');
- return unless $lvl =~ m/^\d+$/; # Non-digit, i.e. garbage, ignore.
+ my $lvl = $res->header('X-W3C-Validator-Recursion');
+ return unless $lvl =~ m/^\d+$/; # Non-digit, i.e. garbage, ignore.
- if ($lvl >= $CFG->{'Max Recursion'}) {
- print redirect $CFG->{'Home Page'};
- } else {
- # Increase recursion level in output.
- &get_template($File, 'result.tmpl')->param(depth => $lvl++);
- }
+ if ($lvl >= $CFG->{'Max Recursion'}) {
+ print redirect $CFG->{'Home Page'};
+ }
+ else {
+
+ # Increase recursion level in output.
+ &get_template($File, 'result.tmpl')->param(depth => $lvl++);
+ }
}
#
@@ -1833,300 +1999,335 @@ sub check_recursion ($$) {
#
# Note that this is used both for HTML and XML escaping.
#
-sub ent {
- local $_ = shift;
- return '' unless defined; # Eliminate warnings
+sub ent
+{
+ local $_ = shift;
+ return '' unless defined; # Eliminate warnings
- # TODO: Err, why have " twice in the character class? ' maybe?
- s(["<&>"]){'&#' . ord($&) . ';'}ge; # should switch to hex sooner or later
- return $_;
+ # TODO: Err, why have " twice in the character class? ' maybe?
+ s(["<&>"]){'&#' . ord($&) . ';'}ge; # should switch to hex sooner or later
+ return $_;
}
#
# Truncate source lines for report.
# Expects 1-based column indexes.
-sub truncate_line {
- my $line = shift;
- my $col = shift;
- my $maxlen = 80; # max line length to truncate to
+sub truncate_line
+{
+ my $line = shift;
+ my $col = shift;
+ my $maxlen = 80; # max line length to truncate to
+
+ my $diff = length($line) - $maxlen;
- my $diff = length($line) - $maxlen;
+ # Don't truncate at all if it fits.
+ return ($line, $col) if ($diff <= 0);
- # Don't truncate at all if it fits.
- return ($line, $col) if ($diff <= 0);
+ my $start = $col - int($maxlen / 2);
+ if ($start < 0) {
- my $start = $col - int($maxlen/2);
- if ($start < 0) {
- # Truncate only from end of line.
- $start = 0;
- $line = substr($line, $start, $maxlen - 1) . '…';
- }
- elsif ($start > $diff) {
- # Truncate only from beginning of line.
- $start = $diff;
- $line = '…' . substr($line, $start + 1);
- }
- else {
- # Truncate from both beginning and end of line.
- $line = '…' . substr($line, $start + 1, $maxlen - 2) . '…';
- }
+ # Truncate only from end of line.
+ $start = 0;
+ $line = substr($line, $start, $maxlen - 1) . '…';
+ }
+ elsif ($start > $diff) {
- # Shift column if we truncated from beginning of line.
- $col -= $start;
+ # Truncate only from beginning of line.
+ $start = $diff;
+ $line = '…' . substr($line, $start + 1);
+ }
+ else {
- return ($line, $col);
+ # Truncate from both beginning and end of line.
+ $line = '…' . substr($line, $start + 1, $maxlen - 2) . '…';
+ }
+
+ # Shift column if we truncated from beginning of line.
+ $col -= $start;
+
+ return ($line, $col);
}
#
# Suppress any existing DOCTYPE by commenting it out.
-sub override_doctype {
- my $File = shift;
-
- my ($dt) =
- grep { $_->{Display} eq $File->{Opt}->{DOCTYPE} } values %{$CFG->{Types}};
-
- # @@TODO: abort/whine about unrecognized doctype if $dt is undef.;
- my $pubid = $dt->{PubID};
- my $sysid = $dt->{SysID};
- my $name = $dt->{Name};
-
- # The HTML5 PubID is a fake, reset it out of the way.
- $pubid = undef if ($pubid eq 'HTML5');
-
- # We don't have public/system ids for all types.
- my $dtd = "<!DOCTYPE $name";
- if ($pubid) {
- $dtd .= qq( PUBLIC "$pubid");
- $dtd .= qq( "$sysid") if $sysid;
- }
- elsif ($sysid) {
- $dtd .= qq( SYSTEM "$sysid");
- }
- $dtd .= '>';
-
- my $org_dtd = '';
- my $HTML = '';
- my $seen_doctype = FALSE;
- my $seen_root = FALSE;
-
- my $declaration = sub {
- my ($tag, $text) = @_;
- if ($seen_doctype || uc($tag) ne '!DOCTYPE') {
- $HTML .= $text;
- return;
- }
-
- $seen_doctype = TRUE;
-
- $org_dtd = &ent($text);
- ($File->{Root}, undef, $File->{DOCTYPE}) = $text =~
- /<!DOCTYPE\s+(\w[\w\.-]+)(?:\s+(?:PUBLIC|SYSTEM)\s+(['"])(.*?)\2)?\s*>/si;
-
- $File->{DOCTYPE} = 'HTML5'
- if (lc($File->{Root} || '') eq 'html' &&
- (!defined($File->{DOCTYPE}) ||
- $File->{DOCTYPE} eq 'about:legacy-compat'));
-
- # No Override if Fallback was requested, or if override is the same as detected
- my $known = $CFG->{Types}->{$File->{DOCTYPE}};
- if ($File->{Opt}->{FB}->{DOCTYPE} or
- ($known && $File->{Opt}->{DOCTYPE} eq $known->{Display})) {
- $HTML .= $text; # Stash it as is...
- } else {
- $HTML .= "$dtd<!-- $text -->";
- }
- };
-
- my $start_element = sub{
- if ($seen_root) {
- $HTML .= $_[0]; # Stash it as is... moving on
+sub override_doctype
+{
+ my $File = shift;
+
+ my ($dt) =
+ grep { $_->{Display} eq $File->{Opt}->{DOCTYPE} }
+ values %{$CFG->{Types}};
+
+ # @@TODO: abort/whine about unrecognized doctype if $dt is undef.;
+ my $pubid = $dt->{PubID};
+ my $sysid = $dt->{SysID};
+ my $name = $dt->{Name};
+
+ # The HTML5 PubID is a fake, reset it out of the way.
+ $pubid = undef if ($pubid eq 'HTML5');
+
+ # We don't have public/system ids for all types.
+ my $dtd = "<!DOCTYPE $name";
+ if ($pubid) {
+ $dtd .= qq( PUBLIC "$pubid");
+ $dtd .= qq( "$sysid") if $sysid;
}
- else {
- $seen_root = TRUE;
- if ($seen_doctype) {
- # doctype addition aldready done, we move on
- $HTML .= $_[0];
- }
- else {
- # no original doctype present, hence none replaced already
- # => we sneak the chosen doctype before the root elt
- $HTML .= "$dtd$_[0]";
- }
+ elsif ($sysid) {
+ $dtd .= qq( SYSTEM "$sysid");
}
- };
+ $dtd .= '>';
- HTML::Parser->new(default_h => [sub {$HTML .= shift}, 'text'],
- declaration_h => [$declaration, 'tag,text'],
- start_h => [$start_element, 'text']
- )->parse(join "\n", @{$File->{Content}})->eof();
+ my $org_dtd = '';
+ my $HTML = '';
+ my $seen_doctype = FALSE;
+ my $seen_root = FALSE;
- $File->{Content} = [split /\n/, $HTML];
+ my $declaration = sub {
+ my ($tag, $text) = @_;
+ if ($seen_doctype || uc($tag) ne '!DOCTYPE') {
+ $HTML .= $text;
+ return;
+ }
- if ($seen_doctype) {
- my $known = $CFG->{Types}->{$File->{DOCTYPE}};
- unless ($File->{Opt}->{FB}->{DOCTYPE} or
- ($known && $File->{Opt}->{DOCTYPE} eq $known->{Display} )) {
- &add_warning('W13', {
- W13_org => $org_dtd,
- W13_new => $File->{Opt}->{DOCTYPE},
- });
- $File->{Tentative} |= T_ERROR; # Tag it as Invalid.
- }
- } else {
- if ($File->{"DOCTYPEless OK"}) {
- &add_warning('W25', {W25_dtd => $File->{Opt}->{DOCTYPE}});
+ $seen_doctype = TRUE;
+
+ $org_dtd = &ent($text);
+ ($File->{Root}, undef, $File->{DOCTYPE}) = $text =~
+ /<!DOCTYPE\s+(\w[\w\.-]+)(?:\s+(?:PUBLIC|SYSTEM)\s+(['"])(.*?)\2)?\s*>/si;
+
+ $File->{DOCTYPE} = 'HTML5'
+ if (
+ lc($File->{Root} || '') eq 'html' &&
+ (!defined($File->{DOCTYPE}) ||
+ $File->{DOCTYPE} eq 'about:legacy-compat')
+ );
+
+ # No Override if Fallback was requested, or if override is the same as detected
+ my $known = $CFG->{Types}->{$File->{DOCTYPE}};
+ if ($File->{Opt}->{FB}->{DOCTYPE} or
+ ($known && $File->{Opt}->{DOCTYPE} eq $known->{Display}))
+ {
+ $HTML .= $text; # Stash it as is...
+ }
+ else {
+ $HTML .= "$dtd<!-- $text -->";
+ }
+ };
+
+ my $start_element = sub {
+ if ($seen_root) {
+ $HTML .= $_[0]; # Stash it as is... moving on
+ }
+ else {
+ $seen_root = TRUE;
+ if ($seen_doctype) {
+
+ # doctype addition aldready done, we move on
+ $HTML .= $_[0];
+ }
+ else {
+
+ # no original doctype present, hence none replaced already
+ # => we sneak the chosen doctype before the root elt
+ $HTML .= "$dtd$_[0]";
+ }
+ }
+ };
+
+ HTML::Parser->new(
+ default_h => [sub { $HTML .= shift }, 'text'],
+ declaration_h => [$declaration, 'tag,text'],
+ start_h => [$start_element, 'text']
+ )->parse(join "\n", @{$File->{Content}})->eof();
+
+ $File->{Content} = [split /\n/, $HTML];
+
+ if ($seen_doctype) {
+ my $known = $CFG->{Types}->{$File->{DOCTYPE}};
+ unless ($File->{Opt}->{FB}->{DOCTYPE} or
+ ($known && $File->{Opt}->{DOCTYPE} eq $known->{Display}))
+ {
+ &add_warning(
+ 'W13',
+ { W13_org => $org_dtd,
+ W13_new => $File->{Opt}->{DOCTYPE},
+ }
+ );
+ $File->{Tentative} |= T_ERROR; # Tag it as Invalid.
+ }
}
- elsif ($File->{Opt}->{FB}->{DOCTYPE}) {
- &add_warning('W16', {W16_dtd => $File->{Opt}->{DOCTYPE}});
- $File->{Tentative} |= T_ERROR; # Tag it as Invalid.
- } else {
- &add_warning('W15', {W15_dtd => $File->{Opt}->{DOCTYPE}});
- $File->{Tentative} |= T_ERROR; # Tag it as Invalid.
+ else {
+ if ($File->{"DOCTYPEless OK"}) {
+ &add_warning('W25', {W25_dtd => $File->{Opt}->{DOCTYPE}});
+ }
+ elsif ($File->{Opt}->{FB}->{DOCTYPE}) {
+ &add_warning('W16', {W16_dtd => $File->{Opt}->{DOCTYPE}});
+ $File->{Tentative} |= T_ERROR; # Tag it as Invalid.
+ }
+ else {
+ &add_warning('W15', {W15_dtd => $File->{Opt}->{DOCTYPE}});
+ $File->{Tentative} |= T_ERROR; # Tag it as Invalid.
+ }
}
- }
- return $File;
+ return $File;
}
#
# Generate a HTML report of detected errors.
-sub report_errors ($) {
- my $File = shift;
- my $Errors = [];
- my %Errors_bytype;
- my $number_of_errors = 0;
- my $number_of_warnings = 0;
- my $number_of_info = 0;
-
- # Hash to keep track of how many of each error is reported.
- my %Msgs; # Used to generate a UID for explanations.
-
- # for the sake of readability, at least until the xmlwf errors have explanations,
- # we push the errors from the XML parser at the END of the error list.
- foreach my $errmsg (@{$File->{WF_Errors}}){
- push @{$File->{Errors}}, $errmsg;
- }
-
-
- if (scalar @{$File->{Errors}}) {
- foreach my $err (@{$File->{Errors}}) {
- my $line;
- my $col = 0;
- # avoid truncating lines that do not exist
- if (defined($err->{line}) && $File->{Content}->[$err->{line}-1]) {
- if (defined($err->{char}) && $err->{char} =~ /^[0-9]+$/ ){
- ($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $err->{char});
- $line = &mark_error($line, $col);
- }
- elsif (defined($err->{line})) {
- $col = length($File->{Content}->[$err->{line}-1]);
- $col = 80 if ($col > 80);
- ($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $col);
- $line = &ent($line);
- $col = 0;
- }
- }
- else {
- $col = 0;
- $line = "";
- }
- my $explanation = "";
- if ($err->{expl}) {
-
- }
- else {
- if ($err->{num}) {
- my $num = $err->{num};
- $explanation .= Encode::decode_utf8("\n $RSRC{msg}->{$num}->{verbose}\n")
- if exists $RSRC{msg}->{$num}
- && exists $RSRC{msg}->{$num}->{verbose};
- my $_msg = $RSRC{msg}->{nomsg}->{verbose};
- $_msg =~ s/<!--MID-->/$num/g;
- if (($File->{'Is Upload'}) or ($File->{'Direct Input'}))
- {
- $_msg =~ s/<!--URI-->//g
- }
- else
- {
- my $escaped_uri = uri_escape($File->{URI});
- $_msg =~ s/<!--URI-->/$escaped_uri/g;
- }
- $explanation = " $_msg\n$explanation"; # The send feedback plea.
- $explanation =~ s/<!--CFG_HOME_PAGE-->/$CFG->{'Home Page'}/g;
- }
- $err->{expl} = $explanation;
- }
-
- $err->{src} = $line;
- $err->{col} = ' ' x $col;
- if ($err->{type} eq 'I')
- {
- $err->{class} = 'msg_info';
- $err->{err_type_err} = 0;
- $err->{err_type_warn} = 0;
- $err->{err_type_info} = 1;
- $number_of_info += 1;
- }
- elsif ($err->{type} eq 'E')
- {
- $err->{class} = 'msg_err';
- $err->{err_type_err} = 1;
- $err->{err_type_warn} = 0;
- $err->{err_type_info} = 0;
- $number_of_errors += 1;
- }
- elsif (($err->{type} eq 'W') or ($err->{type} eq 'X') )
- {
- $err->{class} = 'msg_warn';
- $err->{err_type_err} = 0;
- $err->{err_type_warn} = 1;
- $err->{err_type_info} = 0;
- $number_of_warnings += 1;
- }
- # TODO other classes for "X" etc? FIXME find all types of message.
-
- push @{$Errors}, $err;
-
- if (($File->{Opt}->{'Group Errors'}) and (($err->{type} eq 'E') or ($err->{type} eq 'W')or ($err->{type} eq 'X'))) {
- # index by num for errors and warnings only - info usually give context of error or warning
- if (! exists $Errors_bytype{$err->{num}}) {
- $Errors_bytype{$err->{num}}->{instances} = [];
- my $msg_text;
- if ($err->{num} ne 'xmlwf') {
- $msg_text = $RSRC{msg}->{$err->{num}}->{original};
- $msg_text =~ s/%1/X/;
- $msg_text =~ s/%2/Y/;
- }
- else { ## FIXME ## we need a catalog of errors from our XML parser
- $msg_text = "XML Parsing Error";
- }
- $Errors_bytype{$err->{num}}->{expl} = $err->{expl};
- $Errors_bytype{$err->{num}}->{generic_msg} = $msg_text;
- $Errors_bytype{$err->{num}}->{msg} = $err->{msg};
- $Errors_bytype{$err->{num}}->{type} = $err->{type};
- $Errors_bytype{$err->{num}}->{class} = $err->{class};
- $Errors_bytype{$err->{num}}->{err_type_err} = $err->{err_type_err};
- $Errors_bytype{$err->{num}}->{err_type_warn} = $err->{err_type_warn};
- $Errors_bytype{$err->{num}}->{err_type_info} = $err->{err_type_info};
- }
- push @ { $Errors_bytype{$err->{num}}->{instances} }, $err;
- }
- }
- }
- if ($File->{Opt}->{'Group Errors'}) {
- $Errors = [];
- for my $err_num (keys %Errors_bytype){
- push @{$Errors}, $Errors_bytype{$err_num};
- }
- }
- # we are not sorting errors by line, as it would break the position
- # of auxiliary messages such as "start tag was here". We'll have to live with
- # the fact that XML well-formedness errors are listed first, then validation errors
- #else {
- # sort error by lines
- # @{$Errors} = sort {$a->{line} <=> $b->{line} } @{$Errors};
- #}
- return $number_of_errors, $number_of_warnings, $number_of_info, $Errors;
+sub report_errors ($)
+{
+ my $File = shift;
+ my $Errors = [];
+ my %Errors_bytype;
+ my $number_of_errors = 0;
+ my $number_of_warnings = 0;
+ my $number_of_info = 0;
+
+ # Hash to keep track of how many of each error is reported.
+ my %Msgs; # Used to generate a UID for explanations.
+
+ # for the sake of readability, at least until the xmlwf errors have explanations,
+ # we push the errors from the XML parser at the END of the error list.
+ foreach my $errmsg (@{$File->{WF_Errors}}) {
+ push @{$File->{Errors}}, $errmsg;
+ }
+
+ if (scalar @{$File->{Errors}}) {
+ foreach my $err (@{$File->{Errors}}) {
+ my $line;
+ my $col = 0;
+
+ # avoid truncating lines that do not exist
+ if (defined($err->{line}) && $File->{Content}->[$err->{line} - 1])
+ {
+ if (defined($err->{char}) && $err->{char} =~ /^[0-9]+$/) {
+ ($line, $col) =
+ &truncate_line($File->{Content}->[$err->{line} - 1],
+ $err->{char});
+ $line = &mark_error($line, $col);
+ }
+ elsif (defined($err->{line})) {
+ $col = length($File->{Content}->[$err->{line} - 1]);
+ $col = 80 if ($col > 80);
+ ($line, $col) =
+ &truncate_line($File->{Content}->[$err->{line} - 1],
+ $col);
+ $line = &ent($line);
+ $col = 0;
+ }
+ }
+ else {
+ $col = 0;
+ $line = "";
+ }
+ my $explanation = "";
+ if ($err->{expl}) {
+
+ }
+ else {
+ if ($err->{num}) {
+ my $num = $err->{num};
+ $explanation .= Encode::decode_utf8(
+ "\n $RSRC{msg}->{$num}->{verbose}\n")
+ if exists $RSRC{msg}->{$num} &&
+ exists $RSRC{msg}->{$num}->{verbose};
+ my $_msg = $RSRC{msg}->{nomsg}->{verbose};
+ $_msg =~ s/<!--MID-->/$num/g;
+ if (($File->{'Is Upload'}) or ($File->{'Direct Input'})) {
+ $_msg =~ s/<!--URI-->//g;
+ }
+ else {
+ my $escaped_uri = uri_escape($File->{URI});
+ $_msg =~ s/<!--URI-->/$escaped_uri/g;
+ }
+ $explanation =
+ " $_msg\n$explanation"; # The send feedback plea.
+ $explanation =~
+ s/<!--CFG_HOME_PAGE-->/$CFG->{'Home Page'}/g;
+ }
+ $err->{expl} = $explanation;
+ }
+
+ $err->{src} = $line;
+ $err->{col} = ' ' x $col;
+ if ($err->{type} eq 'I') {
+ $err->{class} = 'msg_info';
+ $err->{err_type_err} = 0;
+ $err->{err_type_warn} = 0;
+ $err->{err_type_info} = 1;
+ $number_of_info += 1;
+ }
+ elsif ($err->{type} eq 'E') {
+ $err->{class} = 'msg_err';
+ $err->{err_type_err} = 1;
+ $err->{err_type_warn} = 0;
+ $err->{err_type_info} = 0;
+ $number_of_errors += 1;
+ }
+ elsif (($err->{type} eq 'W') or ($err->{type} eq 'X')) {
+ $err->{class} = 'msg_warn';
+ $err->{err_type_err} = 0;
+ $err->{err_type_warn} = 1;
+ $err->{err_type_info} = 0;
+ $number_of_warnings += 1;
+ }
+
+ # TODO other classes for "X" etc? FIXME find all types of message.
+
+ push @{$Errors}, $err;
+
+ if (($File->{Opt}->{'Group Errors'}) and
+ (($err->{type} eq 'E') or
+ ($err->{type} eq 'W') or
+ ($err->{type} eq 'X'))
+ )
+ {
+
+ # index by num for errors and warnings only - info usually give context of error or warning
+ if (!exists $Errors_bytype{$err->{num}}) {
+ $Errors_bytype{$err->{num}}->{instances} = [];
+ my $msg_text;
+ if ($err->{num} ne 'xmlwf') {
+ $msg_text = $RSRC{msg}->{$err->{num}}->{original};
+ $msg_text =~ s/%1/X/;
+ $msg_text =~ s/%2/Y/;
+ }
+ else
+ { ## FIXME ## we need a catalog of errors from our XML parser
+ $msg_text = "XML Parsing Error";
+ }
+ $Errors_bytype{$err->{num}}->{expl} = $err->{expl};
+ $Errors_bytype{$err->{num}}->{generic_msg} = $msg_text;
+ $Errors_bytype{$err->{num}}->{msg} = $err->{msg};
+ $Errors_bytype{$err->{num}}->{type} = $err->{type};
+ $Errors_bytype{$err->{num}}->{class} = $err->{class};
+ $Errors_bytype{$err->{num}}->{err_type_err} =
+ $err->{err_type_err};
+ $Errors_bytype{$err->{num}}->{err_type_warn} =
+ $err->{err_type_warn};
+ $Errors_bytype{$err->{num}}->{err_type_info} =
+ $err->{err_type_info};
+ }
+ push @{$Errors_bytype{$err->{num}}->{instances}}, $err;
+ }
+ }
+ }
+ if ($File->{Opt}->{'Group Errors'}) {
+ $Errors = [];
+ for my $err_num (keys %Errors_bytype) {
+ push @{$Errors}, $Errors_bytype{$err_num};
+ }
+ }
+
+ # we are not sorting errors by line, as it would break the position
+ # of auxiliary messages such as "start tag was here". We'll have to live with
+ # the fact that XML well-formedness errors are listed first, then validation errors
+ #else {
+ # sort error by lines
+ # @{$Errors} = sort {$a->{line} <=> $b->{line} } @{$Errors};
+ #}
+ return $number_of_errors, $number_of_warnings, $number_of_info, $Errors;
}
#
@@ -2134,315 +2335,361 @@ sub report_errors ($) {
# was detected, and everything to the left and right of that position.
# That way we can add markup to the relevant char without breaking &ent().
# Expects 1-based column indexes.
-sub mark_error ($$) {
- my $line = shift;
- my $col = shift;
- my $linelen = length($line);
+sub mark_error ($$)
+{
+ my $line = shift;
+ my $col = shift;
+ my $linelen = length($line);
- # Coerce column into an index valid within the line.
- if ($col < 1) {
- $col = 1;
- } elsif ($col > $linelen) {
- $col = $linelen;
- }
- $col--;
+ # Coerce column into an index valid within the line.
+ if ($col < 1) {
+ $col = 1;
+ }
+ elsif ($col > $linelen) {
+ $col = $linelen;
+ }
+ $col--;
- my $left = substr($line, 0, $col);
- my $char = substr($line, $col, 1);
- my $right = substr($line, $col + 1);
+ my $left = substr($line, 0, $col);
+ my $char = substr($line, $col, 1);
+ my $right = substr($line, $col + 1);
- $char = &ent($char);
- $char = qq(<strong title="Position where error was detected.">$char</strong>);
- $line = &ent($left) . $char . &ent($right);
+ $char = &ent($char);
+ $char =
+ qq(<strong title="Position where error was detected.">$char</strong>);
+ $line = &ent($left) . $char . &ent($right);
- return $line;
+ return $line;
}
#
# Create a HTML representation of the document.
-sub source {
- my $File = shift;
+sub source
+{
+ my $File = shift;
- # Remove any BOM since we're not at BOT anymore...
- $File->{Content}->[0] = substr($File->{Content}->[0], 1)
- if ($File->{BOM} && scalar(@{$File->{Content}}));
+ # Remove any BOM since we're not at BOT anymore...
+ $File->{Content}->[0] = substr($File->{Content}->[0], 1)
+ if ($File->{BOM} && scalar(@{$File->{Content}}));
- my @source = map({file_source_line => $_}, @{$File->{Content}});
- return \@source;
+ my @source = map({file_source_line => $_}, @{$File->{Content}});
+ return \@source;
}
-
-sub match_DTD_FPI_SI {
+sub match_DTD_FPI_SI
+{
my ($File, $FPI, $SI) = @_;
if ($CFG->{Types}->{$FPI}) {
- if ($CFG->{Types}->{$FPI}->{SysID}){
+ if ($CFG->{Types}->{$FPI}->{SysID}) {
if ($SI ne $CFG->{Types}->{$FPI}->{SysID}) {
- &add_warning('W26', {W26_dtd_pub => $FPI,
- W26_dtd_pub_display =>$CFG->{Types}->{$FPI}->{Display},
- W26_dtd_sys=> $SI,
- W26_dtd_sys_recommend=> $CFG->{Types}->{$FPI}->{SysID}});
+ &add_warning(
+ 'W26',
+ { W26_dtd_pub => $FPI,
+ W26_dtd_pub_display =>
+ $CFG->{Types}->{$FPI}->{Display},
+ W26_dtd_sys => $SI,
+ W26_dtd_sys_recommend => $CFG->{Types}->{$FPI}->{SysID}
+ }
+ );
}
}
}
- else { # FPI not known, checking if the SI is
+ else { # FPI not known, checking if the SI is
foreach my $proper_FPI (keys %{$CFG->{Types}}) {
if ($CFG->{Types}->{$proper_FPI}->{SysID}) {
if ($CFG->{Types}->{$proper_FPI}->{SysID} eq $SI) {
- &add_warning('W26', {W26_dtd_pub => $FPI,
- W26_dtd_pub_display =>$CFG->{Types}->{$proper_FPI}->{Display},
- W26_dtd_sys => $SI,
- W26_dtd_pub_recommend=> $proper_FPI });
+ &add_warning(
+ 'W26',
+ { W26_dtd_pub => $FPI,
+ W26_dtd_pub_display =>
+ $CFG->{Types}->{$proper_FPI}->{Display},
+ W26_dtd_sys => $SI,
+ W26_dtd_pub_recommend => $proper_FPI
+ }
+ );
}
}
}
}
}
+
#
# Do an initial parse of the Document Entity to extract FPI.
-sub preparse_doctype {
- my $File = shift;
-
- #
- # Reset DOCTYPE, Root (for second invocation, probably not needed anymore).
- $File->{DOCTYPE} = '';
- $File->{Root} = '';
-
- my $dtd = sub {
- return if $File->{Root};
- # TODO: The \s and \w are probably wrong now that the strings are utf8_on
- my $declaration = shift;
- my $doctype_type;
- my $doctype_secondpart;
- if ($declaration =~ /<!DOCTYPE\s+html(?:\s+SYSTEM\s+(['"])about:legacy-compat\1)?\s*>/si) {
- $File->{Root} = "html";
- $File->{DOCTYPE} = "HTML5";
- }
- else {
- ($File->{Root}, $doctype_type, $File->{DOCTYPE}, $doctype_secondpart) = $declaration =~ m(<!DOCTYPE\s+(\w[\w\.-]+)\s+(PUBLIC|SYSTEM)\s+(?:[\'\"])([^\"\']+)(?:[\"\'])(.*)>)si;
- if (($doctype_type eq "PUBLIC") and (($doctype_secondpart) = $doctype_secondpart =~ m(\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*)si)){
- &match_DTD_FPI_SI($File, $File->{DOCTYPE}, $doctype_secondpart);
- }
- }
- };
+sub preparse_doctype
+{
+ my $File = shift;
+
+ #
+ # Reset DOCTYPE, Root (for second invocation, probably not needed anymore).
+ $File->{DOCTYPE} = '';
+ $File->{Root} = '';
+
+ my $dtd = sub {
+ return if $File->{Root};
+
+ # TODO: The \s and \w are probably wrong now that the strings are utf8_on
+ my $declaration = shift;
+ my $doctype_type;
+ my $doctype_secondpart;
+ if ($declaration =~
+ /<!DOCTYPE\s+html(?:\s+SYSTEM\s+(['"])about:legacy-compat\1)?\s*>/si
+ )
+ {
+ $File->{Root} = "html";
+ $File->{DOCTYPE} = "HTML5";
+ }
+ else {
+ ( $File->{Root}, $doctype_type,
+ $File->{DOCTYPE}, $doctype_secondpart
+ )
+ = $declaration =~
+ m(<!DOCTYPE\s+(\w[\w\.-]+)\s+(PUBLIC|SYSTEM)\s+(?:[\'\"])([^\"\']+)(?:[\"\'])(.*)>)si;
+ if (($doctype_type eq "PUBLIC") and
+ (($doctype_secondpart) =
+ $doctype_secondpart =~
+ m(\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*)si)
+ )
+ {
+ &match_DTD_FPI_SI($File, $File->{DOCTYPE},
+ $doctype_secondpart);
+ }
+ }
+ };
- my $start = sub {
- my ($p, $tag, $attr) = @_;
+ my $start = sub {
+ my ($p, $tag, $attr) = @_;
- if ($File->{Root}) {
- return unless $tag eq $File->{Root};
- } else {
- $File->{Root} = $tag;
- }
- if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}};
- if ($attr->{version}) {$File->{'Root Version'} = $attr->{version}};
- if ($attr->{baseProfile}) {$File->{'Root BaseProfile'} = $attr->{baseProfile}};
+ if ($File->{Root}) {
+ return unless $tag eq $File->{Root};
+ }
+ else {
+ $File->{Root} = $tag;
+ }
+ if ($attr->{xmlns}) { $File->{Namespace} = $attr->{xmlns} }
+ if ($attr->{version}) { $File->{'Root Version'} = $attr->{version} }
+ if ($attr->{baseProfile}) {
+ $File->{'Root BaseProfile'} = $attr->{baseProfile};
+ }
+
+ # We're done parsing.
+ $p->eof();
+ };
+
+ # we use HTML::Parser as pre-parser. May use html5lib or other in the future
+ my $p = HTML::Parser->new(api_version => 3);
+
+ # if content-type has shown we should pre-parse with XML mode, use that
+ # otherwise (mostly text/html cases) use default mode
+ $p->xml_mode(&is_xml($File));
+ $p->handler(declaration => $dtd, 'text');
+ $p->handler(start => $start, 'self,tag,attr');
- # We're done parsing.
+ my $line = 0;
+ my $max = scalar(@{$File->{Content}});
+ $p->parse(
+ sub {
+ return ($line < $max) ? $File->{Content}->[$line++] . "\n" : undef;
+ }
+ );
$p->eof();
- };
-
- # we use HTML::Parser as pre-parser. May use html5lib or other in the future
- my $p = HTML::Parser->new(api_version => 3);
-
- # if content-type has shown we should pre-parse with XML mode, use that
- # otherwise (mostly text/html cases) use default mode
- $p->xml_mode(&is_xml($File));
- $p->handler(declaration => $dtd, 'text');
- $p->handler(start => $start, 'self,tag,attr');
-
- my $line = 0;
- my $max = scalar(@{$File->{Content}});
- $p->parse(sub {
- return ($line < $max) ? $File->{Content}->[$line++] . "\n" : undef;
- });
- $p->eof();
-
- # TODO: These \s here are probably wrong now that the strings are utf8_on
- $File->{DOCTYPE} = '' unless defined $File->{DOCTYPE};
- $File->{DOCTYPE} =~ s(^\s+){ }g;
- $File->{DOCTYPE} =~ s(\s+$){ }g;
- $File->{DOCTYPE} =~ s(\s+) { }g;
-
- # Some document types actually need no doctype to be identified,
- # root element and some version attribute is enough
- # TODO applicable doctypes should be migrated to a config file?
-
- # if (($File->{DOCTYPE} eq '') and ($File->{Root} eq "svg") ) {
- # if (($File->{'Root Version'}) or ($File->{'Root BaseProfile'}))
- # {
- # if (! $File->{'Root Version'}) { $File->{'Root Version'} = "0"; }
- # if (! $File->{'Root BaseProfile'}) { $File->{'Root BaseProfile'} = "0"; }
- # if ($File->{'Root Version'} eq "1.0"){
- # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.0//EN";
- # $File->{"DOCTYPEless OK"} = TRUE;
- # $File->{Opt}->{DOCTYPE} = "SVG 1.0";
- # }
- # if ((($File->{'Root Version'} eq "1.1") or ($File->{'Root Version'} eq "0")) and ($File->{'Root BaseProfile'} eq "tiny")) {
- # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1 Tiny//EN";
- # $File->{"DOCTYPEless OK"} = TRUE;
- # $File->{Opt}->{DOCTYPE} = "SVG 1.1 Tiny";
- # }
- # elsif ((($File->{'Root Version'} eq "1.1") or ($File->{'Root Version'} eq "0")) and ($File->{'Root BaseProfile'} eq "basic")) {
- # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1 Basic//EN";
- # $File->{Opt}->{DOCTYPE} = "SVG 1.1 Basic";
- # $File->{"DOCTYPEless OK"} = TRUE;
- # }
- # elsif (($File->{'Root Version'} eq "1.1") and (!$File->{'Root BaseProfile'})) {
- # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1//EN";
- # $File->{Opt}->{DOCTYPE} = "SVG 1.1";
- # $File->{"DOCTYPEless OK"} = TRUE;
- # }
- # if ($File->{'Root Version'} eq "0") { $File->{'Root Version'} = undef; }
- # if ($File->{'Root BaseProfile'} eq "0") { $File->{'Root BaseProfile'} = undef; }
- # }
- # else {
- # # by default for an svg root elt, we use SVG 1.1
- # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1//EN";
- # $File->{Opt}->{DOCTYPE} = "SVG 1.1";
- # $File->{"DOCTYPEless OK"} = TRUE;
- # }
- # }
- if (($File->{"DOCTYPEless OK"}) and ($File->{Opt}->{DOCTYPE})) {
- # doctypeless document type found, we fake the override
- # so that the parser will have something to validate against
- $File = &override_doctype($File);
- }
- return $File;
-}
+ # TODO: These \s here are probably wrong now that the strings are utf8_on
+ $File->{DOCTYPE} = '' unless defined $File->{DOCTYPE};
+ $File->{DOCTYPE} =~ s(^\s+){ }g;
+ $File->{DOCTYPE} =~ s(\s+$){ }g;
+ $File->{DOCTYPE} =~ s(\s+) { }g;
+
+ # Some document types actually need no doctype to be identified,
+ # root element and some version attribute is enough
+ # TODO applicable doctypes should be migrated to a config file?
+
+ # if (($File->{DOCTYPE} eq '') and ($File->{Root} eq "svg") ) {
+ # if (($File->{'Root Version'}) or ($File->{'Root BaseProfile'}))
+ # {
+ # if (! $File->{'Root Version'}) { $File->{'Root Version'} = "0"; }
+ # if (! $File->{'Root BaseProfile'}) { $File->{'Root BaseProfile'} = "0"; }
+ # if ($File->{'Root Version'} eq "1.0"){
+ # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.0//EN";
+ # $File->{"DOCTYPEless OK"} = TRUE;
+ # $File->{Opt}->{DOCTYPE} = "SVG 1.0";
+ # }
+ # if ((($File->{'Root Version'} eq "1.1") or ($File->{'Root Version'} eq "0")) and ($File->{'Root BaseProfile'} eq "tiny")) {
+ # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1 Tiny//EN";
+ # $File->{"DOCTYPEless OK"} = TRUE;
+ # $File->{Opt}->{DOCTYPE} = "SVG 1.1 Tiny";
+ # }
+ # elsif ((($File->{'Root Version'} eq "1.1") or ($File->{'Root Version'} eq "0")) and ($File->{'Root BaseProfile'} eq "basic")) {
+ # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1 Basic//EN";
+ # $File->{Opt}->{DOCTYPE} = "SVG 1.1 Basic";
+ # $File->{"DOCTYPEless OK"} = TRUE;
+ # }
+ # elsif (($File->{'Root Version'} eq "1.1") and (!$File->{'Root BaseProfile'})) {
+ # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1//EN";
+ # $File->{Opt}->{DOCTYPE} = "SVG 1.1";
+ # $File->{"DOCTYPEless OK"} = TRUE;
+ # }
+ # if ($File->{'Root Version'} eq "0") { $File->{'Root Version'} = undef; }
+ # if ($File->{'Root BaseProfile'} eq "0") { $File->{'Root BaseProfile'} = undef; }
+ # }
+ # else {
+ # # by default for an svg root elt, we use SVG 1.1
+ # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1//EN";
+ # $File->{Opt}->{DOCTYPE} = "SVG 1.1";
+ # $File->{"DOCTYPEless OK"} = TRUE;
+ # }
+ # }
+ if (($File->{"DOCTYPEless OK"}) and ($File->{Opt}->{DOCTYPE})) {
+
+ # doctypeless document type found, we fake the override
+ # so that the parser will have something to validate against
+ $File = &override_doctype($File);
+ }
+ return $File;
+}
#
# Preprocess CGI parameters.
-sub prepCGI {
- my $File = shift;
- my $q = shift;
-
- my $path_info;
- {
- # @@@HACK:
- # CGI.pm's _name_and_path_from_env has query string related issues;
- # just trump the query string for the duration of calling methods we
- # know we don't need it for and which have been affected in the past.
- # More info: http://www.w3.org/Bugs/Public/show_bug.cgi?id=4365
- # ->url() reportedly fixed in CGI.pm 3.34, ->path_info() in 3.40.
-
- local $ENV{REQUEST_URI} = URI->new($ENV{REQUEST_URI})->path()
- if $ENV{REQUEST_URI};
-
- # The URL to this CGI script.
- $File->{Env}->{'Self URI'} = $q->url();
-
- $path_info = $q->path_info();
- }
-
- # Avoid CGI.pm's "exists but undef" behaviour.
- if (scalar $q->param) {
- foreach my $param ($q->param) {
- next if $param eq 'uploaded_file'; # 'uploaded_file' contains data.
- next if $param eq 'fragment'; # Ditto 'fragment'.
- next if $param eq 'accept'; # Original checking had a specific Accept: header sent.
- next if $param eq 'accept-language'; # Ditto Accept-Language:.
- next if $param eq 'accept-charset'; # Ditto Accept-Charset:.
- next if $q->param($param) eq '0'; # Keep false-but-set params.
-
- #
- # Parameters that are given to us without specifying a value get
- # set to "1" (the "TRUE" constant). This is so we can test for the
- # boolean value of a parameter instead of first checking whether
- # the param was given and then testing its value. Needed because
- # CGI.pm sets ";param" and ";param=" to a boolean false value
- # (undef() or a null string, respectively).
- $q->param($param, TRUE) unless $q->param($param);
- }
- }
-
- # IIS reportedly does not provide the $path_info we expect - hack around it.
- $path_info =~ s|(.*)/check\.pl(.*)$|$2|
- if ($ENV{SERVER_SOFTWARE} and $ENV{SERVER_SOFTWARE} =~ /Microsoft-IIS/);
-
- # apparently, with mod_perl2, $path_info is empty even if it should be filled
- # working around that
- if (!$path_info && $File->{Env}->{'Self URI'} =~ /check\/referr?er$/) {
- $path_info = '/referer';
- $File->{Env}->{'Self URI'} =~ s/\/referr?er$//;
- }
-
- # Futz the URL so "/referer" works.
- if ($path_info) {
- if ($path_info =~ m|^/referr?er$|) {
- if ($q->referer) {
- $q->param('uri', $q->referer);
- print redirect &self_url_q($q, $File);
- } else {
- print redirect $File->{Env}->{'Self URI'} . '?uri=referer';
- }
- } else {
- print redirect &self_url_q($q, $File);
- }
- exit;
- }
-
- # Use "url" unless a "uri" was also given.
- if ($q->param('url') and not $q->param('uri')) {
- $q->param('uri', $q->param('url'));
- }
-
- # Munge the URL to include commonly omitted prefix.
- my $u = $q->param('uri');
- $q->param('uri', "http://$u") if $u && $u =~ m(^www)i;
-
- # Issue a redirect for uri=referer.
- if ($q->param('uri') and $q->param('uri') eq 'referer') {
- if ($q->referer) {
- $q->param('uri', $q->referer);
- $q->param('accept',$q->http('Accept')) if ($q->http('Accept'));
- $q->param('accept-language',$q->http('Accept-Language')) if ($q->http('Accept-Language'));
- $q->param('accept-charset',$q->http('Accept-Charset')) if ($q->http('Accept-Charset'));
- print redirect &self_url_q($q, $File);
- exit;
- } else {
-
- # Redirected from /check/referer to /check?uri=referer because
- # the browser didn't send a Referer header, or the request was
- # for /check?uri=referer but no Referer header was found.
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_referer_error => TRUE,
- );
- }
- }
-
- # Supersede URL with an uploaded file.
- if ($q->param('uploaded_file')) {
- $q->param('uri', 'upload://' . $q->param('uploaded_file'));
- $File->{'Is Upload'} = TRUE; # Tag it for later use.
- }
-
- # Supersede URL with an uploaded fragment.
- if ($q->param('fragment')) {
- $q->param('uri', 'upload://Form Submission');
- $File->{'Direct Input'} = TRUE; # Tag it for later use.
- }
-
- # Redirect to a GETable URL if method is POST without a file upload.
- if (defined $q->request_method and $q->request_method eq 'POST'
- and not ($File->{'Is Upload'} or $File->{'Direct Input'})) {
- my $thispage = &self_url_q($q, $File);
- print redirect $thispage;
- exit;
- }
-
- #
- # Flag an error if we didn't get a file to validate.
- unless ($q->param('uri')) {
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_uri_error => TRUE,
- fatal_uri_scheme => 'undefined',
- );
- }
+sub prepCGI
+{
+ my $File = shift;
+ my $q = shift;
- return $q;
+ my $path_info;
+ {
+
+ # @@@HACK:
+ # CGI.pm's _name_and_path_from_env has query string related issues;
+ # just trump the query string for the duration of calling methods we
+ # know we don't need it for and which have been affected in the past.
+ # More info: http://www.w3.org/Bugs/Public/show_bug.cgi?id=4365
+ # ->url() reportedly fixed in CGI.pm 3.34, ->path_info() in 3.40.
+
+ local $ENV{REQUEST_URI} = URI->new($ENV{REQUEST_URI})->path()
+ if $ENV{REQUEST_URI};
+
+ # The URL to this CGI script.
+ $File->{Env}->{'Self URI'} = $q->url();
+
+ $path_info = $q->path_info();
+ }
+
+ # Avoid CGI.pm's "exists but undef" behaviour.
+ if (scalar $q->param) {
+ foreach my $param ($q->param) {
+ next if $param eq 'uploaded_file'; # 'uploaded_file' contains data.
+ next if $param eq 'fragment'; # Ditto 'fragment'.
+ next
+ if $param eq 'accept'
+ ; # Original checking had a specific Accept: header sent.
+ next if $param eq 'accept-language'; # Ditto Accept-Language:.
+ next if $param eq 'accept-charset'; # Ditto Accept-Charset:.
+ next if $q->param($param) eq '0'; # Keep false-but-set params.
+
+ #
+ # Parameters that are given to us without specifying a value get
+ # set to "1" (the "TRUE" constant). This is so we can test for the
+ # boolean value of a parameter instead of first checking whether
+ # the param was given and then testing its value. Needed because
+ # CGI.pm sets ";param" and ";param=" to a boolean false value
+ # (undef() or a null string, respectively).
+ $q->param($param, TRUE) unless $q->param($param);
+ }
+ }
+
+ # IIS reportedly does not provide the $path_info we expect - hack around it.
+ $path_info =~ s|(.*)/check\.pl(.*)$|$2|
+ if ($ENV{SERVER_SOFTWARE} and
+ $ENV{SERVER_SOFTWARE} =~ /Microsoft-IIS/);
+
+ # apparently, with mod_perl2, $path_info is empty even if it should be filled
+ # working around that
+ if (!$path_info && $File->{Env}->{'Self URI'} =~ /check\/referr?er$/) {
+ $path_info = '/referer';
+ $File->{Env}->{'Self URI'} =~ s/\/referr?er$//;
+ }
+
+ # Futz the URL so "/referer" works.
+ if ($path_info) {
+ if ($path_info =~ m|^/referr?er$|) {
+ if ($q->referer) {
+ $q->param('uri', $q->referer);
+ print redirect &self_url_q($q, $File);
+ }
+ else {
+ print redirect $File->{Env}->{'Self URI'} . '?uri=referer';
+ }
+ }
+ else {
+ print redirect &self_url_q($q, $File);
+ }
+ exit;
+ }
+
+ # Use "url" unless a "uri" was also given.
+ if ($q->param('url') and not $q->param('uri')) {
+ $q->param('uri', $q->param('url'));
+ }
+
+ # Munge the URL to include commonly omitted prefix.
+ my $u = $q->param('uri');
+ $q->param('uri', "http://$u") if $u && $u =~ m(^www)i;
+
+ # Issue a redirect for uri=referer.
+ if ($q->param('uri') and $q->param('uri') eq 'referer') {
+ if ($q->referer) {
+ $q->param('uri', $q->referer);
+ $q->param('accept', $q->http('Accept')) if ($q->http('Accept'));
+ $q->param('accept-language', $q->http('Accept-Language'))
+ if ($q->http('Accept-Language'));
+ $q->param('accept-charset', $q->http('Accept-Charset'))
+ if ($q->http('Accept-Charset'));
+ print redirect &self_url_q($q, $File);
+ exit;
+ }
+ else {
+
+ # Redirected from /check/referer to /check?uri=referer because
+ # the browser didn't send a Referer header, or the request was
+ # for /check?uri=referer but no Referer header was found.
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')
+ ->param(fatal_referer_error => TRUE,);
+ }
+ }
+
+ # Supersede URL with an uploaded file.
+ if ($q->param('uploaded_file')) {
+ $q->param('uri', 'upload://' . $q->param('uploaded_file'));
+ $File->{'Is Upload'} = TRUE; # Tag it for later use.
+ }
+
+ # Supersede URL with an uploaded fragment.
+ if ($q->param('fragment')) {
+ $q->param('uri', 'upload://Form Submission');
+ $File->{'Direct Input'} = TRUE; # Tag it for later use.
+ }
+
+ # Redirect to a GETable URL if method is POST without a file upload.
+ if (defined $q->request_method and
+ $q->request_method eq 'POST' and
+ not($File->{'Is Upload'} or $File->{'Direct Input'}))
+ {
+ my $thispage = &self_url_q($q, $File);
+ print redirect $thispage;
+ exit;
+ }
+
+ #
+ # Flag an error if we didn't get a file to validate.
+ unless ($q->param('uri')) {
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')->param(
+ fatal_uri_error => TRUE,
+ fatal_uri_scheme => 'undefined',
+ );
+ }
+
+ return $q;
}
#
@@ -2451,21 +2698,23 @@ sub prepCGI {
# * Doctype Declaration
# * XML Declaration
# * XML namespaces
-sub set_parse_mode {
- my $File = shift;
- my $CFG = shift;
- my $fpi = $File->{DOCTYPE};
- $File->{ModeChoice} = '';
- my $parseModeFromDoctype = $CFG->{Types}->{$fpi}->{'Parse Mode'} || 'TBD';
-
- # $File->{Mode} may have been set in parse_content_type
- # and it would come from the Media Type
- my $parseModeFromMimeType = $File->{Mode};
- my $begincontent = join "\x20",@{$File->{Content}}; # for the sake of xml decl detection,
- # the 10 first lines should be safe
- my $parseModeFromXMLDecl = (
- $begincontent
- =~ /^ [\x20|\x09|\x0D|\x0A]* # whitespace before the decl should not be happening
+sub set_parse_mode
+{
+ my $File = shift;
+ my $CFG = shift;
+ my $fpi = $File->{DOCTYPE};
+ $File->{ModeChoice} = '';
+ my $parseModeFromDoctype = $CFG->{Types}->{$fpi}->{'Parse Mode'} || 'TBD';
+
+ # $File->{Mode} may have been set in parse_content_type
+ # and it would come from the Media Type
+ my $parseModeFromMimeType = $File->{Mode};
+ my $begincontent = join "\x20",
+ @{$File->{Content}}; # for the sake of xml decl detection,
+ # the 10 first lines should be safe
+ my $parseModeFromXMLDecl = (
+ $begincontent =~
+ /^ [\x20|\x09|\x0D|\x0A]* # whitespace before the decl should not be happening
# but we are greedy for the sake of detection, not validation
<\?xml # start matching an XML Declaration
[\x20|\x09|\x0D|\x0A]+ # x20, x09, xD and xA are the allowed "xml white space"
@@ -2482,402 +2731,480 @@ sub set_parse_mode {
)? # ditto standalone info, optional
[\x20|\x09|\x0D|\x0A]* \?> # end of XML Declaration
/x
- ? 'XML' : 'TBD' );
-
- my $parseModeFromNamespace = 'TBD';
- if ($File->{Namespace}) { $parseModeFromNamespace = 'XML'}
-
- if (($parseModeFromMimeType eq 'TBD') and ($parseModeFromXMLDecl eq 'TBD') and ($parseModeFromNamespace eq 'TBD') and (!exists $CFG->{Types}->{$fpi})) {
- # if the mime type is text/html (ambiguous, hence TBD mode)
- # and the doctype isn't in the catalogue
- # and XML prolog detection was unsuccessful
- # and we found no namespace at the root
- # ... throw in a warning
- &add_warning('W06', {
- W06_mime => $File->{ContentType},
- w06_doctype => $File->{DOCTYPE}
- });
- return;
- }
-
- $parseModeFromDoctype = 'TBD' unless $parseModeFromDoctype eq 'SGML' or $parseModeFromDoctype eq 'HTML5' or $parseModeFromDoctype eq 'XML' or $parseModeFromNamespace eq 'XML';
-
- if (($parseModeFromDoctype eq 'TBD') and ($parseModeFromXMLDecl eq 'TBD') and ($parseModeFromMimeType eq 'TBD') and ($parseModeFromNamespace eq 'TBD')) {
- # if all factors are useless to give us a parse mode
- # => we use SGML-based DTD validation as a default
- $File->{Mode} = 'DTD+SGML';
- $File->{ModeChoice} = 'Fallback';
- # and send warning about the fallback
- &add_warning('W06', {
- W06_mime => $File->{ContentType},
- w06_doctype => $File->{DOCTYPE}
- });
- return;
- }
- elsif ($parseModeFromMimeType ne 'TBD') {
- # if The mime type gives clear indication of whether the document is XML or not
- if (($parseModeFromDoctype ne 'TBD') and ($parseModeFromDoctype ne 'HTML5') and ($parseModeFromMimeType ne $parseModeFromDoctype)) {
- # if document-type recommended mode and content-type recommended mode clash, shoot a warning
- # unknown doctypes will not trigger this
- # neither will html5 documents, which can be XML or not
- &add_warning('W07', {
- W07_mime => $File->{ContentType},
- W07_ct => $parseModeFromMimeType,
- W07_dtd => $parseModeFromDoctype,
- });
- }
- # mime type has precedence, we stick to it
- $File->{ModeChoice} = 'Mime';
- if ($parseModeFromDoctype eq "HTML5") {
- $File->{Mode} = 'HTML5+'.$File->{Mode};
- } else {
- $File->{Mode} = 'DTD+'.$File->{Mode};
- }
- return;
- }
- elsif ($parseModeFromDoctype ne 'TBD') {
- # the mime type is ambiguous (hence we didn't stop at the previous test)
- # but by now we're sure that the document type is a good indication
- # so we use that.
- if ($parseModeFromDoctype eq "HTML5") {
- if ($parseModeFromXMLDecl eq "XML" or $parseModeFromNamespace eq "XML") {
- $File->{Mode} = "HTML5+XML";
- }
- else {
- $File->{Mode} = "HTML5";
- }
- }
- else { # not HTML5
- $File->{Mode} = "DTD+".$parseModeFromDoctype;
- }
- $File->{ModeChoice} = 'Doctype';
- return;
- }
- elsif ($parseModeFromXMLDecl ne 'TBD') {
- # the mime type is ambiguous (hence we didn't stop at the previous test)
- # and so was the doctype
- # but we found an XML declaration
- # so we use that.
- if ($File->{Mode} eq "") {
- $File->{Mode} = "DTD+".$parseModeFromXMLDecl;
- }
- elsif ($File->{Mode} =~ /\+/ ) {
- $File->{Mode} =~ s/\+.*/\+$parseModeFromXMLDecl/;
- }
- else {
- $File->{Mode} = $File->{Mode}."+".$parseModeFromXMLDecl;
+ ?
+ 'XML' :
+ 'TBD'
+ );
+
+ my $parseModeFromNamespace = 'TBD';
+ if ($File->{Namespace}) { $parseModeFromNamespace = 'XML' }
+
+ if (($parseModeFromMimeType eq 'TBD') and
+ ($parseModeFromXMLDecl eq 'TBD') and
+ ($parseModeFromNamespace eq 'TBD') and
+ (!exists $CFG->{Types}->{$fpi}))
+ {
+
+ # if the mime type is text/html (ambiguous, hence TBD mode)
+ # and the doctype isn't in the catalogue
+ # and XML prolog detection was unsuccessful
+ # and we found no namespace at the root
+ # ... throw in a warning
+ &add_warning(
+ 'W06',
+ { W06_mime => $File->{ContentType},
+ w06_doctype => $File->{DOCTYPE}
+ }
+ );
+ return;
}
- $File->{ModeChoice} = 'XMLDecl';
- return;
- }
- else {
- # this is the last case. We know that all modes are not TBD,
- # yet mime type, doctype AND XML DECL tests have failed => we are saved by the presence of namespaces
- if ($File->{Mode} eq "") {
- $File->{Mode} = "DTD+".$parseModeFromNamespace;
+
+ $parseModeFromDoctype = 'TBD'
+ unless $parseModeFromDoctype eq 'SGML' or
+ $parseModeFromDoctype eq 'HTML5' or
+ $parseModeFromDoctype eq 'XML' or
+ $parseModeFromNamespace eq 'XML';
+
+ if (($parseModeFromDoctype eq 'TBD') and
+ ($parseModeFromXMLDecl eq 'TBD') and
+ ($parseModeFromMimeType eq 'TBD') and
+ ($parseModeFromNamespace eq 'TBD'))
+ {
+
+ # if all factors are useless to give us a parse mode
+ # => we use SGML-based DTD validation as a default
+ $File->{Mode} = 'DTD+SGML';
+ $File->{ModeChoice} = 'Fallback';
+
+ # and send warning about the fallback
+ &add_warning(
+ 'W06',
+ { W06_mime => $File->{ContentType},
+ w06_doctype => $File->{DOCTYPE}
+ }
+ );
+ return;
+ }
+ elsif ($parseModeFromMimeType ne 'TBD') {
+
+ # if The mime type gives clear indication of whether the document is XML or not
+ if (($parseModeFromDoctype ne 'TBD') and
+ ($parseModeFromDoctype ne 'HTML5') and
+ ($parseModeFromMimeType ne $parseModeFromDoctype))
+ {
+
+ # if document-type recommended mode and content-type recommended mode clash, shoot a warning
+ # unknown doctypes will not trigger this
+ # neither will html5 documents, which can be XML or not
+ &add_warning(
+ 'W07',
+ { W07_mime => $File->{ContentType},
+ W07_ct => $parseModeFromMimeType,
+ W07_dtd => $parseModeFromDoctype,
+ }
+ );
+ }
+
+ # mime type has precedence, we stick to it
+ $File->{ModeChoice} = 'Mime';
+ if ($parseModeFromDoctype eq "HTML5") {
+ $File->{Mode} = 'HTML5+' . $File->{Mode};
+ }
+ else {
+ $File->{Mode} = 'DTD+' . $File->{Mode};
+ }
+ return;
+ }
+ elsif ($parseModeFromDoctype ne 'TBD') {
+
+ # the mime type is ambiguous (hence we didn't stop at the previous test)
+ # but by now we're sure that the document type is a good indication
+ # so we use that.
+ if ($parseModeFromDoctype eq "HTML5") {
+ if ($parseModeFromXMLDecl eq "XML" or
+ $parseModeFromNamespace eq "XML")
+ {
+ $File->{Mode} = "HTML5+XML";
+ }
+ else {
+ $File->{Mode} = "HTML5";
+ }
+ }
+ else { # not HTML5
+ $File->{Mode} = "DTD+" . $parseModeFromDoctype;
+ }
+ $File->{ModeChoice} = 'Doctype';
+ return;
}
- elsif ($File->{Mode} =~ /\+/ ) {
- $File->{Mode} =~ s/\+.*/\+$parseModeFromNamespace/;
+ elsif ($parseModeFromXMLDecl ne 'TBD') {
+
+ # the mime type is ambiguous (hence we didn't stop at the previous test)
+ # and so was the doctype
+ # but we found an XML declaration
+ # so we use that.
+ if ($File->{Mode} eq "") {
+ $File->{Mode} = "DTD+" . $parseModeFromXMLDecl;
+ }
+ elsif ($File->{Mode} =~ /\+/) {
+ $File->{Mode} =~ s/\+.*/\+$parseModeFromXMLDecl/;
+ }
+ else {
+ $File->{Mode} = $File->{Mode} . "+" . $parseModeFromXMLDecl;
+ }
+ $File->{ModeChoice} = 'XMLDecl';
+ return;
}
else {
- $File->{Mode} = $File->{Mode}."+".$parseModeFromNamespace;
+
+ # this is the last case. We know that all modes are not TBD,
+ # yet mime type, doctype AND XML DECL tests have failed => we are saved by the presence of namespaces
+ if ($File->{Mode} eq "") {
+ $File->{Mode} = "DTD+" . $parseModeFromNamespace;
+ }
+ elsif ($File->{Mode} =~ /\+/) {
+ $File->{Mode} =~ s/\+.*/\+$parseModeFromNamespace/;
+ }
+ else {
+ $File->{Mode} = $File->{Mode} . "+" . $parseModeFromNamespace;
+ }
+ $File->{ModeChoice} = 'Namespace';
}
- $File->{ModeChoice} = 'Namespace';
- }
}
-
#
# Utility sub to tell if mode "is" XML.
-sub is_xml {shift->{Mode} =~ /XML/};
+sub is_xml { shift->{Mode} =~ /XML/ }
#
# Check charset conflicts and add any warnings necessary.
-sub charset_conflicts {
- my $File = shift;
-
- #
- # Handle the case where there was no charset to be found.
- unless ($File->{Charset}->{Use}) {
- &add_warning('W17', {});
- $File->{Tentative} |= T_WARN;
- }
-
- #
- # Add a warning if there was charset info conflict (HTTP header,
- # XML declaration, or <meta> element).
- # filtering out some of the warnings in direct input mode where HTTP encoding is a "fake"
- if ((charset_not_equal($File->{Charset}->{HTTP}, $File->{Charset}->{XML})) and not ($File->{'Direct Input'})) {
- &add_warning('W18', {
- W18_http => $File->{Charset}->{HTTP},
- W18_xml => $File->{Charset}->{XML},
- W18_use => $File->{Charset}->{Use},
- });
- } elsif (charset_not_equal($File->{Charset}->{HTTP}, $File->{Charset}->{META}) and not ($File->{'Direct Input'})) {
- &add_warning('W19', {
- W19_http => $File->{Charset}->{HTTP},
- W19_meta => $File->{Charset}->{META},
- W19_use => $File->{Charset}->{Use},
- });
- } elsif (charset_not_equal($File->{Charset}->{XML}, $File->{Charset}->{META})) {
- &add_warning('W20', {
- W20_http => $File->{Charset}->{XML},
- W20_xml => $File->{Charset}->{META},
- });
- $File->{Tentative} |= T_WARN;
- }
-
- return $File;
+sub charset_conflicts
+{
+ my $File = shift;
+
+ #
+ # Handle the case where there was no charset to be found.
+ unless ($File->{Charset}->{Use}) {
+ &add_warning('W17', {});
+ $File->{Tentative} |= T_WARN;
+ }
+
+ #
+ # Add a warning if there was charset info conflict (HTTP header,
+ # XML declaration, or <meta> element).
+ # filtering out some of the warnings in direct input mode where HTTP encoding is a "fake"
+ if (( charset_not_equal(
+ $File->{Charset}->{HTTP},
+ $File->{Charset}->{XML}
+ )
+ ) and
+ not($File->{'Direct Input'})
+ )
+ {
+ &add_warning(
+ 'W18',
+ { W18_http => $File->{Charset}->{HTTP},
+ W18_xml => $File->{Charset}->{XML},
+ W18_use => $File->{Charset}->{Use},
+ }
+ );
+ }
+ elsif (
+ charset_not_equal($File->{Charset}->{HTTP}, $File->{Charset}->{META})
+ and not($File->{'Direct Input'}))
+ {
+ &add_warning(
+ 'W19',
+ { W19_http => $File->{Charset}->{HTTP},
+ W19_meta => $File->{Charset}->{META},
+ W19_use => $File->{Charset}->{Use},
+ }
+ );
+ }
+ elsif (
+ charset_not_equal($File->{Charset}->{XML}, $File->{Charset}->{META}))
+ {
+ &add_warning(
+ 'W20',
+ { W20_http => $File->{Charset}->{XML},
+ W20_xml => $File->{Charset}->{META},
+ }
+ );
+ $File->{Tentative} |= T_WARN;
+ }
+
+ return $File;
}
#
# Transcode to UTF-8
-sub transcode {
- my $File = shift;
-
- my $general_charset = $File->{Charset}->{Use};
- my $exact_charset = $general_charset;
-
- # TODO: This should be done before transcode()
- if ($general_charset eq 'utf-16') {
- if ($File->{Charset}->{Auto} =~ m/^utf-16[bl]e$/) {
- $exact_charset = $File->{Charset}->{Auto};
- } else { $exact_charset = 'utf-16be'; }
- }
-
- my $cs = $exact_charset;
-
- if ($CFG->{Charsets}->{$cs}) {
- if ($CFG->{Charsets}->{$cs} =~ /ERR /) {
- # The encoding is not supported due to policy
-
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_transcode_error => TRUE,
- fatal_transcode_charset => $cs,
- # @@FIXME might need better text
- fatal_transcode_errmsg => 'This encoding is not supported by the validator.',
- );
- return $File;
- }
- elsif ($CFG->{Charsets}->{$cs} =~ /X /) {
- # possibly problematic, we recommend another alias
- my $recommended_charset = $CFG->{Charsets}->{$cs};
- $recommended_charset =~ s/X //;
- &add_warning('W22', {
- W22_declared => $cs,
- W22_suggested => $recommended_charset,
- });
- }
- }
-
- # Does the system support decoding this encoding?
- my $enc = Encode::find_encoding($cs);
-
- if (!$enc) {
- # This system's Encode installation does not support
- # the character encoding; might need additional modules
-
- $File->{'Error Flagged'} = TRUE;
- &get_template($File, 'fatal-error.tmpl')->param(
- fatal_transcode_error => TRUE,
- fatal_transcode_charset => $cs,
- # @@FIXME might need better text
- fatal_transcode_errmsg => 'Encoding not supported.',
- );
- return $File;
- }
- elsif (!$CFG->{Charsets}->{$cs}) {
- # not in the list, but technically OK -> we warn
- &add_warning('W24', {
- W24_declared => $cs,
- });
-
- }
-
- my $output;
- my $input = $File->{Bytes};
-
- # Try to transcode
- eval {
- $output = $enc->decode($input, Encode::FB_CROAK);
- };
-
- if ($@) {
- # Transcoding failed - do it again line by line to find out exactly where
- my $line_num = 0;
- foreach my $input_line (split /\r\n|\n|\r/, $input) {
- $line_num++;
- eval {
- $enc->decode($input_line, Encode::FB_CROAK);
- };
- if ($@) {
- my $croak_message = $@;
- $croak_message =~ s/ at .*//;
+sub transcode
+{
+ my $File = shift;
+
+ my $general_charset = $File->{Charset}->{Use};
+ my $exact_charset = $general_charset;
+
+ # TODO: This should be done before transcode()
+ if ($general_charset eq 'utf-16') {
+ if ($File->{Charset}->{Auto} =~ m/^utf-16[bl]e$/) {
+ $exact_charset = $File->{Charset}->{Auto};
+ }
+ else { $exact_charset = 'utf-16be'; }
+ }
+
+ my $cs = $exact_charset;
+
+ if ($CFG->{Charsets}->{$cs}) {
+ if ($CFG->{Charsets}->{$cs} =~ /ERR /) {
+
+ # The encoding is not supported due to policy
+
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')->param(
+ fatal_transcode_error => TRUE,
+ fatal_transcode_charset => $cs,
+
+ # @@FIXME might need better text
+ fatal_transcode_errmsg =>
+ 'This encoding is not supported by the validator.',
+ );
+ return $File;
+ }
+ elsif ($CFG->{Charsets}->{$cs} =~ /X /) {
+
+ # possibly problematic, we recommend another alias
+ my $recommended_charset = $CFG->{Charsets}->{$cs};
+ $recommended_charset =~ s/X //;
+ &add_warning(
+ 'W22',
+ { W22_declared => $cs,
+ W22_suggested => $recommended_charset,
+ }
+ );
+ }
+ }
+
+ # Does the system support decoding this encoding?
+ my $enc = Encode::find_encoding($cs);
+
+ if (!$enc) {
+
+ # This system's Encode installation does not support
+ # the character encoding; might need additional modules
+
$File->{'Error Flagged'} = TRUE;
&get_template($File, 'fatal-error.tmpl')->param(
- fatal_byte_error => TRUE,
- fatal_byte_lines => $line_num,
- fatal_byte_charset => $cs,
- fatal_byte_error_msg => $croak_message,
+ fatal_transcode_error => TRUE,
+ fatal_transcode_charset => $cs,
+
+ # @@FIXME might need better text
+ fatal_transcode_errmsg => 'Encoding not supported.',
);
- }
+ return $File;
}
- return $File;
- }
+ elsif (!$CFG->{Charsets}->{$cs}) {
+
+ # not in the list, but technically OK -> we warn
+ &add_warning('W24', {W24_declared => $cs,});
+
+ }
+
+ my $output;
+ my $input = $File->{Bytes};
- # @@FIXME is this what we want?
- $output =~ s/\015?\012/\n/g;
+ # Try to transcode
+ eval { $output = $enc->decode($input, Encode::FB_CROAK); };
- # make sure we deal only with unix newlines
- # tentative fix for http://www.w3.org/Bugs/Public/show_bug.cgi?id=3992
- $output =~ s/(\r\n|\n|\r)/\n/g;
+ if ($@) {
+
+ # Transcoding failed - do it again line by line to find out exactly where
+ my $line_num = 0;
+ foreach my $input_line (split /\r\n|\n|\r/, $input) {
+ $line_num++;
+ eval { $enc->decode($input_line, Encode::FB_CROAK); };
+ if ($@) {
+ my $croak_message = $@;
+ $croak_message =~ s/ at .*//;
+ $File->{'Error Flagged'} = TRUE;
+ &get_template($File, 'fatal-error.tmpl')->param(
+ fatal_byte_error => TRUE,
+ fatal_byte_lines => $line_num,
+ fatal_byte_charset => $cs,
+ fatal_byte_error_msg => $croak_message,
+ );
+ }
+ }
+ return $File;
+ }
+
+ # @@FIXME is this what we want?
+ $output =~ s/\015?\012/\n/g;
- #debug: we could check if the content has utf8 bit on with
- #$output= utf8::is_utf8($output) ? 1 : 0;
- $File->{Content} = [split/\n/, $output];
+ # make sure we deal only with unix newlines
+ # tentative fix for http://www.w3.org/Bugs/Public/show_bug.cgi?id=3992
+ $output =~ s/(\r\n|\n|\r)/\n/g;
- return $File;
+ #debug: we could check if the content has utf8 bit on with
+ #$output= utf8::is_utf8($output) ? 1 : 0;
+ $File->{Content} = [split /\n/, $output];
+
+ return $File;
}
sub find_encodings
{
- my $File = shift;
- my $bom = HTML::Encoding::encoding_from_byte_order_mark($File->{Bytes});
- my @first = HTML::Encoding::encoding_from_first_chars($File->{Bytes});
+ my $File = shift;
+ my $bom = HTML::Encoding::encoding_from_byte_order_mark($File->{Bytes});
+ my @first = HTML::Encoding::encoding_from_first_chars($File->{Bytes});
- if (defined $bom)
- {
- # @@FIXME this BOM entry should not be needed at all!
- $File->{BOM} = length(Encode::encode($bom, "\x{FEFF}"));
- $File->{Charset}->{Auto} = lc $bom;
- }
- else
- {
- $File->{Charset}->{Auto} = lc($first[0]) if @first;
- }
+ if (defined $bom) {
- my $xml = HTML::Encoding::encoding_from_xml_document($File->{Bytes});
- $File->{Charset}->{XML} = lc $xml if defined $xml;
+ # @@FIXME this BOM entry should not be needed at all!
+ $File->{BOM} = length(Encode::encode($bom, "\x{FEFF}"));
+ $File->{Charset}->{Auto} = lc $bom;
+ }
+ else {
+ $File->{Charset}->{Auto} = lc($first[0]) if @first;
+ }
- my %metah;
- foreach my $try (@first)
- {
- # @@FIXME I think the old code used HTML::Parser xml mode, check this is ok
- my $meta = HTML::Encoding::encoding_from_meta_element($File->{Bytes}, $try);
- $metah{lc($meta)}++ if defined $meta and length $meta;
- }
+ my $xml = HTML::Encoding::encoding_from_xml_document($File->{Bytes});
+ $File->{Charset}->{XML} = lc $xml if defined $xml;
- my @meta = sort { $metah{$b} <=> $metah{$a} } keys %metah;
- $File->{Charset}->{META} = $meta[0] if @meta;
+ my %metah;
+ foreach my $try (@first) {
- return $File;
+ # @@FIXME I think the old code used HTML::Parser xml mode, check this is ok
+ my $meta =
+ HTML::Encoding::encoding_from_meta_element($File->{Bytes}, $try);
+ $metah{lc($meta)}++ if defined $meta and length $meta;
+ }
+
+ my @meta = sort { $metah{$b} <=> $metah{$a} } keys %metah;
+ $File->{Charset}->{META} = $meta[0] if @meta;
+
+ return $File;
}
#
# Abort with a message if an error was flagged at point.
-sub abort_if_error_flagged {
- my $File = shift;
- my $Flags = shift;
+sub abort_if_error_flagged
+{
+ my $File = shift;
+ my $Flags = shift;
- return unless $File->{'Error Flagged'};
- return if $File->{'Error Handled'}; # Previous error, keep going.
+ return unless $File->{'Error Flagged'};
+ return if $File->{'Error Handled'}; # Previous error, keep going.
- my $tmpl = &get_template($File, 'fatal-error.tmpl');
- $tmpl->param(fatal_error => TRUE);
+ my $tmpl = &get_template($File, 'fatal-error.tmpl');
+ $tmpl->param(fatal_error => TRUE);
- if ($File->{Opt}->{Output} eq 'html') {
- &prep_template($File, $tmpl);
- # transcode output from perl's internal to utf-8 and output
- print Encode::encode('UTF-8', $tmpl->output);
- exit;
- } else {
+ if ($File->{Opt}->{Output} eq 'html') {
+ &prep_template($File, $tmpl);
- #@@FIXME: This is borked after templatification.
- # &add_warning($File, 'fatal', 'Fatal Error', <<".EOF.");
- # A fatal error has occurred while processing the requested document. Processing
- # has continued but any later output will be of dubious quality. Limitations of
- # this output mode prevent the full error message from being returned; please
- # retry this operation in interactive mode using the web interface to see the
- # actual error message.
- # .EOF.
- #@@FIXME;
- $File->{'Error Handled'} = TRUE;
- }
+ # transcode output from perl's internal to utf-8 and output
+ print Encode::encode('UTF-8', $tmpl->output);
+ exit;
+ }
+ else {
+
+ #@@FIXME: This is borked after templatification.
+ # &add_warning($File, 'fatal', 'Fatal Error', <<".EOF.");
+ # A fatal error has occurred while processing the requested document. Processing
+ # has continued but any later output will be of dubious quality. Limitations of
+ # this output mode prevent the full error message from being returned; please
+ # retry this operation in interactive mode using the web interface to see the
+ # actual error message.
+ # .EOF.
+ #@@FIXME;
+ $File->{'Error Handled'} = TRUE;
+ }
}
#
# conflicting encodings
-sub charset_not_equal {
- my $encodingA = shift;
- my $encodingB = shift;
- return $encodingA && $encodingB && ($encodingA ne $encodingB);
+sub charset_not_equal
+{
+ my $encodingA = shift;
+ my $encodingB = shift;
+ return $encodingA && $encodingB && ($encodingA ne $encodingB);
}
#
# Construct a self-referential URL from a CGI.pm $q object.
-sub self_url_q {
- my ($q, $File) = @_;
- my $thispage = $File->{Env}->{'Self URI'} . '?';
+sub self_url_q
+{
+ my ($q, $File) = @_;
+ my $thispage = $File->{Env}->{'Self URI'} . '?';
- # Pass-through parameters
- for my $param (qw(uri accept accept-language accept-charset)) {
- $thispage .= "$param=" . uri_escape($q->param($param)) . ';'
- if $q->param($param);
- }
+ # Pass-through parameters
+ for my $param (qw(uri accept accept-language accept-charset)) {
+ $thispage .= "$param=" . uri_escape($q->param($param)) . ';'
+ if $q->param($param);
+ }
- # Boolean parameters
- for my $param (qw(ss outline No200 verbose group)) {
- $thispage .= "$param=1;" if $q->param($param);
- }
+ # Boolean parameters
+ for my $param (qw(ss outline No200 verbose group)) {
+ $thispage .= "$param=1;" if $q->param($param);
+ }
- # Others
- if ($q->param('doctype')
- and not $q->param('doctype') =~ /(Inline|detect)/i) {
- $thispage .= 'doctype=' . uri_escape($q->param('doctype')) . ';';
- }
- if ($q->param('charset') and not $q->param('charset') =~ /detect/i) {
- $thispage .= 'charset=' . uri_escape($q->param('charset')) . ';';
- }
+ # Others
+ if ($q->param('doctype') and
+ not $q->param('doctype') =~ /(Inline|detect)/i)
+ {
+ $thispage .= 'doctype=' . uri_escape($q->param('doctype')) . ';';
+ }
+ if ($q->param('charset') and not $q->param('charset') =~ /detect/i) {
+ $thispage .= 'charset=' . uri_escape($q->param('charset')) . ';';
+ }
- $thispage =~ s/[\?;]$//;
- return $thispage;
+ $thispage =~ s/[\?;]$//;
+ return $thispage;
}
#
# Return random tip and its URL.
-sub get_tip {
- my @tipAddrs = keys %{$CFG->{Tips}};
- my $tipAddr = $tipAddrs[rand scalar @tipAddrs];
- my $tipSlug = $CFG->{Tips}->{$tipAddr};
+sub get_tip
+{
+ my @tipAddrs = keys %{$CFG->{Tips}};
+ my $tipAddr = $tipAddrs[rand scalar @tipAddrs];
+ my $tipSlug = $CFG->{Tips}->{$tipAddr};
- return [$tipAddr, $tipSlug];
+ return [$tipAddr, $tipSlug];
}
#
# Construct a self-referential URL from a $File object.
-sub self_url_file {
- my $File = shift;
-
- my $thispage = $File->{Env}->{'Self URI'};
- my $escaped_uri = uri_escape($File->{URI});
- $thispage .= qq(?uri=$escaped_uri);
- $thispage .= ';ss=1' if $File->{Opt}->{'Show Source'};
- $thispage .= ';st=1' if $File->{Opt}->{'Show Tidy'};
- $thispage .= ';outline=1' if $File->{Opt}->{Outline};
- $thispage .= ';No200=1' if $File->{Opt}->{No200};
- $thispage .= ';verbose=1' if $File->{Opt}->{Verbose};
- $thispage .= ';group=1' if $File->{Opt}->{'Group Errors'};
- $thispage .= ';accept=' . uri_escape($File->{Opt}->{'Accept Header'}) if $File->{Opt}->{'Accept Header'};
- $thispage .= ';accept-language=' . uri_escape($File->{Opt}->{'Accept-Language Header'}) if $File->{Opt}->{'Accept-Language Header'};
- $thispage .= ';accept-charset=' . uri_escape($File->{Opt}->{'Accept-Charset Header'}) if $File->{Opt}->{'Accept-Charset Header'};
-
- return $thispage;
+sub self_url_file
+{
+ my $File = shift;
+
+ my $thispage = $File->{Env}->{'Self URI'};
+ my $escaped_uri = uri_escape($File->{URI});
+ $thispage .= qq(?uri=$escaped_uri);
+ $thispage .= ';ss=1' if $File->{Opt}->{'Show Source'};
+ $thispage .= ';st=1' if $File->{Opt}->{'Show Tidy'};
+ $thispage .= ';outline=1' if $File->{Opt}->{Outline};
+ $thispage .= ';No200=1' if $File->{Opt}->{No200};
+ $thispage .= ';verbose=1' if $File->{Opt}->{Verbose};
+ $thispage .= ';group=1' if $File->{Opt}->{'Group Errors'};
+ $thispage .= ';accept=' . uri_escape($File->{Opt}->{'Accept Header'})
+ if $File->{Opt}->{'Accept Header'};
+ $thispage .=
+ ';accept-language=' .
+ uri_escape($File->{Opt}->{'Accept-Language Header'})
+ if $File->{Opt}->{'Accept-Language Header'};
+ $thispage .=
+ ';accept-charset=' .
+ uri_escape($File->{Opt}->{'Accept-Charset Header'})
+ if $File->{Opt}->{'Accept-Charset Header'};
+
+ return $thispage;
}
#####
package W3C::Validator::EventHandler;
+
#
# Define global constants
use constant TRUE => 1;
@@ -2885,230 +3212,255 @@ use constant FALSE => 0;
#
# Tentative Validation Severities.
-use constant T_WARN => 4; # 0000 0100
-use constant T_ERROR => 8; # 0000 1000
+use constant T_WARN => 4; # 0000 0100
+use constant T_ERROR => 8; # 0000 1000
#
# Output flags for error processing
-use constant O_SOURCE => 1; # 0000 0001
-use constant O_CHARSET => 2; # 0000 0010
-use constant O_DOCTYPE => 4; # 0000 0100
-use constant O_NONE => 8; # 0000 1000
-
+use constant O_SOURCE => 1; # 0000 0001
+use constant O_CHARSET => 2; # 0000 0010
+use constant O_DOCTYPE => 4; # 0000 0100
+use constant O_NONE => 8; # 0000 1000
sub new
{
- my $class = shift;
- my $parser = shift;
- my $File = shift;
- my $CFG = shift;
- my $self = { _file => $File, CFG => $CFG, _parser => $parser };
- bless $self, $class;
+ my $class = shift;
+ my $parser = shift;
+ my $File = shift;
+ my $CFG = shift;
+ my $self = {_file => $File, CFG => $CFG, _parser => $parser};
+ bless $self, $class;
}
-
sub start_element
{
- my ($self, $element) = @_;
-
- my $has_xmlns = FALSE;
- my $xmlns_value = undef;
-
- # If in XML mode, find namespace used for each element.
- if (&W3C::Validator::MarkupValidator::is_xml($self->{_file})) {
- if (my $attr = $element->{Attributes}->{xmlns}) {
- $xmlns_value = "";
- # Try with SAX method
- if ($attr->{Value}) {
- $has_xmlns = TRUE;
- $xmlns_value = $attr->{Value};
- }
- #next if $has_xmlns;
-
- # The following is not SAX, but OpenSP specific.
- my $defaulted = $attr->{Defaulted} || '';
- if ($defaulted eq "specified") {
- $has_xmlns = TRUE;
- $xmlns_value .= join("", map { $_->{Data} } @{$attr->{CdataChunks}});
- }
- }
- }
-
- my $doctype = $self->{_file}->{DOCTYPE};
-
- if (!defined($self->{CFG}->{Types}->{$doctype}->{Name}) ||
- $element->{Name} ne $self->{CFG}->{Types}->{$doctype}->{Name}) {
- # add to list of non-root namespaces
- push(@{$self->{_file}->{Namespaces}}, $xmlns_value) if $has_xmlns;
- }
- elsif (!$has_xmlns and $self->{CFG}->{Types}->{$doctype}->{"Namespace Required"}) {
- # whine if the root xmlns attribute is noted as required by spec,
- # but not present
- my $err;
- my $location = $self->{_parser}->get_location();
- $err->{src} = '...'; # do this with show_open_entities()?
- $err->{line} = $location->{LineNumber};
- $err->{char} = $location->{ColumnNumber};
- $err->{num} = "no-xmlns";
- $err->{type} = "E";
- $err->{msg} = "Missing xmlns attribute for element ".$element->{Name} . ".
+ my ($self, $element) = @_;
+
+ my $has_xmlns = FALSE;
+ my $xmlns_value = undef;
+
+ # If in XML mode, find namespace used for each element.
+ if (&W3C::Validator::MarkupValidator::is_xml($self->{_file})) {
+ if (my $attr = $element->{Attributes}->{xmlns}) {
+ $xmlns_value = "";
+
+ # Try with SAX method
+ if ($attr->{Value}) {
+ $has_xmlns = TRUE;
+ $xmlns_value = $attr->{Value};
+ }
+
+ #next if $has_xmlns;
+
+ # The following is not SAX, but OpenSP specific.
+ my $defaulted = $attr->{Defaulted} || '';
+ if ($defaulted eq "specified") {
+ $has_xmlns = TRUE;
+ $xmlns_value .=
+ join("", map { $_->{Data} } @{$attr->{CdataChunks}});
+ }
+ }
+ }
+
+ my $doctype = $self->{_file}->{DOCTYPE};
+
+ if (!defined($self->{CFG}->{Types}->{$doctype}->{Name}) ||
+ $element->{Name} ne $self->{CFG}->{Types}->{$doctype}->{Name})
+ {
+
+ # add to list of non-root namespaces
+ push(@{$self->{_file}->{Namespaces}}, $xmlns_value) if $has_xmlns;
+ }
+ elsif (!$has_xmlns and
+ $self->{CFG}->{Types}->{$doctype}->{"Namespace Required"})
+ {
+
+ # whine if the root xmlns attribute is noted as required by spec,
+ # but not present
+ my $err;
+ my $location = $self->{_parser}->get_location();
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $location->{LineNumber};
+ $err->{char} = $location->{ColumnNumber};
+ $err->{num} = "no-xmlns";
+ $err->{type} = "E";
+ $err->{msg} =
+ "Missing xmlns attribute for element " . $element->{Name} . ".
The value should be: $self->{CFG}->{Types}->{$doctype}->{Namespace}";
+ # ...
+ $self->{_file}->{'Is Valid'} = FALSE;
+ push @{$self->{_file}->{Errors}}, $err;
+ }
+ elsif ($has_xmlns and
+ (defined $self->{CFG}->{Types}->{$doctype}->{Namespace}) and
+ ($xmlns_value ne $self->{CFG}->{Types}->{$doctype}->{Namespace}))
+ {
- # ...
- $self->{_file}->{'Is Valid'} = FALSE;
- push @{$self->{_file}->{Errors}}, $err;
- }
- elsif ($has_xmlns and (defined $self->{CFG}->{Types}->{$doctype}->{Namespace})
- and ($xmlns_value ne $self->{CFG}->{Types}->{$doctype}->{Namespace}) ) {
- # whine if root xmlns element is not the one specificed by the spec
- my $err;
- my $location = $self->{_parser}->get_location();
- $err->{src} = '...'; # do this with show_open_entities()?
- $err->{line} = $location->{LineNumber};
- $err->{char} = $location->{ColumnNumber};
- $err->{num} = "wrong-xmlns";
- $err->{type} = "E";
- $err->{msg} = "Wrong xmlns attribute for element $element->{Name}. ".
- "The value should be: $self->{CFG}->{Types}->{$doctype}->{Namespace}";
+ # whine if root xmlns element is not the one specificed by the spec
+ my $err;
+ my $location = $self->{_parser}->get_location();
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $location->{LineNumber};
+ $err->{char} = $location->{ColumnNumber};
+ $err->{num} = "wrong-xmlns";
+ $err->{type} = "E";
+ $err->{msg} =
+ "Wrong xmlns attribute for element $element->{Name}. " .
+ "The value should be: $self->{CFG}->{Types}->{$doctype}->{Namespace}";
- # ...
- $self->{_file}->{'Is Valid'} = FALSE;
- push @{$self->{_file}->{Errors}}, $err;
- }
+ # ...
+ $self->{_file}->{'Is Valid'} = FALSE;
+ push @{$self->{_file}->{Errors}}, $err;
+ }
}
-
sub error
{
- my $self = shift;
- my $error = shift;
- my $mess;
- eval {
- $mess = $self->{_parser}->split_message($error);
- };
- if ($@) {
- # this is a message that S:P:O could not handle, we skip its croaking
- return;
- }
- my $File = $self->{_file};
-
- # TODO: this does not filter out errors in DTDs.
-
- my $err;
-
- $err->{src} = '...'; # do this with show_open_entities()?
- $err->{line} = $mess->{primary_message}{LineNumber};
- $err->{char} = $mess->{primary_message}{ColumnNumber}+1;
- $err->{num} = $mess->{primary_message}{Number};
- $err->{type} = $mess->{primary_message}{Severity};
- $err->{msg} = $mess->{primary_message}{Text};
-
- $err->{msg} =~ s/"&"/"&amp;"/gsi;
-
- # our parser OpenSP is not quite XML-aware, or XML Namespaces Aware,
- # so we filter out a few errors for now
-
- my $is_xml = &W3C::Validator::MarkupValidator::is_xml($File);
-
- if ($is_xml and $err->{num} eq '108' and $err->{msg} =~ m{ "xmlns:\S+"}) {
- # the error is about a missing xmlns: attribute definition"
- return; # this is not an error, 'cause we said so
- }
-
- if ($err->{num} eq '187')
- # filtering out no "document type declaration; will parse without validation"
- # if root element is not html and mode is xml...
- {
- # since parsing was done without validation, result can only be "well-formed"
- if ($is_xml and lc($File->{Root}) ne 'html') {
- $File->{XMLWF_ONLY} = TRUE;
- W3C::Validator::MarkupValidator::add_warning('W09xml', {});
- return; # don't report this as an error, just proceed
- }
- # if mode is not XML, we do report the error. It should not happen in the case of <html> without doctype,
- # in that case the error message will be #344
- }
-
- if (($err->{num} eq '113') and ($err->{msg} =~ /xml:space/)) {
- # FIXME
- # this is a problem with some of the "flattened" W3C DTDs, filtering them out to not confuse users.
- # hoping to get the DTDs fixed, see http://lists.w3.org/Archives/Public/www-html-editor/2007AprJun/0010.html
- return; # don't report this, just proceed
- }
-
- if ($is_xml and $err->{num} eq '344' and $File->{Namespace}) {
- # we are in XML mode, we have a namespace, but no doctype.
- # the validator will already have said "no doctype, falling back to default" above
- # no need to report this.
- return; # don't report this, just proceed
- }
-
- if (($err->{num} eq '248') or ($err->{num} eq '247') or ($err->{num} eq '246')) {
- # these two errors should be triggered by -wmin-tag to report shorttag used,
- # but we're making them warnings, not errors
- # see http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.7
- $err->{type} = "W";
- }
-
- # Workaround for onsgmls as of 1.5 sometimes allegedly reporting errors
- # beyond EOL. If you see this warning in your web server logs, please
- # let the validator developers know, see http://validator.w3.org/feedback.html
- # As long as $err may be from somewhere else than the document (such as
- # from a DTD) and we have no way of identifying these cases, this
- # produces bogus results and error log spewage, so commented out for now.
-# if ((my $l = length($File->{Content}->[$err->{line}-1])) < $err->{char}) {
-# warn("Warning: reported error column larger than line length " .
-# "($err->{char} > $l) in $File->{URI} line $err->{line}, " .
-# "OpenSP bug? Resetting to line length.");
-# $err->{char} = $l;
-# }
-
- # No or unknown FPI and a relative SI.
- if ($err->{msg} =~ m(cannot (open|find))) {
- $File->{'Error Flagged'} = TRUE;
- &W3C::Validator::MarkupValidator::get_template($File, 'fatal-error.tmpl')->param(
- fatal_parse_extid_error => TRUE,
- fatal_parse_extid_msg => $err->{msg},
- );
- }
+ my $self = shift;
+ my $error = shift;
+ my $mess;
+ eval { $mess = $self->{_parser}->split_message($error); };
+ if ($@) {
+
+ # this is a message that S:P:O could not handle, we skip its croaking
+ return;
+ }
+ my $File = $self->{_file};
+
+ # TODO: this does not filter out errors in DTDs.
+
+ my $err;
+
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $mess->{primary_message}{LineNumber};
+ $err->{char} = $mess->{primary_message}{ColumnNumber} + 1;
+ $err->{num} = $mess->{primary_message}{Number};
+ $err->{type} = $mess->{primary_message}{Severity};
+ $err->{msg} = $mess->{primary_message}{Text};
+
+ $err->{msg} =~ s/"&"/"&amp;"/gsi;
+
+ # our parser OpenSP is not quite XML-aware, or XML Namespaces Aware,
+ # so we filter out a few errors for now
+
+ my $is_xml = &W3C::Validator::MarkupValidator::is_xml($File);
+
+ if ($is_xml and $err->{num} eq '108' and $err->{msg} =~ m{ "xmlns:\S+"}) {
+
+ # the error is about a missing xmlns: attribute definition"
+ return; # this is not an error, 'cause we said so
+ }
+
+ if ($err->{num} eq '187')
+
+ # filtering out no "document type declaration; will parse without validation"
+ # if root element is not html and mode is xml...
+ {
+
+ # since parsing was done without validation, result can only be "well-formed"
+ if ($is_xml and lc($File->{Root}) ne 'html') {
+ $File->{XMLWF_ONLY} = TRUE;
+ W3C::Validator::MarkupValidator::add_warning('W09xml', {});
+ return; # don't report this as an error, just proceed
+ }
+
+ # if mode is not XML, we do report the error. It should not happen in the case of <html> without doctype,
+ # in that case the error message will be #344
+ }
+
+ if (($err->{num} eq '113') and ($err->{msg} =~ /xml:space/)) {
+
+ # FIXME
+ # this is a problem with some of the "flattened" W3C DTDs, filtering them out to not confuse users.
+ # hoping to get the DTDs fixed, see http://lists.w3.org/Archives/Public/www-html-editor/2007AprJun/0010.html
+ return; # don't report this, just proceed
+ }
- # No DOCTYPE found! We are falling back to vanilla DTD
- if ($err->{msg} =~ m(prolog can\'t be omitted)) {
- if (lc($File->{Root}) eq 'html') {
- my $dtd = ($is_xml ?
- $File->{"Default DOCTYPE"}->{"XHTML"} : $File->{"Default DOCTYPE"}->{"HTML"} );
- W3C::Validator::MarkupValidator::add_warning('W09', {W09_dtd => $dtd});
+ if ($is_xml and $err->{num} eq '344' and $File->{Namespace}) {
+
+ # we are in XML mode, we have a namespace, but no doctype.
+ # the validator will already have said "no doctype, falling back to default" above
+ # no need to report this.
+ return; # don't report this, just proceed
}
- else { # not html root element, we are not using fallback
- unless ($is_xml) {
- $File->{'Is Valid'} = FALSE;
- W3C::Validator::MarkupValidator::add_warning('W09nohtml', {});
- }
+
+ if (($err->{num} eq '248') or
+ ($err->{num} eq '247') or
+ ($err->{num} eq '246'))
+ {
+
+ # these two errors should be triggered by -wmin-tag to report shorttag used,
+ # but we're making them warnings, not errors
+ # see http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.7
+ $err->{type} = "W";
+ }
+
+ # Workaround for onsgmls as of 1.5 sometimes allegedly reporting errors
+ # beyond EOL. If you see this warning in your web server logs, please
+ # let the validator developers know, see http://validator.w3.org/feedback.html
+ # As long as $err may be from somewhere else than the document (such as
+ # from a DTD) and we have no way of identifying these cases, this
+ # produces bogus results and error log spewage, so commented out for now.
+ # if ((my $l = length($File->{Content}->[$err->{line}-1])) < $err->{char}) {
+ # warn("Warning: reported error column larger than line length " .
+ # "($err->{char} > $l) in $File->{URI} line $err->{line}, " .
+ # "OpenSP bug? Resetting to line length.");
+ # $err->{char} = $l;
+ # }
+
+ # No or unknown FPI and a relative SI.
+ if ($err->{msg} =~ m(cannot (open|find))) {
+ $File->{'Error Flagged'} = TRUE;
+ &W3C::Validator::MarkupValidator::get_template($File,
+ 'fatal-error.tmpl')->param(
+ fatal_parse_extid_error => TRUE,
+ fatal_parse_extid_msg => $err->{msg},
+ );
+ }
+
+ # No DOCTYPE found! We are falling back to vanilla DTD
+ if ($err->{msg} =~ m(prolog can\'t be omitted)) {
+ if (lc($File->{Root}) eq 'html') {
+ my $dtd =
+ ($is_xml ? $File->{"Default DOCTYPE"}->{"XHTML"} :
+ $File->{"Default DOCTYPE"}->{"HTML"});
+ W3C::Validator::MarkupValidator::add_warning('W09',
+ {W09_dtd => $dtd});
+ }
+ else { # not html root element, we are not using fallback
+ unless ($is_xml) {
+ $File->{'Is Valid'} = FALSE;
+ W3C::Validator::MarkupValidator::add_warning('W09nohtml', {});
+ }
+ }
+
+ return; # Don't report this as a normal error.
}
- return; # Don't report this as a normal error.
- }
+ # TODO: calling exit() here is probably a bad idea
+ W3C::Validator::MarkupValidator::abort_if_error_flagged($File, O_DOCTYPE);
- # TODO: calling exit() here is probably a bad idea
- W3C::Validator::MarkupValidator::abort_if_error_flagged($File, O_DOCTYPE);
+ push @{$File->{Errors}}, $err;
+
+ # ...
+ $File->{'Is Valid'} = FALSE if $err->{type} eq 'E';
- push @{$File->{Errors}}, $err;
- # ...
- $File->{'Is Valid'} = FALSE if $err->{type} eq 'E';
+ if (defined $mess->{aux_message}) {
- if (defined $mess->{aux_message})
- {
- # "duplicate id ... first defined here" style messages
- push @{$File->{Errors}}, { line => $mess->{aux_message}{LineNumber},
- char => $mess->{aux_message}{ColumnNumber}+1,
- msg => $mess->{aux_message}{Text},
- type => 'I',
- };
- }
+ # "duplicate id ... first defined here" style messages
+ push @{$File->{Errors}},
+ {
+ line => $mess->{aux_message}{LineNumber},
+ char => $mess->{aux_message}{ColumnNumber} + 1,
+ msg => $mess->{aux_message}{Text},
+ type => 'I',
+ };
+ }
}
package W3C::Validator::EventHandler::Outliner;
+
#
# Define global constants
use constant TRUE => 1;
@@ -3116,152 +3468,161 @@ use constant FALSE => 0;
#
# Tentative Validation Severities.
-use constant T_WARN => 4; # 0000 0100
-use constant T_ERROR => 8; # 0000 1000
+use constant T_WARN => 4; # 0000 0100
+use constant T_ERROR => 8; # 0000 1000
#
# Output flags for error processing
-use constant O_SOURCE => 1; # 0000 0001
-use constant O_CHARSET => 2; # 0000 0010
-use constant O_DOCTYPE => 4; # 0000 0100
-use constant O_NONE => 8; # 0000 1000
+use constant O_SOURCE => 1; # 0000 0001
+use constant O_CHARSET => 2; # 0000 0010
+use constant O_DOCTYPE => 4; # 0000 0100
+use constant O_NONE => 8; # 0000 1000
use base qw(W3C::Validator::EventHandler);
sub new
{
- my $class = shift;
- my $parser = shift;
- my $File = shift;
- my $CFG = shift;
- my $self = $class->SUPER::new($parser, $File, $CFG);
- $self->{am_in_heading} = 0;
- $self->{heading_text} = [];
- bless $self, $class;
+ my $class = shift;
+ my $parser = shift;
+ my $File = shift;
+ my $CFG = shift;
+ my $self = $class->SUPER::new($parser, $File, $CFG);
+ $self->{am_in_heading} = 0;
+ $self->{heading_text} = [];
+ bless $self, $class;
}
sub data
{
- my ($self, $chars) = @_;
- push(@{$self->{heading_text}}, $chars->{Data}) if $self->{am_in_heading};
+ my ($self, $chars) = @_;
+ push(@{$self->{heading_text}}, $chars->{Data}) if $self->{am_in_heading};
}
sub start_element
{
- my ($self, $element) = @_;
- if ($element->{Name} =~ /^h([1-6])$/i) {
- $self->{_file}->{heading_outline} ||= "";
- $self->{_file}->{heading_outline} .=
- " " x int($1) . "[$element->{Name}] ";
- $self->{am_in_heading} = 1;
- }
+ my ($self, $element) = @_;
+ if ($element->{Name} =~ /^h([1-6])$/i) {
+ $self->{_file}->{heading_outline} ||= "";
+ $self->{_file}->{heading_outline} .=
+ " " x int($1) . "[$element->{Name}] ";
+ $self->{am_in_heading} = 1;
+ }
- return $self->SUPER::start_element($element);
+ return $self->SUPER::start_element($element);
}
sub end_element
{
- my ($self, $element) = @_;
- if ($element->{Name} =~ /^h[1-6]$/i) {
- my $text = join("", @{$self->{heading_text}});
- $text =~ s/^\s+//g;
- $text =~ s/\s+/ /g;
- $text =~ s/\s+$//g;
- $self->{_file}->{heading_outline} .= "$text\n";
- $self->{am_in_heading} = 0;
- $self->{heading_text} = [];
- }
+ my ($self, $element) = @_;
+ if ($element->{Name} =~ /^h[1-6]$/i) {
+ my $text = join("", @{$self->{heading_text}});
+ $text =~ s/^\s+//g;
+ $text =~ s/\s+/ /g;
+ $text =~ s/\s+$//g;
+ $self->{_file}->{heading_outline} .= "$text\n";
+ $self->{am_in_heading} = 0;
+ $self->{heading_text} = [];
+ }
}
-
#####
package W3C::Validator::UserAgent;
-use HTTP::Message qw();
-use LWP::UserAgent 2.032 qw(); # Need 2.032 for default_header()
-use Net::hostent qw(gethostbyname);
-use Net::IP qw();
-use Socket qw(inet_ntoa);
+use HTTP::Message qw();
+use LWP::UserAgent 2.032 qw(); # Need 2.032 for default_header()
+use Net::hostent qw(gethostbyname);
+use Net::IP qw();
+use Socket qw(inet_ntoa);
use base qw(LWP::UserAgent);
-BEGIN
-{
- # The 4k default line length in LWP <= 5.832 isn't enough for example to
- # accommodate 4kB cookies (RFC 2985); bump it (#6678).
- require LWP::Protocol::http;
- push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 8*1024);
+BEGIN {
+
+ # The 4k default line length in LWP <= 5.832 isn't enough for example to
+ # accommodate 4kB cookies (RFC 2985); bump it (#6678).
+ require LWP::Protocol::http;
+ push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 8 * 1024);
}
-sub new {
- my ($proto, $CFG, $File, @rest) = @_;
- my $class = ref($proto) || $proto;
- my $self = $class->SUPER::new(@rest);
+sub new
+{
+ my ($proto, $CFG, $File, @rest) = @_;
+ my $class = ref($proto) || $proto;
+ my $self = $class->SUPER::new(@rest);
- $self->{'W3C::Validator::CFG'} = $CFG;
- $self->{'W3C::Validator::File'} = $File;
+ $self->{'W3C::Validator::CFG'} = $CFG;
+ $self->{'W3C::Validator::File'} = $File;
- $self->env_proxy();
- $self->agent($File->{Opt}->{'User Agent'});
- $self->protocols_allowed($CFG->{Protocols}->{Allow} || ['http', 'https']);
+ $self->env_proxy();
+ $self->agent($File->{Opt}->{'User Agent'});
+ $self->protocols_allowed($CFG->{Protocols}->{Allow} || ['http', 'https']);
- # Don't parse the http-equiv stuff.
- $self->parse_head(0);
+ # Don't parse the http-equiv stuff.
+ $self->parse_head(0);
- # Tell caches in the middle we want a fresh copy (Bug 4998).
- $self->default_header('Cache-Control' => 'max-age=0');
+ # Tell caches in the middle we want a fresh copy (Bug 4998).
+ $self->default_header('Cache-Control' => 'max-age=0');
- # If not in debug mode, set Accept-Encoding to what LWP (>= 5.816) can handle
- $self->default_header('Accept-Encoding' => scalar HTTP::Message::decodable())
- if (!$File->{Opt}->{Debug} && HTTP::Message->can('decodable'));
+ # If not in debug mode, set Accept-Encoding to what LWP (>= 5.816) can handle
+ $self->default_header(
+ 'Accept-Encoding' => scalar HTTP::Message::decodable())
+ if (!$File->{Opt}->{Debug} && HTTP::Message->can('decodable'));
- return $self;
+ return $self;
}
-sub redirect_ok {
- my ($self, $req, $res) = @_;
- return $self->SUPER::redirect_ok($req, $res) && $self->uri_ok($req->uri());
+sub redirect_ok
+{
+ my ($self, $req, $res) = @_;
+ return $self->SUPER::redirect_ok($req, $res) && $self->uri_ok($req->uri());
}
-sub uri_ok {
- my ($self, $uri) = @_;
-
- return 1 if ($self->{'W3C::Validator::CFG'}->{'Allow Private IPs'} or
- !$uri->can('host'));
-
- my $h5uri = $self->{'W3C::Validator::CFG'}->{External}->{HTML5};
- if ($h5uri) {
- my $clone = $uri->clone(); $clone->query(undef); $clone->fragment(undef);
- $h5uri = URI->new($h5uri); $h5uri->query(undef); $h5uri->fragment(undef);
- return 1 if $clone->eq($h5uri);
- }
-
- my $addr = my $iptype = undef;
- if (my $host = gethostbyname($uri->host())) {
- $addr = inet_ntoa($host->addr()) if $host->addr();
- if ($addr && (my $ip = Net::IP->new($addr))) {
- $iptype = $ip->iptype();
+sub uri_ok
+{
+ my ($self, $uri) = @_;
+
+ return 1
+ if ($self->{'W3C::Validator::CFG'}->{'Allow Private IPs'} or
+ !$uri->can('host'));
+
+ my $h5uri = $self->{'W3C::Validator::CFG'}->{External}->{HTML5};
+ if ($h5uri) {
+ my $clone = $uri->clone();
+ $clone->query(undef);
+ $clone->fragment(undef);
+ $h5uri = URI->new($h5uri);
+ $h5uri->query(undef);
+ $h5uri->fragment(undef);
+ return 1 if $clone->eq($h5uri);
+ }
+
+ my $addr = my $iptype = undef;
+ if (my $host = gethostbyname($uri->host())) {
+ $addr = inet_ntoa($host->addr()) if $host->addr();
+ if ($addr && (my $ip = Net::IP->new($addr))) {
+ $iptype = $ip->iptype();
+ }
}
- }
- if ($iptype && $iptype ne 'PUBLIC') {
- my $File = $self->{'W3C::Validator::File'};
- $File->{'Error Flagged'} = 1;
- my $tmpl = &W3C::Validator::MarkupValidator::get_template($File, 'fatal-error.tmpl');
- $tmpl->param(
- fatal_ip_error => 1,
- fatal_ip_host => $uri->host() || 'undefined',
- );
- $tmpl->param(fatal_ip_hostname => 1) if ($addr and $uri->host() ne $addr);
- return 0;
- }
- return 1;
+ if ($iptype && $iptype ne 'PUBLIC') {
+ my $File = $self->{'W3C::Validator::File'};
+ $File->{'Error Flagged'} = 1;
+ my $tmpl = &W3C::Validator::MarkupValidator::get_template($File,
+ 'fatal-error.tmpl');
+ $tmpl->param(
+ fatal_ip_error => 1,
+ fatal_ip_host => $uri->host() || 'undefined',
+ );
+ $tmpl->param(fatal_ip_hostname => 1)
+ if ($addr and $uri->host() ne $addr);
+ return 0;
+ }
+ return 1;
}
# Local Variables:
# mode: perl
# indent-tabs-mode: nil
-# tab-width: 2
-# perl-indent-level: 2
+# perl-indent-level: 4
# End:
-# ex: ts=2 sw=2 et
+# ex: ts=4 sw=4 et
diff --git a/httpd/cgi-bin/sendfeedback.pl b/httpd/cgi-bin/sendfeedback.pl
index 2dd9032..292bdd3 100755
--- a/httpd/cgi-bin/sendfeedback.pl
+++ b/httpd/cgi-bin/sendfeedback.pl
@@ -1,21 +1,21 @@
#!/usr/bin/perl -T
##
## feedback generator for W3C Markup Validation Service
-# # $Id: sendfeedback.pl,v 1.12 2009-06-29 14:37:08 ville Exp $
+# # $Id: sendfeedback.pl,v 1.13 2009-11-23 22:15:18 ville Exp $
## Pragmas.
use strict;
use warnings;
-
## Modules. See also the BEGIN block further down below.
-use CGI qw();
+use CGI qw();
use File::Spec::Functions qw(catfile);
-use HTML::Template 2.6 qw();
-use Config::General 2.32 qw(); # Need 2.32 for <msg 0>, rt.cpan.org#17852
+use HTML::Template 2.6 qw();
+use Config::General 2.32 qw(); # Need 2.32 for <msg 0>, rt.cpan.org#17852
use vars qw($DEBUG $CFG %RSRC $VERSION);
+
# Define global constants
use constant TRUE => 1;
use constant FALSE => 0;
@@ -23,60 +23,63 @@ use constant FALSE => 0;
# Things inside BEGIN don't happen on every request in persistent
# environments, such as mod_perl. So let's do globals, eg. read config here.
BEGIN {
- # Launder data for -T; -AutoLaunder doesn't catch this one.
- if (exists $ENV{W3C_VALIDATOR_HOME}) {
- $ENV{W3C_VALIDATOR_HOME} =~ /^(.*)$/;
- $ENV{W3C_VALIDATOR_HOME} = $1;
- }
-
- #
- # Read Config Files.
- eval {
- my %config_opts = (
- -ConfigFile => ($ENV{W3C_VALIDATOR_CFG} || '/etc/w3c/validator.conf'),
- -MergeDuplicateOptions => TRUE,
- -MergeDuplicateBlocks => TRUE,
- -SplitPolicy => 'equalsign',
- -UseApacheInclude => TRUE,
- -IncludeRelative => TRUE,
- -InterPolateVars => TRUE,
- -AutoLaunder => TRUE,
- -AutoTrue => TRUE,
- -DefaultConfig => {
- Paths => {
- Base => ($ENV{W3C_VALIDATOR_HOME} || '/usr/local/validator'),
- },
- },
- );
- my %cfg = Config::General->new(%config_opts)->getall();
- $CFG = \%cfg;
- };
- if ($@) {
- die <<".EOF.";
+
+ # Launder data for -T; -AutoLaunder doesn't catch this one.
+ if (exists $ENV{W3C_VALIDATOR_HOME}) {
+ $ENV{W3C_VALIDATOR_HOME} =~ /^(.*)$/;
+ $ENV{W3C_VALIDATOR_HOME} = $1;
+ }
+
+ #
+ # Read Config Files.
+ eval {
+ my %config_opts = (
+ -ConfigFile =>
+ ($ENV{W3C_VALIDATOR_CFG} || '/etc/w3c/validator.conf'),
+ -MergeDuplicateOptions => TRUE,
+ -MergeDuplicateBlocks => TRUE,
+ -SplitPolicy => 'equalsign',
+ -UseApacheInclude => TRUE,
+ -IncludeRelative => TRUE,
+ -InterPolateVars => TRUE,
+ -AutoLaunder => TRUE,
+ -AutoTrue => TRUE,
+ -DefaultConfig => {
+ Paths => {
+ Base =>
+ ($ENV{W3C_VALIDATOR_HOME} || '/usr/local/validator'),
+ },
+ },
+ );
+ my %cfg = Config::General->new(%config_opts)->getall();
+ $CFG = \%cfg;
+ };
+ if ($@) {
+ die <<".EOF.";
Could not read configuration. Set the W3C_VALIDATOR_CFG environment variable
or copy conf/* to /etc/w3c/. Make sure that the configuration file and all
included files are readable by the web server user. The error was:\n'$@'
.EOF.
- }
-} # end of BEGIN block.
+ }
+} # end of BEGIN block.
#
# Get rid of (possibly insecure) $PATH.
delete $ENV{PATH};
-our $q = new CGI;
-our $lang = 'en_US'; # @@@ TODO: conneg
+our $q = new CGI;
+our $lang = 'en_US'; # @@@ TODO: conneg
# Read error message + explanations file
%RSRC = Config::General->new(
- -MergeDuplicateBlocks => 1,
- -ConfigFile => catfile($CFG->{Paths}->{Templates}, $lang,
- 'error_messages.cfg'),
- )->getall();
+ -MergeDuplicateBlocks => 1,
+ -ConfigFile =>
+ catfile($CFG->{Paths}->{Templates}, $lang, 'error_messages.cfg'),
+)->getall();
our $T = HTML::Template->new(
- filename => catfile($CFG->{Paths}->{Templates}, $lang, 'feedback.tmpl'),
- die_on_bad_params => FALSE,
+ filename => catfile($CFG->{Paths}->{Templates}, $lang, 'feedback.tmpl'),
+ die_on_bad_params => FALSE,
);
our $errlist = "";
@@ -84,13 +87,15 @@ our $errmsg_text;
our $validated_uri;
our $errmsg_id;
-sub process_query {
+sub process_query
+{
$validated_uri = $q->param('uri');
- $errmsg_id = $q->param('errmsg_id');
+ $errmsg_id = $q->param('errmsg_id');
if ($errmsg_id) {
$errmsg_text = "$RSRC{msg}->{$errmsg_id}->{original}";
$errmsg_text = de_template_explanation($errmsg_text);
}
+
# Trigger "thanks for your message. If your query requires an answer,..." ack paragraph
my $sent = $q->param('send');
if ($sent) {
@@ -99,33 +104,42 @@ sub process_query {
}
}
-sub send_message {
-# sends message to www-validator list @@ TODO @@
+sub send_message
+{
+
+ # sends message to www-validator list @@ TODO @@
}
-sub error_choices {
-# creates drop-down menu with all possible error messages to send feedback about
- my @msgnumbers = keys( %{$RSRC{msg}} );
+sub error_choices
+{
+
+ # creates drop-down menu with all possible error messages to send feedback about
+ my @msgnumbers = keys(%{$RSRC{msg}});
@msgnumbers = sort { $a <=> $b } @msgnumbers;
my $errlabel;
- for my $errnum ( @msgnumbers ) {
+ for my $errnum (@msgnumbers) {
$errlabel = $RSRC{msg}->{$errnum}->{original};
$errlabel = de_template_explanation($errlabel);
- if (length($errlabel) > 70) { $errlabel = substr($errlabel, 0, 67)."..." }
- $errlist = $errlist.'<option value="'. $errnum.'"';
+ if (length($errlabel) > 70) {
+ $errlabel = substr($errlabel, 0, 67) . "...";
+ }
+ $errlist = $errlist . '<option value="' . $errnum . '"';
if ($errmsg_id) {
- if ($errnum == $errmsg_id) { $errlist = $errlist.'selected="selected" '; }
+ if ($errnum == $errmsg_id) {
+ $errlist = $errlist . 'selected="selected" ';
+ }
}
- $errlist = $errlist."> $errnum $errlabel</option>\n";
+ $errlist = $errlist . "> $errnum $errlabel</option>\n";
}
}
+sub de_template_explanation
+{
-sub de_template_explanation {
-# takes the error message template, and replace "template keywords" with real life keywords
+ # takes the error message template, and replace "template keywords" with real life keywords
my $explanation = shift;
- if ($explanation){
+ if ($explanation) {
$explanation =~ s/\%1/X/;
$explanation =~ s/\%2/Y/;
$explanation =~ s/\%3/Z/;
@@ -136,24 +150,26 @@ sub de_template_explanation {
return $explanation;
}
+sub prepare_error_message
+{
-sub prepare_error_message {
-# if the form sent contains errors (what kind exactly?)
-# @@ TODO @@
+ # if the form sent contains errors (what kind exactly?)
+ # @@ TODO @@
}
-sub print_prefilled_form {
+sub print_prefilled_form
+{
$T->param(page_title => "Feedback");
- $T->param(uri => $validated_uri);
- $T->param(errmsg_id => $errmsg_id);
-# $T->param(errlist => $errlist);
+ $T->param(uri => $validated_uri);
+ $T->param(errmsg_id => $errmsg_id);
+
+ # $T->param(errlist => $errlist);
$T->param(explanation => $errmsg_text);
print $T->output;
}
-
-
process_query;
+
#error_choices;
print_prefilled_form;
diff --git a/misc/bundle/Makefile.PL b/misc/bundle/Makefile.PL
index 794a36c..5f6fdee 100644
--- a/misc/bundle/Makefile.PL
+++ b/misc/bundle/Makefile.PL
@@ -8,43 +8,45 @@ WriteMakefile(
LICENSE => 'open_source',
VERSION_FROM => 'lib/Bundle/W3C/Validator.pm',
PREREQ_PM => {
- # Hard dependencies:
- CGI => 2.81,
- CGI::Carp => 0,
- Config::General => 2.32,
- Encode => 0,
- Encode::Alias => 0,
- Encode::HanExtra => 0,
- File::Spec::Functions => 0,
- HTML::Encoding => 0.52,
- HTML::Parser => 3.24,
- HTML::Template => 2.6,
- HTTP::Headers::Auth => 0,
- HTTP::Headers::Util => 0,
- HTTP::Message => 1.52,
- HTTP::Negotiate => 0,
- HTTP::Request => 0,
- JSON => 2.00,
- LWP::UserAgent => 2.032,
- Net::hostent => 0,
- Net::IP => 0,
- SGML::Parser::OpenSP => 0.991,
- Socket => 0,
- URI => 0,
- URI::Escape => 0,
- XML::LibXML => 0,
- # Optional:
- Encode::JIS2K => 0,
- HTML::Tidy => 0,
- },
- depend => { distdir => 'ChangeLog' },
- dist => { TARFLAGS => '--owner=0 --group=0 -cvf' },
- clean => { FILES => 'ChangeLog.bak' },
+ # Hard dependencies:
+ CGI => 2.81,
+ CGI::Carp => 0,
+ Config::General => 2.32,
+ Encode => 0,
+ Encode::Alias => 0,
+ Encode::HanExtra => 0,
+ File::Spec::Functions => 0,
+ HTML::Encoding => 0.52,
+ HTML::Parser => 3.24,
+ HTML::Template => 2.6,
+ HTTP::Headers::Auth => 0,
+ HTTP::Headers::Util => 0,
+ HTTP::Message => 1.52,
+ HTTP::Negotiate => 0,
+ HTTP::Request => 0,
+ JSON => 2.00,
+ LWP::UserAgent => 2.032,
+ Net::hostent => 0,
+ Net::IP => 0,
+ SGML::Parser::OpenSP => 0.991,
+ Socket => 0,
+ URI => 0,
+ URI::Escape => 0,
+ XML::LibXML => 0,
+
+ # Optional:
+ Encode::JIS2K => 0,
+ HTML::Tidy => 0,
+ },
+ depend => {distdir => 'ChangeLog'},
+ dist => {TARFLAGS => '--owner=0 --group=0 -cvf'},
+ clean => {FILES => 'ChangeLog.bak'},
);
-sub MY::postamble {
- return <<'MAKE_FRAG';
+sub MY::postamble
+{
+ return <<'MAKE_FRAG';
ChangeLog: README lib/Bundle/W3C/Validator.pm t/00load.t
cvs2cl --FSF --utc --prune \
--ignore ChangeLog --ignore cvsignore --ignore SIGNATURE \
diff --git a/misc/docs_errors.pl b/misc/docs_errors.pl
index 28ae8e4..b6774c7 100755
--- a/misc/docs_errors.pl
+++ b/misc/docs_errors.pl
@@ -2,143 +2,148 @@
##
## Generates HTML documentation of error messages and explanations
## for W3C Markup Validation Service
-## $Id: docs_errors.pl,v 1.11 2009-06-29 14:37:08 ville Exp $
+## $Id: docs_errors.pl,v 1.12 2009-11-23 22:15:18 ville Exp $
## Pragmas.
use strict;
use warnings;
-
## Modules. See also the BEGIN block further down below.
use File::Spec::Functions qw(catfile);
-use HTML::Template 2.6 qw();
-use Config::General 2.32 qw(); # Need 2.32 for <msg 0>, rt.cpan.org#17852
+use HTML::Template 2.6 qw();
+use Config::General 2.32 qw(); # Need 2.32 for <msg 0>, rt.cpan.org#17852
use vars qw($DEBUG $CFG $VERSION);
+
# Define global constants
use constant TRUE => 1;
use constant FALSE => 0;
BEGIN {
- # Launder data for -T; -AutoLaunder doesn't catch this one.
- if (exists $ENV{W3C_VALIDATOR_HOME}) {
- $ENV{W3C_VALIDATOR_HOME} =~ /^(.*)$/;
- $ENV{W3C_VALIDATOR_HOME} = $1;
- }
-
- #
- # Read Config Files.
- eval {
- my %config_opts = (
- -ConfigFile => ($ENV{W3C_VALIDATOR_CFG} || '/etc/w3c/validator.conf'),
- -MergeDuplicateOptions => TRUE,
- -MergeDuplicateBlocks => TRUE,
- -SplitPolicy => 'equalsign',
- -UseApacheInclude => TRUE,
- -IncludeRelative => TRUE,
- -InterPolateVars => TRUE,
- -AutoLaunder => TRUE,
- -AutoTrue => TRUE,
- -DefaultConfig => {
- Paths => {
- Base => ($ENV{W3C_VALIDATOR_HOME} || '/usr/local/validator'),
- },
- },
- );
- my %cfg = Config::General->new(%config_opts)->getall();
- $CFG = \%cfg;
- };
- if ($@) {
- die <<".EOF.";
+
+ # Launder data for -T; -AutoLaunder doesn't catch this one.
+ if (exists $ENV{W3C_VALIDATOR_HOME}) {
+ $ENV{W3C_VALIDATOR_HOME} =~ /^(.*)$/;
+ $ENV{W3C_VALIDATOR_HOME} = $1;
+ }
+
+ #
+ # Read Config Files.
+ eval {
+ my %config_opts = (
+ -ConfigFile =>
+ ($ENV{W3C_VALIDATOR_CFG} || '/etc/w3c/validator.conf'),
+ -MergeDuplicateOptions => TRUE,
+ -MergeDuplicateBlocks => TRUE,
+ -SplitPolicy => 'equalsign',
+ -UseApacheInclude => TRUE,
+ -IncludeRelative => TRUE,
+ -InterPolateVars => TRUE,
+ -AutoLaunder => TRUE,
+ -AutoTrue => TRUE,
+ -DefaultConfig => {
+ Paths => {
+ Base =>
+ ($ENV{W3C_VALIDATOR_HOME} || '/usr/local/validator'),
+ },
+ },
+ );
+ my %cfg = Config::General->new(%config_opts)->getall();
+ $CFG = \%cfg;
+ };
+ if ($@) {
+ die <<".EOF.";
Could not read configuration. Set the W3C_VALIDATOR_CFG environment variable
or copy conf/* to /etc/w3c/. Make sure that the configuration file and all
included files are readable by the web server user. The error was:\n'$@'
.EOF.
- }
-} # end of BEGIN block.
+ }
+} # end of BEGIN block.
#
# Get rid of (possibly insecure) $PATH.
delete $ENV{PATH};
-our $lang = 'en_US'; # @@@ TODO: conneg
+our $lang = 'en_US'; # @@@ TODO: conneg
# Read error message + explanations file
-our $error_messages_file = catfile($CFG->{Paths}->{Templates}, $lang, 'error_messages.cfg');
-our %config_errs = ( -MergeDuplicateBlocks => 1,
- -ConfigFile => $error_messages_file);
+our $error_messages_file =
+ catfile($CFG->{Paths}->{Templates}, $lang, 'error_messages.cfg');
+our %config_errs = (
+ -MergeDuplicateBlocks => 1,
+ -ConfigFile => $error_messages_file
+);
our %rsrc = Config::General->new(%config_errs)->getall();
-
our $T = HTML::Template->new(
- filename => catfile($CFG->{Paths}->{Templates}, $lang, 'docs_errors.tmpl'),
- die_on_bad_params => FALSE,
+ filename => catfile($CFG->{Paths}->{Templates}, $lang, 'docs_errors.tmpl'),
+ die_on_bad_params => FALSE,
);
$T->param(list_errors_hasverbose => &list_errors_hasverbose(\%rsrc));
-$T->param(list_errors_noverbose => &list_errors_noverbose(\%rsrc));
+$T->param(list_errors_noverbose => &list_errors_noverbose(\%rsrc));
print $T->output;
-sub list_errors_hasverbose{
- my $rsrc = shift;
+sub list_errors_hasverbose
+{
+ my $rsrc = shift;
my $errors = [];
my $error_id;
- my $max_error_id=500; # where to stop
- for ($error_id=0;$error_id<$max_error_id;$error_id++)
- {
- my %single_error;
- if ($rsrc->{msg}->{$error_id})
- {
- my $verbose = $rsrc->{msg}->{$error_id}->{verbose};
- if ($verbose)
- {
- my $original = $rsrc->{msg}->{$error_id}->{original};
- $original = &de_template_explanation($original);
- $single_error{original} = $original;
- $single_error{id} = $error_id;
- $single_error{verbose} = $rsrc->{msg}->{$error_id}->{verbose};
- $single_error{verbose} =~ s/<!--CFG_HOME_PAGE-->/$CFG->{'Home Page'}/g;
-
- push @{$errors}, \%single_error;
+ my $max_error_id = 500; # where to stop
+ for ($error_id = 0; $error_id < $max_error_id; $error_id++) {
+ my %single_error;
+ if ($rsrc->{msg}->{$error_id}) {
+ my $verbose = $rsrc->{msg}->{$error_id}->{verbose};
+ if ($verbose) {
+ my $original = $rsrc->{msg}->{$error_id}->{original};
+ $original = &de_template_explanation($original);
+ $single_error{original} = $original;
+ $single_error{id} = $error_id;
+ $single_error{verbose} = $rsrc->{msg}->{$error_id}->{verbose};
+ $single_error{verbose} =~
+ s/<!--CFG_HOME_PAGE-->/$CFG->{'Home Page'}/g;
+
+ push @{$errors}, \%single_error;
+
# Fix up relative paths (/check vs /docs/errors.html)
s/href="docs\//href="/
for $single_error{original}, $single_error{verbose};
- }
+ }
- }
+ }
}
- return $errors;
+ return $errors;
}
-sub list_errors_noverbose{
- my $rsrc = shift;
+sub list_errors_noverbose
+{
+ my $rsrc = shift;
my $errors = [];
my $error_id;
- my $max_error_id=500; # where to stop
- for ($error_id=0;$error_id<$max_error_id;$error_id++)
- {
- my %single_error;
- if ($rsrc->{msg}->{$error_id})
- {
- my $verbose = $rsrc->{msg}->{$error_id}->{verbose};
- if (! $verbose)
- {
- my $original = $rsrc->{msg}->{$error_id}->{original};
- $original = &de_template_explanation($original);
- $single_error{original} = $original;
- $single_error{id} = $error_id;
- $single_error{verbose} = $rsrc->{msg}->{$error_id}->{verbose};
- push @{$errors}, \%single_error;
- }
-
- }
+ my $max_error_id = 500; # where to stop
+ for ($error_id = 0; $error_id < $max_error_id; $error_id++) {
+ my %single_error;
+ if ($rsrc->{msg}->{$error_id}) {
+ my $verbose = $rsrc->{msg}->{$error_id}->{verbose};
+ if (!$verbose) {
+ my $original = $rsrc->{msg}->{$error_id}->{original};
+ $original = &de_template_explanation($original);
+ $single_error{original} = $original;
+ $single_error{id} = $error_id;
+ $single_error{verbose} = $rsrc->{msg}->{$error_id}->{verbose};
+ push @{$errors}, \%single_error;
+ }
+
+ }
}
- return $errors;
+ return $errors;
}
-sub de_template_explanation {
-# takes the error message template, and replace "template keywords" with real life keywords
+sub de_template_explanation
+{
+
+ # takes the error message template, and replace "template keywords" with real life keywords
my $explanation = shift;
if ($explanation) {
$explanation =~ s/\%1/X/;
diff --git a/misc/spmpp.pl b/misc/spmpp.pl
index c6a153c..ff364fc 100755
--- a/misc/spmpp.pl
+++ b/misc/spmpp.pl
@@ -4,7 +4,7 @@
# for use in the Validator, from an OpenSP ParserMessages.rc.
# (spmpp = "SP Message Pre-Processor")
#
-# $Id: spmpp.pl,v 1.2 2004-05-09 15:56:55 link Exp $
+# $Id: spmpp.pl,v 1.3 2009-11-23 22:15:18 ville Exp $
#
#
@@ -23,14 +23,14 @@ my @msg;
#
# Snarf OpenSP's ParserMessages.rc and populate @msg.
my $msgfile = $ARGV[0] || "/usr/local/validator/htdocs/config/verbosemsg.rc";
-open FH, $msgfile
- or die "Can't open OpenSP ParserMessages file '$msgfile': $!";
+open FH, $msgfile or
+ die "Can't open OpenSP ParserMessages file '$msgfile': $!";
while (<FH>) {
- next if /^\s*$/;
- my($id, $s) = split /, /, $_, 2;
- $id += 0; # Force numerical (kill leading space)...
- chomp $s; # Strip newline from end of message...
- push @msg, [$id, $s];
+ next if /^\s*$/;
+ my ($id, $s) = split /, /, $_, 2;
+ $id += 0; # Force numerical (kill leading space)...
+ chomp $s; # Strip newline from end of message...
+ push @msg, [$id, $s];
}
close FH;
@@ -53,7 +53,7 @@ print <<".EOF.";
# the last digit of the "muid" is replaced at runtime).
#
for (@msg) {
- print <<"_.EOF._";
+ print <<"_.EOF._";
<msg $_->[0]>
original = $_->[1]
verbose <<.EOF.