summaryrefslogtreecommitdiffstats
path: root/httpd/cgi-bin/check
diff options
context:
space:
mode:
authorlink <link@localhost>2004-05-09 15:56:55 +0000
committerlink <link@localhost>2004-05-09 15:56:55 +0000
commit017b559b9d1db8a18f9ff4493bb5eae06639433e (patch)
tree8168bdfc9f1063af585cb03cc104ac31791b3ed9 /httpd/cgi-bin/check
parent8121159457b0bce7d55a77faf6e84619cf678c37 (diff)
downloadmarkup-validator-017b559b9d1db8a18f9ff4493bb5eae06639433e.zip
markup-validator-017b559b9d1db8a18f9ff4493bb5eae06639433e.tar.gz
markup-validator-017b559b9d1db8a18f9ff4493bb5eae06639433e.tar.bz2
Merging from branch validator-0_6_0-branch, at tag validator-0_6_5-release.
Diffstat (limited to 'httpd/cgi-bin/check')
-rwxr-xr-xhttpd/cgi-bin/check867
1 files changed, 582 insertions, 285 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 8c87943..db7cdfc 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -9,7 +9,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.325 2003-11-12 21:20:38 ville Exp $
+# $Id: check,v 1.326 2004-05-09 15:56:53 link Exp $
#
# Disable buffering on STDOUT!
@@ -29,7 +29,7 @@ use strict;
use warnings;
#
-# Modules.
+# Modules. See also the BEGIN block further down below.
#
# Version numbers given where we absolutely need a minimum version of a given
# module (gives nicer error messages). By default, add an empty import list
@@ -37,10 +37,10 @@ use warnings;
# polluting our namespace.
#
use CGI 2.81 qw(
- -newstyle_urls
- -private_tempfiles
- redirect
- ); # 2.81 for XHTML, and import redirect() function.
+ -newstyle_urls
+ -private_tempfiles
+ redirect
+ ); # 2.81 for XHTML, and import redirect() function.
use CGI::Carp qw(carp croak fatalsToBrowser);
use Config::General 2.06 qw(); # Need 2.06 for -SplitPolicy
@@ -51,7 +51,10 @@ use HTTP::Request qw();
use IO::File qw();
use IPC::Open3 qw(open3);
use LWP::UserAgent 1.90 qw(); # Need 1.90 for protocols_(allowed|forbidden)
+use Net::hostent qw(gethostbyname);
+use Net::IP qw();
use Set::IntSpan qw();
+use Socket qw(inet_ntoa);
use Text::Iconv qw();
use Text::Wrap qw(wrap);
use URI qw();
@@ -85,7 +88,8 @@ use constant O_NONE => 8; # 0000 1000
#
# Define global variables.
-use vars qw($DEBUG $CFG $VERSION);
+use vars qw($DEBUG $CFG $RSRC $VERSION $HAVE_IPC_RUN);
+our $HAVE_SOAP_LITE;
#
@@ -142,13 +146,89 @@ The error reported was: '$@'
}
#
+ # Use IPC::Run on mod_perl if it's available, IPC::Open3 otherwise.
+ $HAVE_IPC_RUN = 0;
+ if ($ENV{MOD_PERL}) {
+ eval {
+ local $SIG{__DIE__};
+ require IPC::Run;
+ IPC::Run->import('run', 'timeout');
+ };
+ $HAVE_IPC_RUN = !$@;
+ }
+ unless ($HAVE_IPC_RUN) {
+ require IPC::Open3;
+ IPC::Open3->import('open3');
+ }
+
+ #FIXME: This is just a framework and highly experimental!
+ #
+ # Load SOAP::Lite if available and allowed by config.
+ $HAVE_SOAP_LITE = FALSE;
+ if (exists $ENV{'HTTP_SOAPACTION'} and $CFG->{'Enable SOAP'} == TRUE) {
+ eval {
+ local $SIG{__DIE__};
+ require SOAP::Transport::HTTP;
+ };
+ $HAVE_SOAP_LITE = !$@;
+ }
+ #FIXME;
+
+ #
+ # Read Resource files... (friendly error messages)
+ my %config_opts = (-ConfigFile => $CFG->{'Verbose Msg'});
+ my %rsrc = Config::General->new(%config_opts)->getall();
+ $RSRC = \%rsrc;
+
+ #
# Set debug flag.
$DEBUG = TRUE if $ENV{W3C_VALIDATOR_DEBUG} || $CFG->{DEBUG};
#
# Strings
- $VERSION = q$Revision: 1.325 $;
+ $VERSION = q$Revision: 1.326 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
+
+ #
+ # Use passive FTP by default.
+ $ENV{FTP_PASSIVE} = 1 unless exists($ENV{FTP_PASSIVE});
+
+
+ #
+ # Read TAB-delimited configuration files. Returns a hash reference.
+ sub read_cfg {
+ my $file = shift;
+ my %cfg;
+
+ my $fh = new IO::File $file;
+ unless (defined $fh) {
+ die <<".EOF.";
+open($file) returned: $!
+(Did you forget to set \$ENV{W3C_VALIDATOR_CFG}
+ or to copy validator.conf to /etc/w3c/validator.conf?)
+.EOF.
+ }
+
+ while (<$fh>) {
+ next if /^\s*$/;
+ next if /^\s*\#/;
+ chomp;
+ my($k, $v) = split /\t+/, $_, 2;
+ $v = '' unless defined $v;
+
+ if ($v =~ s(^file://){}) {
+ $cfg{$k} = &read_cfg($v);
+ } elsif ($v =~ /,/) {
+ $cfg{$k} = [split /,/, $v];
+ } else {
+ # Launder data for Perl 5.8+ taint mode, trusting the config...
+ $v =~ /^(.*)$/;
+ $cfg{$k} = $1;
+ }
+ }
+ undef $fh;
+ return \%cfg;
+ }
} # end of BEGIN block.
#
@@ -165,7 +245,10 @@ delete $ENV{PATH};
#
# Create a new CGI object.
-my $q = new CGI;
+my $q;
+unless ($HAVE_SOAP_LITE) {
+ $q = new CGI;
+}
#
# The data structure that will hold all session data.
@@ -178,7 +261,9 @@ my $File;
#
# The URL to this CGI Script.
-$File->{Env}->{'Self URI'} = $q->url(-query => 0);
+unless ($HAVE_SOAP_LITE) {
+ $File->{Env}->{'Self URI'} = $q->url(-query => 0);
+}
#################################
@@ -233,47 +318,54 @@ $T->param(cfg_home_page => $CFG->{Home_Page});
#
# Preprocess the CGI parameters.
-$q = &prepCGI($File, $q);
-
-#
-# Set session switches.
-$File->{Opt}->{'Outline'} = $q->param('outline') ? TRUE : FALSE;
-$File->{Opt}->{'Show Source'} = $q->param('ss') ? TRUE : FALSE;
-$File->{Opt}->{'Show Parsetree'} = $q->param('sp') ? TRUE : FALSE;
-$File->{Opt}->{'No Attributes'} = $q->param('noatt') ? TRUE : FALSE;
-$File->{Opt}->{'Show ESIS'} = $q->param('esis') ? TRUE : FALSE;
-$File->{Opt}->{'Show Errors'} = $q->param('errors') ? TRUE : FALSE;
-$File->{Opt}->{'Verbose'} = $q->param('verbose') ? TRUE : FALSE;
-$File->{Opt}->{'Debug'} = $q->param('debug') ? TRUE : FALSE;
-$File->{Opt}->{'No200'} = $q->param('No200') ? TRUE : FALSE;
-$File->{Opt}->{'Charset'} = $q->param('charset') ? lc $q->param('charset'): '';
-$File->{Opt}->{'DOCTYPE'} = $q->param('doctype') ? $q->param('doctype') : '';
-$File->{Opt}->{'URI'} = $q->param('uri') ? $q->param('uri') : '';
-$File->{Opt}->{'Output'} = $q->param('output') ? $q->param('output') : 'html';
-
-#
-# "Fallback" info for Character Encoding (fbc), Content-Type (fbt),
-# and DOCTYPE (fbd). If TRUE, the Override values are treated as
-# Fallbacks instead of Overrides.
-$File->{Opt}->{FB}->{Charset} = $q->param('fbc') ? TRUE : FALSE;
-$File->{Opt}->{FB}->{Type} = $q->param('fbt') ? TRUE : FALSE;
-$File->{Opt}->{FB}->{DOCTYPE} = $q->param('fbd') ? TRUE : FALSE;
-
-#
-# If ";debug" was given, let it overrule the value from the config file,
-# regardless of whether it's "0" or "1" (on or off).
-$DEBUG = $q->param('debug') if defined $q->param('debug');
-
-&abort_if_error_flagged($File, O_NONE); # Too early to &print_table.
-
-#
-# Get the file and metadata.
-if ($q->param('uploaded_file')) {
- $File = &handle_file($q, $File);
-} elsif ($q->param('fragment')) {
- $File = &handle_frag($q, $File);
-} elsif ($q->param('uri')) {
- $File = &handle_uri($q, $File);
+if ($HAVE_SOAP_LITE) {
+ SOAP::Transport::HTTP::CGI->dispatch_to('MySOAP')->handle;
+ exit; # SOAP calls do all the processing in the sub...
+} else {
+ $q = &prepCGI($File, $q);
+
+ #
+ # Set session switches.
+ $File->{Opt}->{'Outline'} = $q->param('outline') ? TRUE : FALSE;
+ $File->{Opt}->{'Show Source'} = $q->param('ss') ? TRUE : FALSE;
+ $File->{Opt}->{'Show Parsetree'} = $q->param('sp') ? TRUE : FALSE;
+ $File->{Opt}->{'No Attributes'} = $q->param('noatt') ? TRUE : FALSE;
+ $File->{Opt}->{'Show ESIS'} = $q->param('esis') ? TRUE : FALSE;
+ $File->{Opt}->{'Show Errors'} = $q->param('errors') ? TRUE : FALSE;
+ $File->{Opt}->{'Verbose'} = $q->param('verbose') ? TRUE : FALSE;
+ $File->{Opt}->{'Debug'} = $q->param('debug') ? TRUE : FALSE;
+ $File->{Opt}->{'No200'} = $q->param('No200') ? TRUE : FALSE;
+ # $File->{Opt}->{'Fussy'} = $q->param('fussy') ? TRUE : FALSE;
+ $File->{Opt}->{'Charset'} = $q->param('charset') ? lc $q->param('charset'): '';
+ $File->{Opt}->{'DOCTYPE'} = $q->param('doctype') ? $q->param('doctype') : '';
+ $File->{Opt}->{'URI'} = $q->param('uri') ? $q->param('uri') : '';
+ $File->{Opt}->{'Output'} = $q->param('output') ? $q->param('output') : 'html';
+ $File->{Opt}->{'Max Errors'} = $q->param('me') ? $q->param('me') : '';
+
+ #
+ # "Fallback" info for Character Encoding (fbc), Content-Type (fbt),
+ # and DOCTYPE (fbd). If TRUE, the Override values are treated as
+ # Fallbacks instead of Overrides.
+ $File->{Opt}->{FB}->{Charset} = $q->param('fbc') ? TRUE : FALSE;
+ $File->{Opt}->{FB}->{Type} = $q->param('fbt') ? TRUE : FALSE;
+ $File->{Opt}->{FB}->{DOCTYPE} = $q->param('fbd') ? TRUE : FALSE;
+
+ #
+ # If ";debug" was given, let it overrule the value from the config file,
+ # regardless of whether it's "0" or "1" (on or off).
+ $DEBUG = $q->param('debug') if defined $q->param('debug');
+
+ &abort_if_error_flagged($File, O_NONE); # Too early to &print_table.
+
+ #
+ # Get the file and metadata.
+ if ($q->param('uploaded_file')) {
+ $File = &handle_file($q, $File);
+ } elsif ($q->param('fragment')) {
+ $File = &handle_frag($q, $File);
+ } elsif ($q->param('uri')) {
+ $File = &handle_uri($q, $File);
+ }
}
#
@@ -295,19 +387,6 @@ untie *STDIN;
###############################################################################
#
-# Add warning if we were redirected.
-unless (URI::eq("$File->{Opt}->{URI}", $File->{URI}) and not $File->{'Is Upload'}) {
- &add_warning(
- $File, 'Note:',
- sprintf(
- 'The URI you gave me, &lt;%s&gt;, returned a redirect to &lt;%s&gt;.',
- &ent($File->{Opt}->{URI}),
- &ent($File->{URI}),
- )
- );
-}
-
-#
# Find the XML Encoding.
$File = &find_xml_encoding($File);
@@ -367,19 +446,19 @@ if (&conflict($File->{Opt}->{Charset}, '(detect automatically)')) {
if ($File->{Opt}->{FB}->{Charset} and not $File->{Charset}->{Use}) {
&add_warning($File, 'fallback', 'No Character Encoding Found!', <<".EOF."); # Warn about fallback...
Falling back to "$File->{Charset}->{Override}"
- (<a href="docs/errors.html#fbc">explain...</a>).
+ (<a href="docs/users.html#fbc">explain...</a>).
.EOF.
$File->{Tentative} |= T_ERROR; # Tag it as Invalid.
} else {
# Warn about Override...
unless ($File->{Charset}->{Override} eq $File->{Charset}->{Use}) {
- my $cs_use = &ent($File->{Charset}->{Use});
- my $cs_opt = &ent($File->{Charset}->{Override});
- &add_warning($File, 'override', 'Character Encoding Override in effect!', <<".EOF.");
+ my $cs_use = &ent($File->{Charset}->{Use});
+ my $cs_opt = &ent($File->{Charset}->{Override});
+ &add_warning($File, 'override', 'Character Encoding Override in effect!', <<".EOF.");
The detected character encoding "<code>$cs_use</code>"
has been suppressed and "<code>$cs_opt</code>" used instead.
.EOF.
- $File->{Tentative} |= T_ERROR;
+ $File->{Tentative} |= T_ERROR;
}
}
$File->{Charset}->{Use} = $File->{Charset}->{Override};
@@ -395,6 +474,9 @@ unless ($File->{Charset}->{Use}) { # No charset given...
to the "UTF-8" encoding and will attempt to perform the validation,
but this is likely to fail for all non-trivial documents.
</p>
+.EOF.
+ if ($File->{Opt}->{Verbose}) {
+ $message .= <<".EOF.";
<p>The sources I tried to find encoding information include:</p>
<ul>
<li>The HTTP Content-Type field.</li>
@@ -423,8 +505,8 @@ unless ($File->{Charset}->{Use}) { # No charset given...
tips on how to do this</a> in popular web server implementations.
</p>
.EOF.
- $message .= &iana_charset_blurb();
- $message .= <<".EOF.";
+ $message .= &iana_charset_blurb();
+ $message .= <<".EOF.";
<p>
To quickly check whether the document would validate after addressing
the missing character encoding information, you can use the "Encoding"
@@ -434,6 +516,12 @@ unless ($File->{Charset}->{Use}) { # No charset given...
common encodings if you are not sure what encoding to choose.
</p>
.EOF.
+ }
+ else {
+ $message .= <<".EOF.";
+ <p>So what should I do? <a href="docs/help.html#faq-charset">Tell me more...</a></p>
+.EOF.
+ }
my $title = 'No Character Encoding Found! Falling back to UTF-8.';
&add_warning($File, 'fatal', $title, $message);
$File->{Tentative} |= T_ERROR; # Can never be valid.
@@ -521,134 +609,175 @@ $File->{Type} = 'mathml+xml' if $File->{DOCTYPE} =~ /mathml/i;
$File = &charset_conflicts($File);
#
-# By default, use SGML catalog file and SGML Declaration.
-my $catalog = File::Spec->catfile($CFG->{SGML_Library}, 'sgml.soc');
-my @xmlflags = qw(
- -R
- -wvalid
- -wnon-sgml-char-ref
- -wno-duplicate
- );
+# Abandon all hope ye who enter here...
+$File = &parse($File);
+sub parse (\$) {
+ my $File = shift;
-#
-# Switch to XML semantics if file is XML.
-if (&is_xml($File)) {
- $catalog = File::Spec->catfile($CFG->{SGML_Library}, 'xml.soc');
- push(@xmlflags, '-wxml');
- &add_warning($File, 'note', 'Note:', <<".EOF.");
+ #
+ # By default, use SGML catalog file and SGML Declaration.
+ my $catalog = File::Spec->catfile($CFG->{'SGML Library'}, 'sgml.soc');
+ my @spopt = qw(
+ -R
+ -wvalid
+ -wnon-sgml-char-ref
+ -wno-duplicate
+ );
+
+ #
+ # Switch to XML semantics if file is XML.
+ if (&is_xml($File)) {
+ $catalog = File::Spec->catfile($CFG->{'SGML Library'}, 'xml.soc');
+ push(@spopt, '-wxml');
+ &add_warning($File, 'note', 'Note:', <<".EOF.");
The Validator XML support has
<a href="http://openjade.sourceforge.net/doc/xml.htm"
title="Limitations in Validator XML support">some limitations</a>.
.EOF.
-}
-
+ } else { # Only add these in SGML mode.
+# if ($File->{Opt}->{'Fussy'}) {
+# push @spopt, '-wmin-tag';
+# push @spopt, '-wfully-tagged';
+# push @spopt, '-wrefc';
+# push @spopt, '-wmissing-att-name';
+# push @spopt, '-wdata-delim';
+# &add_warning($File, 'note', 'Note:', <<".EOF.");
+# The Validator is running in "Fussy" mode. In this mode it will generate
+# warnings about some things that are not strictly forbidden in the HTML
+# Recommendation, but that are known to be problematic in popular browsers.
+# In general it is recommended that you fix any such errors regardless, but
+# if in doubt you can rerun the Validator in its lax mode to find out if it
+# will pass your document then.
+#.EOF.
+# }
+ }
-#
-# Defaults for SP; turn off fixed charset mode and set encoding to UTF-8.
-$ENV{SP_CHARSET_FIXED} = 'NO';
-$ENV{SP_ENCODING} = 'UTF-8';
-$ENV{SP_BCTF} = 'UTF-8';
-#
-# Tell onsgmls about the SGML Library.
-$ENV{SGML_SEARCH_PATH} = $CFG->{SGML_Library};
+ #
+ # Defaults for SP; turn off fixed charset mode and set encoding to UTF-8.
+ $ENV{SP_CHARSET_FIXED} = 'NO';
+ $ENV{SP_ENCODING} = 'UTF-8';
+ $ENV{SP_BCTF} = 'UTF-8';
-#
-# Set final command to use.
-#my @cmd = ($CFG->{SGML_Parser}, '-c', $catalog, '-E0', @xmlflags);
-my @cmd = ('/usr/bin/onsgmls', '-c', '/usr/local/validator/htdocs/sgml-lib/xml.soc', '-E0', @xmlflags);
+ #
+ # Tell onsgmls about the SGML Library.
+ $ENV{SGML_SEARCH_PATH} = $CFG->{'SGML Library'};
-#
-# Set debug info for HTML report.
-$T->param(is_debug => $DEBUG);
-$T->param(
- debug => [
- {name => 'Command', value => &ent("@cmd")},
- {name => 'SP_CHARSET_FIXED', value => &ent($ENV{SP_CHARSET_FIXED})},
- {name => 'SP_ENCODING', value => &ent($ENV{SP_ENCODING})},
- {name => 'SP_BCTF', value => &ent($ENV{SP_BCTF})},
- ],
- );
+ #
+ # Set the command to execute.
+ my @cmd = ($CFG->{'SGML Parser'}, '-n', '-c', $catalog, @spopt);
+ #
+ # Set debug info for HTML report.
+ $T->param(is_debug => $DEBUG);
+ $T->param(
+ debug => [
+ {name => 'Command', value => &ent("@cmd")},
+ {name => 'SP_CHARSET_FIXED', value => &ent($ENV{SP_CHARSET_FIXED})},
+ {name => 'SP_ENCODING', value => &ent($ENV{SP_ENCODING})},
+ {name => 'SP_BCTF', value => &ent($ENV{SP_BCTF})},
+ ],
+ );
+
+ #FIXME: This needs a UI and testing!
+ #
+ # Set onsgmls' -E switch to the number of errors requested.
+ if ($File->{Opt}->{'Max Errors'} =~ m(^all$)i) {
+ push @cmd, '-E0';
+ } elsif ($File->{Opt}->{'Max Errors'} =~ m(^(\d+)$)) {
+ my $numErr = $1;
+ if ($numErr >= 200) {
+ $numErr = 200;
+ } elsif ($numErr <= 0) {
+ $numErr = 0; #FIXME: Should add feature to supress error output in this case.;
+ }
+ push @cmd, '-E' . $numErr;
+ } else {
+ push @cmd, '-E' . ($CFG->{'Max Errors'} || 0); # "-E0" means "all".
+ }
+ #FIXME;
-#
-# Temporary filehandles.
-my $spin = IO::File->new_tmpfile;
-my $spout = IO::File->new_tmpfile;
-my $sperr = IO::File->new_tmpfile;
+ #
+ # Temporary filehandles.
+ my $spin = IO::File->new_tmpfile;
+ my $spout = IO::File->new_tmpfile;
+ my $sperr = IO::File->new_tmpfile;
-#
-# Dump file to a temp file for parsing.
-for (@{$File->{Content}}) {
- print $spin $_, "\n";
-}
+ #
+ # Dump file to a temp file for parsing.
+ for (@{$File->{Content}}) {
+ print $spin $_, "\n";
+ }
-#
-# seek() to beginning of the file.
-seek $spin, 0, 0;
+ #
+ # seek() to beginning of the file.
+ seek $spin, 0, 0;
-#
-# Run it through SP, redirecting output to temporary files.
-my $pid = do {
- no warnings 'once';
- local(*SPIN, *SPOUT, *SPERR) = ($spin, $spout, $sperr);
- open3("<&SPIN", ">&SPOUT", ">&SPERR", @cmd);
-};
+ #
+ # Run it through SP, redirecting output to temporary files.
+ if ($HAVE_IPC_RUN) {
+ local $^W = 0;
+ run(\@cmd, $spin, $spout, $sperr, timeout(60));
+ undef $spin;
+ } else {
+ my $pid = do {
+ no warnings 'once';
+ local(*SPIN, *SPOUT, *SPERR) = ($spin, $spout, $sperr);
+ open3("<&SPIN", ">&SPOUT", ">&SPERR", @cmd);
+ };
+ undef $spin;
+ waitpid $pid, 0;
+ }
-#
-# Close input file, reap the kid, and rewind temporary filehandles.
-undef $spin;
-waitpid $pid, 0;
-seek $_, 0, 0 for $spout, $sperr;
+ #
+ # Rewind temporary filehandles.
+ seek $_, 0, 0 for $spout, $sperr;
-$File = &parse_errors($File, $sperr); # Parse error output.
-undef $sperr; # Get rid of no longer needed filehandle.
+ $File = &parse_errors($File, $sperr); # Parse error output.
+ undef $sperr; # Get rid of no longer needed filehandle.
-$File->{ESIS} = [];
-my $elements_found = 0;
-while (<$spout>) {
- push @{$File->{'DEBUG'}->{ESIS}}, $_;
- $elements_found++ if /^\(/;
+ $File->{ESIS} = [];
+ my $elements_found = 0;
+ while (<$spout>) {
+ push @{$File->{'DEBUG'}->{ESIS}}, $_;
+ $elements_found++ if /^\(/;
- if (/^Axmlns() \w+ (.*)/ or /^Axmlns:([^ ]+) \w+ (.*)/) {
- if (not $File->{Namespace} and $elements_found == 0 and $1 eq "") {
- $File->{Namespace} = $2;
+ if (/^Axmlns() \w+ (.*)/ or /^Axmlns:([^ ]+) \w+ (.*)/) {
+ if (not $File->{Namespace} and $elements_found == 0 and $1 eq "") {
+ $File->{Namespace} = $2;
+ }
+ $File->{Namespaces}->{$2}++;
}
- $File->{Namespaces}->{$2}++ unless $2 eq $File->{Namespace};
- }
- next if / IMPLIED$/;
- next if /^ASDAFORM CDATA /;
- next if /^ASDAPREF CDATA /;
- chomp; # Removes trailing newlines
- push @{$File->{ESIS}}, $_;
-}
-undef $spout;
+ next if / IMPLIED$/;
+ next if /^ASDAFORM CDATA /;
+ next if /^ASDAPREF CDATA /;
+ chomp; # Removes trailing newlines
+ push @{$File->{ESIS}}, $_;
+ }
+ undef $spout;
-#
-# Check whether the parser thought it was Valid.
-if ($File->{ESIS}->[-1] =~ /^C$/) {
- delete $File->{ESIS}->[-1];
- $File->{'Is Valid'} = TRUE;
-} else {
- $File->{'Is Valid'} = FALSE;
-}
+ if ($File->{ESIS}->[-1] =~ /^C$/) {
+ undef $File->{ESIS}->[-1];
+ $File->{'Is Valid'} = TRUE;
+ } else {
+ $File->{'Is Valid'} = FALSE;
+ }
-#
-# Extract the Namespaces.
-$File->{Namespaces} = [map {name => '', uri => $_}, keys %{$File->{Namespaces}}];
+ #
+ # Set Version to be the FPI initially.
+ $File->{Version} = $File->{DOCTYPE};
-#
-# Set Version to be the FPI initially.
-$File->{Version} = $File->{DOCTYPE};
+ #
+ # Extract any version attribute from the ESIS.
+ for (@{$File->{ESIS}}) {
+ no warnings 'uninitialized';
+ next unless /^AVERSION CDATA (.*)/;
+ $File->{Version} = $1;
+ last;
+ }
-#
-# Extract any version attribute from the ESIS.
-for (@{$File->{ESIS}}) {
- no warnings 'uninitialized';
- next unless /^AVERSION CDATA (.*)/;
- $File->{Version} = $1;
- last;
+ return $File;
}
#
@@ -673,15 +802,13 @@ if (my $prettyver = $CFG->{Types}->{$File->{Version}}->{Display}) {
if (&is_xml($File) and $File->{Namespace}) {
my $rns = &ent($File->{Namespace});
if (&is_xhtml($File) and $File->{Namespace} ne 'http://www.w3.org/1999/xhtml') {
- &add_warning(
- $File, 'warning', 'Warning:',
- "Unknown namespace (&#171;<code>$rns</code>&#187;) for text/html document!",
- );
+ &add_warning($File, 'warning', 'Warning:',
+ "Unknown namespace (&#171;<code>$rns</code>&#187;) for text/html document!",
+ );
} elsif (&is_svg($File) and $File->{Namespace} ne 'http://www.w3.org/2000/svg') {
- &add_warning(
- $File, 'warning', 'Warning:',
- "Unknown namespace (&#171;<code>$rns</code>&#187;) for SVG document!",
- );
+ &add_warning($File, 'warning', 'Warning:',
+ "Unknown namespace (&#171;<code>$rns</code>&#187;) for SVG document!",
+ );
}
}
@@ -711,24 +838,6 @@ if (defined $File->{Tentative}) {
}
}
-unless ($File->{Opt}->{Verbose}) {
- unless ($File->{'Is Upload'}) {
- my $thispage = $File->{Env}->{'Self URI'};
- my $escaped_uri = uri_escape($File->{URI});
- $thispage .= qq(?uri=$escaped_uri);
- $thispage .= ';ss=1' if $File->{Opt}->{'Show Source'};
- $thispage .= ';sp=1' if $File->{Opt}->{'Show Parsetree'};
- $thispage .= ';noatt=1' if $File->{Opt}->{'No Attributes'};
- $thispage .= ';outline=1' if $File->{Opt}->{'Outline'};
-
- &add_warning($File, 'Note:', <<".EOF.");
- You can enable <a href="$thispage;verbose=1">verbose results</a> from
- the <a href="detailed.html">Extended Interface</a>.
-.EOF.
- }
-}
-
-
if ($File->{Opt}->{Output} eq 'xml') {
&report_xml($File);
} elsif ($File->{Opt}->{Output} eq 'earl') {
@@ -743,7 +852,6 @@ if ($File->{Opt}->{Output} eq 'xml') {
&report_valid($File, $T);
} else {
$T->param(VALID => FALSE);
- $File->{Opt}->{'Show Source'} = TRUE;
$T->param(file_errors => &report_errors($File));
}
@@ -811,8 +919,8 @@ sub prep_template ($$) {
#
# Namespaces...
$T->param(file_namespace => &ent($File->{Namespace}));
- $T->param(file_namespaces => $File->{Namespaces})
- if $File->{Namespaces};
+# $T->param(file_namespaces => $File->{Namespaces})
+# if $File->{Namespaces};
}
#
@@ -865,10 +973,10 @@ sub add_warning ($$$$) {
my $Message = shift;
push @{$File->{Warnings}}, {
- Class => $Class,
- Title => $Title,
- Message => $Message,
- };
+ Class => $Class,
+ Title => $Title,
+ Message => $Message,
+ };
}
@@ -942,11 +1050,10 @@ Content-Type: text/html; charset=utf-8
<p>
Of course, you may not want to trust me with this information,
which is fine. I can tell you that I don't log it or do
- anything else nasty with it, and you can <a
- href="http://validator.w3.org/source/">download the source for
- this service</a> to see what it does, but you have no guarantee
- that this is actually the code I'm using; you basically have to
- decide whether to trust me or not. :-)
+ anything else nasty with it, and you can <a href="source/">download the
+ source code for this service</a> to see what it does, but you have no
+ guarantee that this is actually the code I'm using; you basically have to
+ decide whether to trust me or not :-)
</p>
<p>
You should also be aware that the way we proxy this authentication
@@ -1005,19 +1112,13 @@ sub handle_uri {
my $q = shift; # The CGI object.
my $File = shift; # The master datastructure.
- my $uri = new URI $q->param('uri'); # The URI to fetch.
+ my $uri = new URI (ref $q ? $q->param('uri') : $q);
my $ua = new LWP::UserAgent;
$ua->agent("W3C_Validator/$VERSION " . $ua->agent);
$ua->parse_head(0); # Parse the http-equiv stuff ourselves. @@ Why?
- # @@@FIXME@@@:
- # Disable checking if the URI is local (or private) for security reasons,
- # or at least make it configurable to do so.
- # eg. /^(localhost(\.localdomain)?|127\..+)$/ (+ private networks)
- # Net::IP from CPAN could be useful here.
- #
- $ua->protocols_allowed($CFG->{Allowed_Protocols});
+ $ua->protocols_allowed($CFG->{'Allowed Protocols'} || ['http', 'https']);
unless ($ua->is_protocol_supported($uri)) {
$File->{'Error Flagged'} = TRUE;
@@ -1025,6 +1126,23 @@ sub handle_uri {
return $File;
}
+ my $addr = my $iptype = undef;
+ if (my $host = gethostbyname($uri->host())) {
+ $addr = inet_ntoa($host->addr()) if $host->addr();
+ if ($addr && (my $ip = Net::IP->new($addr))) {
+ $iptype = $ip->iptype();
+ }
+ }
+ $iptype = 'PUBLIC'
+ if ($iptype && $iptype eq 'PRIVATE' && $CFG->{'Allow Private IPs'});
+ if ($iptype && $iptype ne 'PUBLIC') {
+ $File->{'Error Flagged'} = TRUE;
+ $File->{'Error Message'} = &ip_rejected($uri->host(), $addr);
+ return $File;
+ }
+ undef $iptype;
+ undef $addr;
+
my $req = new HTTP::Request(GET => $uri);
# If we got a Authorization header, the client is back at it after being
@@ -1045,11 +1163,12 @@ sub handle_uri {
return $File;
}
- my($type, $ct, $charset) = &parse_content_type(
- $File,
- $res->header('Content-Type'),
- scalar($res->request->url),
- );
+ my($type, $ct, $charset)
+ = &parse_content_type(
+ $File,
+ $res->header('Content-Type'),
+ scalar($res->request->url),
+ );
my $lastmod = undef;
if ( $res->last_modified ) {
@@ -1123,7 +1242,7 @@ sub handle_frag {
sub parse_content_type {
my $File = shift;
my $Content_Type = shift;
- my $url = shift || '';
+ my $url = shift;
my $charset = '';
my $type = '';
@@ -1143,8 +1262,8 @@ sub parse_content_type {
if ($type =~ m(/)) {
if ($type =~ m(text/css) and defined $url) {
print redirect
- 'http://jigsaw.w3.org/css-validator/validator?uri='
- . uri_escape $url;
+ 'http://jigsaw.w3.org/css-validator/validator?uri='
+ . uri_escape $url;
exit;
} else {
$File->{'Error Flagged'} = TRUE;
@@ -1326,7 +1445,7 @@ it to reflect this new DOCTYPE.
if ($File->{Opt}->{FB}->{DOCTYPE}) {
&add_warning($File, 'fallback', 'No DOCTYPE Found!', <<".EOF.");
-Falling back to HTML 4.01 Transitional. (<a href="docs/errors.html#fbd">explain...</a>)
+Falling back to HTML 4.01 Transitional. (<a href="docs/users.html#fbd">explain...</a>)
.EOF.
$File->{Tentative} |= T_ERROR; # Tag it as Invalid.
} else {
@@ -1353,6 +1472,7 @@ sub parse_errors ($$) {
$File->{Errors} = []; # Initialize to an (empty) anonymous array ref.
for (<$fh>) {
push @{$File->{'DEBUG'}->{Errors}}, $_;
+ chomp;
my($err, @errors);
next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/;
next if /numbers exceeding 65535 not supported/;
@@ -1368,18 +1488,19 @@ sub parse_errors ($$) {
$err->{src} = $errors[1];
$err->{line} = $errors[2];
$err->{char} = $errors[3];
- $err->{type} = $errors[4];
+ $err->{num} = $errors[4] || '';
+ $err->{type} = $errors[5] || '';
if ($err->{type} eq 'E' or $err->{type} eq 'X' or $err->{type} eq 'Q') {
- $err->{msg} = join ':', @errors[5 .. $#errors];
+ $err->{msg} = join ':', @errors[6 .. $#errors];
} elsif ($err->{type} eq 'W') {
-# &add_warning(
-# $File, 'fake', 'Warning:',
-# "Line $err->{line}, column $err->{char}: $errors[5]",
-# );
- $err->{msg} = join ':', @errors[5 .. $#errors];
+ &add_warning($File, 'fake', 'Warning:',
+ "Line $err->{line}, column $err->{char}: $errors[6]",
+ );
+ $err->{msg} = join ':', @errors[6 .. $#errors];
} else {
$err->{type} = 'I';
- $err->{msg} = $errors[4];
+ $err->{num} = '';
+ $err->{msg} = join ':', @errors[4 .. $#errors];
}
# No or unknown FPI and a relative SI.
@@ -1401,19 +1522,19 @@ sub parse_errors ($$) {
# No DOCTYPE.
if ($err->{msg} =~ m(prolog can\'t be omitted)) {
my $class = 'fatal';
- my $title = 'No DOCTYPE Declaration Found! Falling Back to HTML 4.01 Transitional';
+ my $title = 'No DOCTYPE Found! Falling Back to HTML 4.01 Transitional';
my $message = <<".EOF.";
- <div>
<p>
A DOCTYPE Declaration is mandatory for most current markup languages
and without one it is impossible to reliably validate this document.
I am falling back to "HTML 4.01 Transitional" and will attempt to
- validate the document anyway, but this is very likley to produce
+ validate the document anyway, but this is very likely to produce
spurious error messages for most non-trivial documents.
</p>
.EOF.
- $message .= &doctype_spiel();
- $message .= <<".EOF.";
+ if ($File->{Opt}->{Verbose}) {
+ $message .= &doctype_spiel();
+ $message .= <<".EOF.";
<p>
The W3C QA Activity maintains a <a
href="http://www.w3.org/QA/2002/04/valid-dtd-list.html">List of
@@ -1422,14 +1543,19 @@ sub parse_errors ($$) {
"<a href="http://htmlhelp.com/tools/validator/doctype.html">Choosing
a DOCTYPE</a>".
</p>
- </div>
.EOF.
-
+ }
+ else {
+ $message .= <<".EOF.";
+ <p>So what should I do? <a href="docs/help.html#faq-doctype">Tell me more...</a></p>
+.EOF.
+ }
&add_warning($File, $class, $title, $message);
next; # Don't report this as a normal error.
}
&abort_if_error_flagged($File, O_DOCTYPE);
+ $err->{msg} =~ s/^\s*//;
push @{$File->{Errors}}, $err;
}
undef $fh;
@@ -1440,30 +1566,96 @@ sub parse_errors ($$) {
# Generate a HTML report of detected errors.
sub report_errors ($) {
my $File = shift;
-
my $Errors = [];
if (scalar @{$File->{Errors}}) {
foreach my $err (@{$File->{Errors}}) {
my($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $err->{char});
- # Find index into the %frag hash for the "explanation..." links.
- $err->{idx} = $err->{msg};
- $err->{idx} =~ s/"[^\"]*"/FOO/g;
- $err->{idx} =~ s/[^A-Za-z ]//g;
- $err->{idx} =~ s/\s+/ /g; # Collapse spaces
- $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces. )
- $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
- $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.
+ #DEBUG: Gather vars for print below.
+ my $orglength = length($File->{Content}->[$err->{line}-1]);
+ my $adjlength = length $line;
+ my $orgcol = $err->{char};
+ my $adjcol = $col;
+ #DEBUG;
- $line = &ent($line); # Entity encode.
- $line =~ s/\t/ /g; # Collapse TABs.
+ #
+ # Chop the source line into 3 pieces; the character at which the error
+ # was detected, and everything to the left and right of that position.
+ # That way we can add markup to the relevant char without breaking &ent().
+ #
- if (defined $CFG->{Error_to_URI}->{$err->{idx}}) {
- $err->{uri} = $CFG->{Msg_FAQ_URI} . '#'
- . $CFG->{Error_to_URI}->{$err->{idx}};
+ #
+ # Left side...
+ my $left;
+ {
+ my $offset = 0; # Left side allways starts at 0.
+ my $length;
+
+ if ($col - 1 < 0) { # If error is at start of line...
+ $length = 0; # ...floor to 0 (no negative offset).
+ } elsif ($col == length $line) { # If error is at EOL...
+ $length = $col - 1; # ...leave last char to indicate position.
+ } else { # Otherwise grab everything up to pos of error.
+ $length = $col;
+ }
+ $left = substr $line, $offset, $length;
+ $left = &ent($left);
+ }
+
+ #
+ # The character where the error was detected.
+ my $char;
+ {
+ my $offset;
+ my $length = 1; # Length is always 1; the char where error was found.
+
+ if ($col == length $line) { # If err is at EOL...
+ $offset = $col - 1; # ...then grab last char on line instead.
+ } else {
+ $offset = $col; # Otherwise just grab the char.
+ }
+ $char = substr $line, $offset, $length;
+ $char = &ent($char);
+ }
+
+ #
+ # The right side up to the end of the line...
+ my $right;
+ {
+ my $offset;
+ my $length;
+
+ # Offset...
+ if ($col == length $line) { # If at EOL...
+ $offset = 0; # Don't bother as there is nothing left to grab.
+ } else {
+ $offset = $col + 1; # Otherwise get everything from char-after-error.
+ }
+
+ # Length...
+ if ($col == length $line) { # If at end of line...
+ $length = 0; # ...then don't grab anything.
+ } else {
+ $length = length($line) - ($col - 1); # Otherwise get the rest of the line.
+ }
+ $right = substr $line, $offset, $length;
+ $right = &ent($right);
}
+ $char = qq(<strong title="Position where error was detected.">$char</strong>);
+ $line = $left . $char . $right;
+
+ #DEBUG: Print misc. vars relevant to source display.
+ if ($DEBUG) {
+ $line .= "<br/> <strong>org length: $orglength - adj length: $adjlength - org col: $orgcol - adj col: $adjcol</strong>";
+ }
+ #DEBUG;
+
+ if (defined $CFG->{Error_to_URI}->{$err->{idx}}) {
+ $err->{uri} = $CFG->{Msg_FAQ_URI} . '#'
+ . $CFG->{Error_to_URI}->{$err->{idx}};
+ }
$err->{src} = $line;
$err->{col} = ' ' x $col;
@@ -1482,40 +1674,60 @@ sub outline {
my $outline = '';
my $prevlevel = 0;
- my $indent = 0;
my $level = 0;
for (1 .. $#{$File->{ESIS}}) {
my $line = $File->{ESIS}->[$_];
- next unless $line =~ /^\(H([1-6])$/i;
+ next unless ($line && $line =~ /^\(H([1-6])$/i);
+
$prevlevel = $level;
$level = $1;
- $outline .= " </ul>\n" x ($prevlevel - $level); # perl is so cool.
- if ($level - $prevlevel == 1) {$outline .= " <ul>\n"};
- foreach my $i (($prevlevel + 1) .. ($level - 1)) {
- $outline .= qq( <ul>\n <li class="warning">A level $i heading is missing!</li>\n);
+ my $TAB = $level + 2;
+
+ if ($prevlevel == 0) {
+ print " <ul>\n";
+ } else {
+ if ($level < $prevlevel) {
+ print "</li>\n";
+ for (my $i = $prevlevel; $i > $level; $i--) {
+ print " " x ($i + 2), "</ul>\n";
+ print " " x (($i + 2) - 1), "</li>\n";
+ }
+ } elsif ($level == $prevlevel) {
+ print "</li>\n";
+ } elsif ($level > $prevlevel) {
+ if ($level - $prevlevel > 1) {
+ foreach my $i (($prevlevel + 1) .. ($level - 1)) {
+ print "\n", " " x ($i + 2), "<ul>\n", " " x ($i + 2);
+ print qq(<li class="warning">A level $i heading is missing!);
+ }
+ print "\n", " " x $TAB, "<ul>\n";
+ } else {
+ print "\n", " " x $TAB;
+ print "<ul>\n";
+ }
+ }
}
- if ($level - $prevlevel > 1) {$outline .= " <ul>\n"};
$line = '';
my $heading = '';
until (substr($line, 0, 3) =~ /^\)H$level/i) {
$line = $File->{ESIS}->[$_++];
- $line =~ s/\\011/ /g;
- $line =~ s/\\012/ /g;
if ($line =~ /^-/) {
my $headcont = $line;
substr($headcont, 0, 1) = " ";
- $headcont =~ s/\\n/ /g;
$heading .= $headcont;
} elsif ($line =~ /^AALT CDATA( .+)/i) {
my $headcont = $1;
- $headcont =~ s/\\n/ /g;
$heading .= $headcont;
}
}
+ $heading =~ s/\\011/ /g;
+ $heading =~ s/\\012/ /g;
+ $heading =~ s/\\n/ /g;
+ $heading =~ s/\s+/ /g;
$heading = substr($heading, 1); # chop the leading '-' or ' '.
$heading = &ent($heading);
$outline .= " <li>$heading</li>\n";
@@ -1722,15 +1934,15 @@ sub prepCGI {
if ($q->path_info) {
if ($q->path_info eq '/referer' or $q->path_info eq '/referrer') {
if ($q->referer) {
- print redirect $q->url() . '?uri=' . uri_escape($q->referer);
- exit;
+ $q->param('uri', $q->referer);
+ print redirect &self_url_q($q, $File);
+ exit;
} else {
- print redirect $q->url() . '?uri=' . 'referer';
- exit;
+ print redirect $q->url() . '?uri=' . 'referer';
+ exit;
}
} else {
- my $thispage = &self_url_q($q);
- print redirect $thispage;
+ print redirect &self_url_q($q, $File);
exit;
}
}
@@ -1747,7 +1959,8 @@ sub prepCGI {
# Issue a redirect for uri=referer.
if ($q->param('uri') and $q->param('uri') eq 'referer') {
if ($q->referer) {
- print redirect $q->url() . '?uri=' . uri_escape($q->referer);
+ $q->param('uri', $q->referer);
+ print redirect &self_url_q($q, $File);
exit;
} else {
# Redirected from /check/referer to /check?uri=referer because
@@ -1791,7 +2004,7 @@ sub prepCGI {
# Redirect to a GETable URL if method is POST without a file upload.
if ($q->request_method eq 'POST' and not $File->{'Is Upload'}) {
- my $thispage = &self_url_q($q);
+ my $thispage = &self_url_q($q, $File);
print redirect $thispage;
exit;
}
@@ -1833,6 +2046,24 @@ sub prepSSI {
#
+# Output errors for a rejected IP address.
+sub ip_rejected {
+ my ($host, $ip) = @_;
+ my $msg = $host || 'undefined';
+ $msg = 'of ' . $msg if ($ip && $host ne $ip);
+ return sprintf(<<".EOF.", &ent($msg));
+ <div class="error">
+ <p>
+ Sorry, the IP address %s is not public.
+ For security reasons, validating resources located at non-public IP
+ addresses has been disabled in this service.
+ </p>
+ </div>
+.EOF.
+}
+
+
+#
# Output errors for a rejected URI.
sub uri_rejected {
my $scheme = shift || 'undefined';
@@ -1860,9 +2091,9 @@ sub uri_rejected {
using the File Upload interface.
</p>
<p>
- Support for <abbr title="Secure Sockets Layer">SSL</abbr> and
- <abbr title="Transport Layer Security">TLS</abbr> is a known
- limitation and is beeing tracked as
+ Incomplete support for <abbr title="Secure Sockets Layer">SSL</abbr>
+ and <abbr title="Transport Layer Security">TLS</abbr> is a known
+ limitation and is being tracked as
<a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=77">Issue #77</a>.
</p>
</div>
@@ -1951,7 +2182,11 @@ sub charset_conflicts {
sub transcode {
my $File = shift;
- my ($command, $result_charset) = split " ", $CFG->{Charsets}->{$File->{Charset}->{Use}}, 2;
+ my ($command, $result_charset) = ('', '');
+ if ($CFG->{Charsets}->{$File->{Charset}->{Use}}) {
+ ($command, $result_charset) =
+ split(" ", $CFG->{Charsets}->{$File->{Charset}->{Use}}, 2);
+ }
$result_charset = exact_charset($File, $result_charset);
if ($command eq 'I') {
@@ -1993,7 +2228,20 @@ sub transcode {
$_ = $c->convert($_); # $_ is local!!
if ($in ne "" and $_ eq "") {
push @{$File->{Lines}}, $line;
- $_ = "#### encoding problem on this line, not shown ####";
+ # try to decoded as much as possible of the line
+ my $short = 0; # longest okay
+ my $long = (length $in) - 1; # longest unknown
+ while ($long > $short) { # binary search
+ my $try = int (($long+$short+1) / 2);
+ if ($c->convert(substr($in,0,$try)) eq "") {
+ $long = $try-1;
+ } else {
+ $short = $try;
+ }
+ }
+ my $remain = (length $in) - $short;
+ $_ = $c->convert(substr($in,0,$short))
+ . "#### $remain byte(s) unconvertable ####";
}
}
return $File;
@@ -2126,7 +2374,8 @@ X-W3C-Validator-Errors: $errs
if (defined $File->{Warnings} and scalar @{$File->{Warnings}}) {
print qq( <warnings>\n);
- printf qq( <warning>%s</warning>\n), &ent($_) for @{$File->{Warnings}};
+ printf qq( <warning>%s</warning>\n),
+ &ent($_->{Message}) for @{$File->{Warnings}};
print qq( </warnings>\n);
}
@@ -2434,7 +2683,7 @@ sub conflict {
#
# Construct a self-referential URL from a CGI.pm $q object.
sub self_url_q {
- my $q = shift;
+ my ($q, $File) = @_;
my $thispage = $File->{Env}->{'Self URI'};
$thispage .= '?uri=' . uri_escape($q->param('uri'));
$thispage .= ';ss=1' if $q->param('ss');
@@ -2475,11 +2724,59 @@ sub self_url_file {
$thispage .= ';sp=1' if $File->{Opt}->{'Show Parsetree'};
$thispage .= ';noatt=1' if $File->{Opt}->{'No Attributes'};
$thispage .= ';outline=1' if $File->{Opt}->{'Outline'};
+ $thispage .= ';verbose=1' if $File->{Opt}->{'Verbose'};
$thispage .= ';No200=1' if $File->{Opt}->{'No200'};
return $thispage;
}
+
+
+
+
+################################################################################
+# Abandon all hope ye who enter here... ########################################
+################################################################################
+
+#
+# This is where the SOAP magic happens.
+package MySOAP;
+
+sub check {
+ my $class = shift || '';
+ my $uri = shift || '';
+ my $File = &main::handle_uri($uri, {});
+ $File = &main::find_xml_encoding($File);
+ if ($File->{Charset}->{HTTP}) { warn "HTTP";
+ $File->{Charset}->{Use} = $File->{Charset}->{HTTP};
+ } elsif ($File->{ContentType} =~ m(^text/([-.a-zA-Z0-9]\+)?xml$)) { warn "CT";
+ $File->{Charset}->{Use} = 'us-ascii';
+ } elsif ($File->{Charset}->{XML}) { warn "XML";
+ $File->{Charset}->{Use} = $File->{Charset}->{XML};
+ } elsif ($File->{Charset}->{Auto} =~ /^utf-16[bl]e$/ && $File->{BOM} == 2) { warn "autoBOM";
+ $File->{Charset}->{Use} = 'utf-16';
+ } elsif ($File->{ContentType} =~ m(^application/([-.a-zA-Z0-9]+\+)?xml$)) { warn "app+xml";
+ $File->{Charset}->{Use} = "utf-8";
+ } elsif (&main::is_xml($File) and not $File->{ContentType} =~ m(^text/)) { warn "text";
+ $File->{Charset}->{Use} = 'utf-8';
+ }
+ $File->{Content} = &main::normalize_newlines($File->{Bytes},
+ &main::exact_charset($File, $File->{Charset}->{Use}));
+ $File = &main::preparse($File);
+ unless ($File->{Charset}->{Use}) {
+ $File->{Charset}->{Use} = $File->{Charset}->{META};
+ }
+ $File->{Type} = 'xhtml+xml' if $File->{DOCTYPE} =~ /xhtml/i;
+ $File->{Type} = 'mathml+xml' if $File->{DOCTYPE} =~ /mathml/i;
+ $File = &main::parse($File);
+ if ($File->{'Is Valid'}) {
+ return $File->{ESIS};
+ } else {
+ return $File->{Errors};
+# return join '', map {"$_->{line}:$_->{char}:$_->{msg}\n"} @{$File->{Errors}};
+ }
+}
+
# Local Variables:
# mode: perl
# indent-tabs-mode: nil