Note: This service will be ..."; # # DOCTYPEs my $html32_doctype = q(); my $html40s_doctype = q(); my $html40t_doctype = q(); my $html40f_doctype = q(); my $xhtmlt_doctype = q( plain text version string $pub_ids = &read_cfg($fpis_db); # Errors -> fragment identifier $element_uri = &read_cfg($elem_db); # Element -> URI fragment $file_type = &read_cfg($type_db); # Content -> File -type $doctypes = &read_cfg($dtds_db); # Name -> doctype # # Set up signal handlers. $SIG{TERM} = \&erase_stuff; $SIG{KILL} = \&erase_stuff; $SIG{PIPE} = 'IGNORE'; # # delete() the, possibly tainted, $PATH. delete $ENV{PATH}; ############################################################################# # Process CGI variables ############################################################################# # # Create a new CGI object. my $q = new CGI; # # Backwards compatibility; see # http://lists.w3.org/Archives/Public/www-validator/1999JulSep/0197 # http://lists.w3.org/Archives/Public/www-validator/1999JulSep/0212 if (scalar $q->param) { foreach my $param ($q->param) { $q->param($param, TRUE) unless $q->param($param); } } # # Futz the URI so "/referer" works. if ($q->path_info eq '/referer') { $q->param('uri', $q->referer); } # # Use "url" unless a "uri" was also given. if ($q->param('url') and not $q->param('uri')) { $q->param('uri', $q->param('url')); } # # Supercede URI with an uploaded file. if ($q->param('uploaded_file')) { $q->param('uri', 'upload://' . $q->param('uploaded_file')); } # # Send them to the homepage unless we can extract a URI from either of the # acceptable sources: uri, url or /referer. &redirect_to_home_page unless length($q->param('uri')) > 5; # # Munge the URI to include commonly omitted prefixes/suffixes. $q->param('uri', $q->param('uri') . '/') unless $q->param('uri') =~ m(/); $q->param('uri', 'http://' . $q->param('uri')) if $q->param('uri') =~ m(^www)i; ############################################################################# # Output validation results ############################################################################# # # A string containing the HTML header for validation results. # We save it in a string instead of printing it in case we need to abort before # we have any meaningfull results to report. @@ May not be necessary! my $header = <<"EOF"; Content-Type: text/html; charset=utf-8 $html40t_doctype
Sorry, this type of URI is not supported by this service.
URIs should be in the form:
$abs_svc_uri
(There are other types of URIs, too, but only http://
URIs
are currently supported by this service.)
Sorry, I am unable to validate this document because its returned
content-type was $File->{Type}
, which is not
currently supported by this service.
$command
\n";
}
open CHECKER, "|$command - >$temp.esis"
or die "open(|$command - >$temp.esis) returned: $!\n";
print CHECKER $doctype, "\n" if $guessed_doctype == 1;
if ( $File->{Charset} eq 'utf-8'
or $File->{Charset} eq 'us-ascii'
or $File->{Charset} eq 'unknown') {
for (@{$File->{Content}}) {print CHECKER $_, "\n"};
} else {
# workaround for windows-nnnn charsets missing from glibc<2.2
my $temp_charset = $File->{Charset};
$temp_charset =~ s/^windows-(\d+)$/CP$1/i;
eval {my $c = Text::Iconv->new($temp_charset, 'utf-8')};
if (not $@) {
my $c = Text::Iconv->new($temp_charset, 'utf-8');
for (@{$File->{Content}}) {print CHECKER $c->convert("$_\n")};
} else {
&print_charset_error($@, $File->{Charset});
&clean_up_and_exit;
}
}
close CHECKER;
open ERRORS, "<$temp" or die "open($temp) returned: $!\n";
my @errors = $File->{HTTP_Charset}
") is different from the one
specified in the META element ("$File->{META_Charset}
").
I will use "$File->{Charset}
" for this validation.
EOHD
} elsif ($File->{HTTP_Charset} ne $File->{XML_Charset}
and $File->{HTTP_Charset} ne ''
and $File->{XML_Charset} ne ''
and $File->{Charset} ne 'unknown') {
print <<"EOHD";
The character encoding specified in the HTTP
header ("$File->{HTTP_Charset}
") is different from the one
specified in the XML declaration ("$File->{XML_Charset}
").
I will use "$File->{Charset}
" for this validation.
EOHD
}
print ' ' x 4, qq(Below are the results of checking this document for XML well-formedness${validity_blurb}.
EOHD } else { print <<"EOHD";Below are the results of attempting to parse this document with an SGML parser.
EOHD } if ( $? || ($guessed_doctype == 1) ) { my($line, $col, $type, $msg); print "Fatal error! $msg\n\n"; print "
I couldn't parse this document, because it " . "uses a public\n identifier that's not in my catalog!\n
\n"; &output_doctype_spiel; last; } if ( $msg =~ /unrecognized ({{)?DOCTYPE(}})?/i ) { print "Fatal error! $msg\n\n"; print "
I couldn't parse this document, because it " . "uses a public\n identifier that's not in my catalog!\n
\n"; &output_doctype_spiel; last; } if ( $msg =~ /^cannot open / ) { print "Fatal error! $msg\n\n"; print "
I couldn't parse this document, because it " . "makes reference to\n a system-specific file instead of " . "using a well-known public identifier\n to specify the " . "level of HTML being used.\n
\n"; &output_doctype_spiel; last; } if ( $msg =~ /^cannot find / ) { print "Fatal error! $msg\n\n"; print "
I couldn't parse this document, because it " . "makes reference to\n a system-specific file instead of " . "using a well-known public identifier\n to specify the " . "level of HTML being used.\n
\n"; &output_doctype_spiel; last; } $line-- if ( $guessed_doctype == 1 ); my $newline = $File->{Content}->[$line - 1]; # make sure there are no ^P's or ^Q's in the file, since we need to use # them to represent '<' and '>' temporarily. We'll just change them to # literal P's and Q's for a lack of anything better to do with them. my $lt = "\020"; my $gt = "\021"; $newline =~ s/\020/P/go; $newline =~ s/\021/Q/g; my $orig_col = $col; ($newline, $col) = &truncate_line($newline, $col); # temporarily strip curlies from lq-nsgmls output. # @@ should link HTML elements using $elem_db instead. $msg =~ s/[{}]//g; # figure out the index into the %frag associative array for the # "explanation..." links to the KGV FAQ. my $msgindex = $msg; $msgindex =~ s/"[^\"]+"/FOO/g; $msgindex =~ s/[^A-Za-z ]//g; $newline =~ s/&/&/go; $newline =~ s/</go; $newline =~ s/${lt}//g; $newline =~ s/\t/ /g; $newline =~ s/ //g; print "\n"; print "$newline
\n"; print " " x ($col+2); # 2 is the number of spaces beforeabove print " " x 4 if $col != $orig_col; # only for truncated lines print "^
\n"; print qq{Error: $msg}; if (defined $frag->{$msgindex}) { print qq{ (explanation...)}; } print "
\n"; } print "\n Sorry, I can't validate this document.\n
\n"; } elsif ( $File->{Type} eq 'xml' ) { print "\n\n Sorry, this document is not well-formed XML.\n
\n"; } else { print "\n\n Sorry, this document does not validate as $version.\n
\n\n"; &output_css_validator_blurb( $q->param('uri') ); } } else { my $gifborder = ' border="0"'; my $xhtmlendtag = ''; my $image_uri; my $alttext; my $gifhw; if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) { print "\n\n No errors found! "; print "*\n\n"; } else { print "\n
\n No errors found!\n\n"; } if ( $version ne "unknown" ) { if ( $version =~ /^HTML 2\.0$/ ) { $image_uri = "http://validator.w3.org/images/vh20"; $alttext = "Valid HTML 2.0!"; $gifborder = ""; } elsif ( $version =~ /HTML 3\.2 ) { $image_uri = "http://www.w3.org/Icons/valid-html32"; $alttext = "Valid HTML 3.2!"; $gifhw = ' height="31" width="88"'; } elsif ( $version =~ /HTML 4\.0<\/a> Strict$/ ) { $image_uri = "http://www.w3.org/Icons/valid-html40"; $alttext = "Valid HTML 4.0!"; $gifborder = ""; $gifhw = ' height="31" width="88"'; } elsif ( $version =~ /HTML 4\.0<\/a> / ) { $image_uri = "http://www.w3.org/Icons/valid-html40"; $alttext = "Valid HTML 4.0!"; $gifhw = ' height="31" width="88"'; } elsif ( $version =~ /HTML 4\.01<\/a> Strict$/ ) { $image_uri = "http://www.w3.org/Icons/valid-html401"; $alttext = "Valid HTML 4.01!"; $gifborder = ""; $gifhw = ' height="31" width="88"'; } elsif ( $version =~ /HTML 4\.01<\/a> / ) { $image_uri = "http://www.w3.org/Icons/valid-html401"; $alttext = "Valid HTML 4.01!"; $gifhw = ' height="31" width="88"'; } elsif ( $version =~ /XHTML 1\.0<\/a> / ) { $image_uri = "http://www.w3.org/Icons/valid-xhtml10"; $alttext = "Valid XHTML 1.0!"; $gifborder = ""; $gifhw = ' height="31" width="88"'; $xhtmlendtag = " /"; } elsif ( $version =~ /HTML 3\.0/ ) { $image_uri = "http://validator.w3.org/images/vh30"; $alttext = "Valid HTML 3.0!"; } elsif ( $version =~ /Netscape/ ) { $image_uri = "http://validator.w3.org/images/vhns"; $alttext = "Valid Netscape-HTML!"; } elsif ( $version =~ /Hotjava/ ) { $image_uri = "http://validator.w3.org/images/vhhj"; $alttext = "Valid Hotjava-HTML!"; } if ( defined $image_uri ) { print <<"EOHD";
Congratulations, this
document validates as $version!
To show your readers that you have taken the care to create an interoperable Web page, you may display this icon on any page that validates. Here is the HTML you could use to add this icon to your Web page:
<p> <a href="${abs_svc_uri}check/referer"><img$gifborder src="$image_uri" alt="$alttext"$gifhw$xhtmlendtag></a> </p>
If you like, you can download a copy of this image (in PNG or GIF format) to keep in your local web directory, and change the HTML fragment above to reference your local image rather than the one on this server.
EOHD } } if ( ( $version eq "unknown" ) || ( ! defined $image_uri ) ) { if ( $guessed_doctype != 0 ) { # if no doctype, only checked wf-ness print "\n Congratulations, this document is well-formed XML.\n
\n"; } else { # checked validity print "\n Congratulations, this document validates as the document type specified! (I don't have an icon for this one yet, sorry.)\n
\n"; } } unless ($q->param('uploaded_file')) { my $thispage = $q->self_url; &output_css_validator_blurb($q->param('uri')); print <<"EOHD";If you would like to create a link to this page (i.e., this validation result) to make it easier to re-validate this page in the future or to allow others to validate your page, the URI is:
$thispage
(Or, you can just add the current page to your bookmarks or hotlist.)
EOHD } if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) { print qq{This validator is based on SP, which has some limitations in its support for XML.
}; } } if ( $q->param('weblint') ) { my $pedanticflags; my $pedantic_blurb; if ( $q->param('pw') ) { $pedanticflags = '-pedantic -e mailto-link'; $pedantic_blurb = ' (in "pedantic" mode)'; } else { $pedanticflags = ''; } print <<"EOF";Below are the results of running Weblint on this document$pedantic_blurb:
Note: Weblint is a useful HTML syntax and style checker, but does not do true HTML validation. Also, the version of weblint used by this service has not been updated for some time, so some of the messages below may be misleading or inaccurate.
EOF open WEBLINT, "|$weblint -s $pedanticflags - 2>&1 >$temp.weblint" or die "open($weblint) returned: $!\n"; for (@{$File->{Content}}) {print WEBLINT $_, "\n"}; close WEBLINT; print "\n\n"; if ( $? ) { print "\n Looks good to me!\n\n"; } print "\n\n"; } if ($q->param('outline')) { print <<'EOF';
Below is an outline for this document, automatically generated from the
heading tags (<H1>
through <H6>
.)
If this does not look like a real outline, it is likely that the heading tags are not being used properly. (Headings should reflect the logical structure of the document; they should not be used simply to add emphasis, or to change the font size.)
Below is the source input I used for this validation:
EOF if ($guessed_doctype == 1) { my $gd = $doctype . "\n"; $gd =~ s/&/&/go; $gd =~ s/</go; printf "%4d: %s", 0, $gd; } for (@{$File->{Content}}) { s/&/&/go; s/</go; printf "%4d: %s\n", $line, $line, $_; $line++; } print "\n
I am excluding the attributes, as you requested.
EOF } else { print <<'EOF';You can also view this parse tree without attributes by selecting the appropriate option on the form.
EOF } my $indent = 0; my $prevdata = ''; print "\n"; foreach my $line (@esis) { if ($q->param('noatt')) { # don't show attributes next if $line =~ /^A/; next if $line =~ /^\(A$/; next if $line =~ /^\)A$/; } $line =~ s/\\n/ /g; $line =~ s/\\011/ /g; $line =~ s/\\012/ /g; $line =~ s/\s+/ /g; next if $line =~ /^-\s*$/; if ($line =~ /^-/) { substr($line, 0, 1) = ' '; $prevdata .= $line; next; } elsif ($prevdata) { $prevdata =~ s/&/&/go; $prevdata =~ s/</go; $prevdata =~ s/\s+/ /go; print wrap(' ' x $indent, ' ' x $indent, $prevdata), "\n"; undef $prevdata; } $line =~ s/&/&/go; $line =~ s/</go; if ($line =~ /^\)/) { $indent -= 2; } my $printme; chomp($printme = $line); $printme =~ s{^([()])(.*)} # reformat and add links on HTML elements { my $close = ''; $close = "/" if $1 eq ")"; # ")" -> close-tag "<" . $close . "{lc($2)} . "\">$2<\/a>>" }egx; $printme =~ s,^A, A,; # indent attributes a bit print ' ' x $indent, $printme, "\n"; if ($line =~ /^\(/) { $indent += 2; } } print "\n"; print "
You should make the first line of your HTML document a DOCTYPE declaration, for example, for a typical HTML 4.01 document:
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <HTML> <HEAD> <TITLE>Title</TITLE> </HEAD> <BODY> <-- ... body of document ... --> </BODY> </HTML>EOF } sub output_closing { print <<"EOF";
\n"; print " Please see the validation service's home page.\n"; print "
\n"; &clean_up_and_exit; } sub build_jump_links { my $text; my $count = 0; $count++ if $q->param('ss'); $count++ if $q->param('sp'); $count++ if $q->param('weblint'); $count++ if $q->param('outline'); if ( $count ) { $text .= "\n Jump to: "; if ( $q->param('weblint') ) { $text .= "Weblint Results"; $count--; $text .= " or " if ( $count == 1 ); $text .= ", " if ( $count > 1 ); } if ( $q->param('outline') ) { $text .= "Outline"; $count--; $text .= " or " if ( $count == 1 ); $text .= ", " if ( $count > 1 ); } if ( $q->param('ss') ) { $text .= "Source Listing"; $count--; $text .= " or " if ( $count == 1 ); $text .= ", " if ( $count > 1 ); } if ( $q->param('sp') ) { $text .= "Parse Tree"; } $text .= ".\n
\n\n"; } return $text; } # # Check if the document has a doctype; if it doesn't, try to guess # an appropriate one given the elements used. Returns 2 values. # The first value is: # 0 if there was a DOCTYPE, # 1 if there was no doctype and no xmlns= attribute # on the first element in the document, or # 2 if there was no doctype and there IS an xmlns= attribute # on the first element # The Second value is the doctype or namespace, if any. sub check_for_doctype { my $file = shift; # a reference to @file, for efficiency foreach my $count (0 .. scalar @{$file}) { my $line = $file->[$count]; # does an HTML element precede the doctype on the same line? if ( $line =~ /<[a-z].*//go; # Strip comments, so the next line doesn't find commented-out markup etc. # (this doesn't handle multi-line comments, unfortunately) if ( $line =~ /<[a-z]/i ) { # found an element if ( $line =~ /<[a-z]+ xmlns=['"]([^ "']*)/i ) {# look for an xmlns attr return 2, $1; } last; } } for (@{$file}[0 .. 20]) { return 1, $html40f_doctype if /Sorry, I am not authorized to access the specified URI.
The URI you specified,
$resource
returned a 401 "authorization required" response when I tried to download it.
You should have been prompted by your browser for a username/password pair; if you had supplied this information, I would have forwarded it to your server for authorization to access the resource. You can use your browser's "reload" function to try again, if you wish.
Of course, you may not want to trust me with this information, which is fine. I can tell you that I don't log it or do anything else nasty with it, and you can download the source for this service to see what it does, but you have no guarantee that this is actually the code I'm using; you basically have to decide whether to trust me or not. :-)
Note that you shouldn't use HTTP Basic Authentication for anything which really needs to be private, since the password goes across the network unencrypted.
EOF } sub print_unknown_http_error_message { my $uri = shift; my $code = shift; my $message = shift; print <<"EOF";
I got the following unexpected response when trying to
retrieve $uri
:
$code $message
Please make sure you have entered the URI correctly.
EOF } # # Complain about strange charsets. sub print_charset_error { my $error = shift; my $charset = shift; print <<".EOF.";A fatal error occurred when attempting to transliterate the document charset. Either we do not support this character encoding yet, or you have specified a non-existent character set (typically a misspelling such as "iso8859-1" for "iso-8859-1").
The detected charset was "$charset".
The error was "$error".
If you believe the charset to be valid you can submit a request for that character set (see the feedback page for details) and we will look into supporting it in the future.
.EOF. } sub output_css_validator_blurb { my $uri = shift; $uri = ent($uri); print <<"EOHD";If you use CSS in your document, you should also check it for validity using the W3C CSS Validation Service.
EOHD } # # Read TAB-delimited configuration files. Returns a hash reference. sub read_cfg { my $file = shift; my %cfg; open CFG, $file or die "open($file) returned: $!\n"; while (