summary refs log tree commit diff stats
diff options
context:
space:
mode:
author gerald <gerald@localhost> 1999-08-31 22:49:16 +0000
committer gerald <gerald@localhost> 1999-08-31 22:49:16 +0000
commit 4a4066c7e304f9f0649c7622c3bbdbcb244144c5 (patch)
tree fe497dd2914e26d8de1a9bdf82d0d8c8a69542a4
parent d735f273db67118680238af5d230714f3caeaad5 (diff)
download markup-validator-4a4066c7e304f9f0649c7622c3bbdbcb244144c5.zip
markup-validator-4a4066c7e304f9f0649c7622c3bbdbcb244144c5.tar.gz
markup-validator-4a4066c7e304f9f0649c7622c3bbdbcb244144c5.tar.bz2
added preliminary XML support; fixed a couple minor bugs
-rwxr-xr-x httpd/cgi-bin/check 255
1 files changed, 143 insertions, 112 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 4f4adb5..29ed7fc 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -8,7 +8,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.27 1999-08-25 03:03:02 gerald Exp $
+# $Id: check,v 1.28 1999-08-31 22:49:16 gerald Exp $
use LWP::UserAgent;
@@ -16,8 +16,8 @@ use LWP::UserAgent;
# Constant definitions
#############################################################################
-$cvsrevision = '$Revision: 1.27 $';
-$cvsdate = '$Date: 1999-08-25 03:03:02 $';
+$cvsrevision = '$Revision: 1.28 $';
+$cvsdate = '$Date: 1999-08-31 22:49:16 $';
$logfile = "/var/log/httpd/val-svc";
@@ -33,7 +33,8 @@ $sp = "/usr/local/bin/nsgmls";
$nkf = "/usr/local/bin/nkf";
$sgmldecl = "$sgmlstuff/REC-html40-971218/HTML4.decl";
-$xhtmldecl = "$sgmlstuff/WD-html-in-xml-19990304/DTD/xhtml1.dcl";
+$xhtmldecl = "$sgmlstuff/PR-xhtml1-19990824/xhtml1.dcl";
+$xmldecl = "/usr/local/src/validator/htdocs/sgml-lib/sp-1.3/pubtext/xml.dcl";
$revision = $cvsrevision;
$revision =~ s/^\$Revision: //;
@@ -110,9 +111,10 @@ $gifborder = " border=0";
'-//W3C//DTD HTML 4.01//EN', '<a href="http://www.w3.org/TR/1999/PR-html40-19990824/">HTML 4.01</a>',
'-//W3C//DTD HTML 4.01 Transitional//EN', '<a href="http://www.w3.org/TR/1999/PR-html40-19990824/">HTML 4.01</a> Transitional',
'-//W3C//DTD HTML 4.01 Frameset//EN', '<a href="http://www.w3.org/TR/1999/PR-html40-19990824/">HTML 4.01</a> Frameset',
- '-//W3C//DTD XHTML 1.0 Strict//EN', '<a href="http://www.w3.org/TR/WD-html-in-xml/">XHTML 1.0</a> Strict',
- '-//W3C//DTD XHTML 1.0 Transitional//EN', '<a href="http://www.w3.org/TR/WD-html-in-xml/">XHTML 1.0</a> Transitional',
- '-//W3C//DTD XHTML 1.0 Frameset//EN', '<a href="http://www.w3.org/TR/WD-html-in-xml/">XHTML 1.0</a> Frameset'
+ '-//W3C//DTD XHTML 1.0 Strict//EN', '<a href="http://www.w3.org/TR/1999/PR-xhtml1-19990824/">XHTML 1.0</a> Strict',
+ '-//W3C//DTD XHTML 1.0 Transitional//EN', '<a href="http://www.w3.org/TR/1999/PR-xhtml1-19990824/">XHTML 1.0</a> Transitional',
+ '-//W3C//DTD XHTML 1.0 Frameset//EN', '<a href="http://www.w3.org/TR/1999/PR-xhtml1-19990824/">XHTML 1.0</a> Frameset',
+ 'XML', '<a href="http://www.w3.org/TR/REC-xml">XML</a>'
);
@@ -270,120 +272,38 @@ $response = $ua->request($request);
if ( $response->code != 200 ) {
$optionstring = &build_options;
- if ( ( $response->code == "302" ) || ( $response->code == "301" ) ){
- # this (server name grabbing) should be moved elsewhere, probably.
- ($server_name) = ($uri =~ /^http:\/\/([^\/]*)/i);
-
- if ( $redirect_uri !~ /:\/\// ) {
- $whiney_location_message = qq{
-<p>
- <strong>Note</strong>: the HTTP server at $server_name is returning broken
- "Location:" headers. According to <a
- href="http://www.w3.org/Protocols/">the HTTP specifications</a>,
- the Location header should be an absolute URI; this server is returning
- relative URIs instead. If you are the maintainer of this server,
- please arrange for this bug to be fixed.
-</p>
-};
- $redirect_uri = "http://$server_name$redirect_uri";
- }
-
- print $header;
- print "<p>\n I got the following unexpected response when trying to ";
- print "retrieve $uri:\n";
- print "</p>\n\n<blockquote> <code>".$response->code." ".$response->message."</code>\n</blockquote>\n";
- print <<"EOF";
-
-<p>
- This indicates that the server has redirected the request to a different
- URI.
-</p>
-$whiney_location_message
-<p>
- The URI it was redirected to is:
-</p>
-
-<blockquote>
- <a href="/check?uri=$redirect_uri$optionstring">$redirect_uri</a>
-</blockquote>
-
-EOF
-
- }
- elsif ( $response->code == 401 ) {
+ if ( $response->code == 401 ) {
$response->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/;
my $realm = $1;
my $resource = $response->request->url;
my $authHeader = $response->headers->www_authenticate;
- print <<"EOF";
-Status: 401 Authorization Required
-WWW-Authenticate: $authHeader
-Connection: close
-Content-Type: text/html
-
-<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">
-<HTML><HEAD>
-<TITLE>401 Authorization Required</TITLE>
-</HEAD><BODY>
-<H1>Authorization Required</H1>
-<p>
- Sorry, I am not authorized to access the specified URI.
-</p>
-
-<p>
- The URI you specified,
-</p>
-
-<blockquote>
- <code><a href="$resource">$resource</a></code>
-</blockquote>
-
-<p>
- returned a 401 "authorization required" response when I tried
- to download it.
-</p>
-
-<p>
- You should have been prompted by your browser for a
- username/password pair; if you had supplied this information, I
- would have forwarded it to your server for authorization to
- access the resource. You can use your browser's "reload" function
- to try again, if you wish.
-</p>
-
-<p>
- Of course, you may not want to trust me with this information,
- which is fine. I can tell you that I don't log it or do
- anything else nasty with it, and you can <a
- href="http://validator.w3.org/source/">download the source for
- this service</a> to see what it does, but you have no guarantee
- that this is actually the code I'm using; you basically have to
- decide whether to trust me or not. :-)
-</p>
-
-<p>
- Note that you shouldn't use HTTP Basic Authentication for
- anything which really needs to be private, since the password
- goes across the network unencrypted.
-</p>
-EOF
-
+ &print_401_auth_required_message( $resource, $realm, $authHeader );
}
else {
print $header;
- print "<p>\n Please make sure you have entered the URI correctly.\n</p>";
+ &print_unknown_http_error_message( $uri, $response->code,
+ $response->message );
}
&clean_up_and_exit;
}
-unless ($response->headers->content_type =~ /text\/html/i) {
+# Keep the raw header: scalar content_type() drops the ";charset=" parameter parsed later.
+$content_type = $response->headers->header("Content-Type");
+if ( ( $content_type =~ /text\/xml/i ) ||
+ ( $content_type =~ /application\/xml/i ) ) {
+ $xml = 1;
+}
+elsif ($content_type =~ /text\/html/i) {
+ $html = 1;
+}
+else {
print $header;
print <<"EOF";
<p>
Sorry, I can't validate this document because its returned
- content-type was not "text/html", the media type for HTML
- documents.
+ content-type was <code>$content_type</code>, which is not
+ currently supported by this service.
</p>
EOF
@@ -401,7 +321,9 @@ $jump_links
EOF
@file = split '\n',$response->content;
-( $guessed_doctype, $doctype ) = &check_for_doctype( \@file );
+if ( $html || $xhtml ) {
+ ( $guessed_doctype, $doctype ) = &check_for_doctype( \@file );
+}
if ( $doctype =~ /xhtml/i ) {
$xhtml = 1;
@@ -417,7 +339,6 @@ foreach $line (@file) {
}
}
-$content_type = $response->headers->header("Content-Type");
( $http_charset ) = ( $content_type =~ /;\s*charset=(.*)/i );
$content_type =~ s/;.*$//;
$content_type =~ s/\s*$//g;
@@ -472,14 +393,22 @@ if ( $xhtml ) {
$xmlflags = "-wxml ";
$decl = $xhtmldecl;
}
-else {
+elsif ( $xml ) {
+ $ENV{SP_CATALOG_FILES} = "$sgmlstuff/sp-1.3/pubtext/xml.soc";
+ $ENV{SGML_SEARCH_PATH} = "$sgmlstuff/sp-1.3/pubtext/";
+ $ENV{SP_CHARSET_FIXED}="YES";
+ $ENV{SP_ENCODING}="XML";
+ $xmlflags = "-wxml -wno-valid ";
+ $decl = $xmldecl;
+}
+else { # must be HTML (for now)
$decl = $sgmldecl;
$catalog = "-c $sgmlstuff/catalog";
}
$command = "$codeconv $sp -E0 $xmlflags $catalog $decl";
-# print " <li>nsgmls command line: $command\n";
+# print " <li>nsgmls command line: <code>$command</code>\n";
open( CHECKER, "| $command - >$temp.esis 2>$temp" )
|| die "couldn't open checker: $!";
@@ -517,6 +446,9 @@ $version = "unknown";
if ( $xhtml ) {
$fpi = $doctype;
}
+elsif ( $xml ) {
+ $fpi = "XML";
+}
else {
for (@esis) {
next unless /^AVERSION CDATA (.*)/;
@@ -524,7 +456,7 @@ else {
last;
}
}
-$version = $pub_ids{$fpi};
+$version = $pub_ids{$fpi} || "unknown";
if ( $guessed_doctype ) {
push( @fake_errors, "nsgmls:<OSFD>0:2:1:E: Missing DOCTYPE declaration at start of document (${lt}a href=\"http://www.htmlhelp.org/tools/validator/doctype.html\"${gt}explanation...${lt}/a${gt})\n" );
@@ -551,10 +483,24 @@ EOHD
}
-print " <li>Level of HTML: <b>$version</b>.\n";
+print " <li>Document type: <b>$version</b>.\n";
print "</ul>\n\n";
+if ( $xml ) {
+print <<"EOHD";
+ <p>
+ <strong>Note: experimental XML support was added to this service
+ on Aug 31, 1999, but it isn't quite working yet; stay tuned to <a
+ href="http://lists.w3.org/Archives/Public/www-validator/">the
+ <code>www-validator</code> mailing list</a> for updates, and
+ please don't trust this service's output for XML documents
+ in the meantime.</strong>
+ </p>
+EOHD
+
+}
+
print <<"EOHD";
<p>
Below are the results of attempting to parse this document with
@@ -1213,7 +1159,7 @@ sub check_for_doctype {
return 0, ""; # doc does have a doctype
}
- $line =~ s/<!--([^-]|-[^-])*--\s*>//go; # strip comments,
+ $line =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go; # strip comments,
# so the next line doesn't find commented-out markup etc.
# (this doesn't handle multi-line comments, unfortunately)
@@ -1253,3 +1199,88 @@ sub check_for_doctype {
}
+sub print_401_auth_required_message {
+  # Print a full CGI 401 response (headers + HTML page). $resource and $authHeader come
+  # from the remote server, so neutralise them first; $realm is accepted but unused.
+  my ( $resource, $realm, $authHeader ) = @_;
+  for ($resource) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; s/"/&quot;/g; }  # HTML-escape
+  $authHeader =~ tr/\r\n/ /;  # a raw CR/LF here would let the server inject CGI headers
+  print <<"EOF";
+Status: 401 Authorization Required
+WWW-Authenticate: $authHeader
+Connection: close
+Content-Type: text/html
+
+<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">
+<HTML><HEAD>
+<TITLE>401 Authorization Required</TITLE>
+</HEAD><BODY>
+<H1>Authorization Required</H1>
+<p>
+ Sorry, I am not authorized to access the specified URI.
+</p>
+
+<p>
+ The URI you specified,
+</p>
+
+<blockquote>
+ <code><a href="$resource">$resource</a></code>
+</blockquote>
+
+<p>
+ returned a 401 "authorization required" response when I tried
+ to download it.
+</p>
+
+<p>
+ You should have been prompted by your browser for a
+ username/password pair; if you had supplied this information, I
+ would have forwarded it to your server for authorization to
+ access the resource. You can use your browser's "reload" function
+ to try again, if you wish.
+</p>
+
+<p>
+ Of course, you may not want to trust me with this information,
+ which is fine. I can tell you that I don't log it or do
+ anything else nasty with it, and you can <a
+ href="http://validator.w3.org/source/">download the source for
+ this service</a> to see what it does, but you have no guarantee
+ that this is actually the code I'm using; you basically have to
+ decide whether to trust me or not. :-)
+</p>
+
+<p>
+ Note that you shouldn't use HTTP Basic Authentication for
+ anything which really needs to be private, since the password
+ goes across the network unencrypted.
+</p>
+EOF
+
+}
+
+sub print_unknown_http_error_message {
+  # Print an HTML fragment reporting an unexpected HTTP response for $uri.
+  # $uri is user-supplied and $code/$message are server-supplied: HTML-escape all three.
+  my ( $uri, $code, $message ) = @_;
+  for ( $uri, $code, $message ) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; s/"/&quot;/g; }
+
+  print <<"EOF";
+ <p>
+ I got the following unexpected response when trying to
+ retrieve <code><a href="$uri">$uri</a></code>:
+ </p>
+
+ <blockquote>
+ <code>$code $message</code>
+ </blockquote>
+
+ <p>
+ Please make sure you have entered the URI correctly.
+ </p>
+
+EOF
+
+}
+