summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: hugo <hugo@localhost> 2000-03-01 23:15:42 +0000
committer: hugo <hugo@localhost> 2000-03-01 23:15:42 +0000
commit: 6b5ee4dc8be2cd800c3681eaf4d8f2d194285def (patch)
tree: e74135abdc05a47d5da49bd10fa63f8c026af28f
parent: 66aabc5c76b9406fd600f499c0392d13fbe81dce (diff)
download: markup-validator-6b5ee4dc8be2cd800c3681eaf4d8f2d194285def.zip
markup-validator-6b5ee4dc8be2cd800c3681eaf4d8f2d194285def.tar.gz
markup-validator-6b5ee4dc8be2cd800c3681eaf4d8f2d194285def.tar.bz2
Fixed authentication problem.
Added a limit for the number of documents checked with the online version. Fixed the -h command line option.
-rwxr-xr-x httpd/cgi-bin/checklink.pl 84
1 files changed, 62 insertions, 22 deletions
diff --git a/httpd/cgi-bin/checklink.pl b/httpd/cgi-bin/checklink.pl
index 605457a..0d3fbc4 100755
--- a/httpd/cgi-bin/checklink.pl
+++ b/httpd/cgi-bin/checklink.pl
@@ -5,7 +5,7 @@
# (c) 1999-2000 World Wide Web Consortium
# based on Renaud Bruyeron's checklink.pl
#
-# $Id: checklink.pl,v 2.33 2000-02-24 22:22:05 hugo Exp $
+# $Id: checklink.pl,v 2.34 2000-03-01 23:15:42 hugo Exp $
#
# This program is licensed under the W3C(r) License:
# http://www.w3.org/Consortium/Legal/copyright-software
@@ -31,7 +31,7 @@ $| = 1;
# Version info
my $PROGRAM = 'W3C checklink';
-my $VERSION = q$Revision: 2.33 $ . '(c) 1999-2000 W3C';
+my $VERSION = q$Revision: 2.34 $ . '(c) 1999-2000 W3C';
my $REVISION; ($REVISION = $VERSION) =~ s/Revision: (\d+\.\d+) .*/$1/;
# Different options specified by the user
@@ -51,6 +51,11 @@ my $_trusted = '\.w3\.org';
my $_http_proxy;
my $_recursive = 0;
my $_base_location = '.';
+my $_contact_address = 'webreq@w3.org';
+
+# Restrictions for the online version
+my $_sleep_time = 3;
+my $_max_documents = 50;
# Global variables
# Used for the output
@@ -63,6 +68,10 @@ my %processed;
my %results;
# List of redirects
my %redirects;
+# Count of the number of documents checked
+my $doc_count = 0;
+# Time stamp
+my $timestamp = &get_timestamp;
if ($#ARGV >= 0) {
$_cl = 1;
@@ -73,8 +82,8 @@ if ($#ARGV >= 0) {
}
my $uri;
foreach $uri (@uris) {
- if (! $_summary) {
- printf("%s %s\n", $PROGRAM ,$VERSION);
+ if (!$_summary) {
+ printf("%s %s\n", $PROGRAM ,$VERSION) if (! $_html);
} else {
$_verbose = 0;
$_progress = 0;
@@ -82,6 +91,9 @@ if ($#ARGV >= 0) {
$uri = urize($uri);
&check_uri($uri);
}
+ if (($doc_count > 0) && !$_summary) {
+ printf("\n%s\n", &global_stats());
+ }
} else {
use CGI;
use CGI::Carp qw(fatalsToBrowser);
@@ -271,7 +283,10 @@ sub urize() {
sub check_uri() {
my ($uri, $html_stuff) = @_;
- if ($_html && $html_stuff) {
+ # Are we in a recursion cycle?
+ my $in_recursion = !$first;
+
+ if ($_html) {
$first = 1;
} else {
$first = 0;
@@ -283,20 +298,15 @@ sub check_uri() {
}
# Get the document
- my $response = &get_document('GET', $uri, 1, \%redirects);
+ my $response = &get_document('GET', $uri, $in_recursion, \%redirects);
if (defined($response->{Stop})) {
- if ($html_stuff) {
- &html_header($uri);
- }
- &hprintf("\nError: %d %s\n",
- $response->code(), $response->message());
- if ($html_stuff) {
- &html_footer();
- }
return(-1);
}
+ # We are checking a new document
+ $doc_count++;
+
if ($_html) {
if ($html_stuff) {
&html_header($uri);
@@ -448,9 +458,20 @@ sub check_uri() {
print('-');
}
} else {
- print('<hr>');
# For the online version, wait for a while to avoid abuses
- sleep(3);
+ if (!$_cl) {
+ if ($doc_count == $_max_documents) {
+ print("<hr>\n<p><strong>Maximum number of documents reached!</strong> Please contact <a href=\"mailto:$_contact_address\">$_contact_address</a> if you need to check more than $_max_documents documents at once.</a></p>\n");
+ }
+ if ($doc_count >= $_max_documents) {
+ $doc_count++;
+ print("<p>Not checking <strong>$u</strong></p>\n");
+ $processed{$u} = 1;
+ next;
+ }
+ }
+ print('<hr>');
+ sleep($_sleep_time);
}
print "\n";
&check_uri($u, 0);
@@ -481,7 +502,7 @@ sub get_document() {
if ($_html) {
&html_header($uri);
}
- &hprintf("Error: %d %s\n",
+ &hprintf("\nError: %d %s\n",
$response->code(), $response->message());
if ($_html) {
&html_footer();
@@ -977,7 +998,7 @@ sub authentication() {
print(STDERR "Use the -u and -p options to specify a username and password.\n");
} else {
printf("Status: 401 Authorization Required\nWWW-Authenticate: %s\nConnection: close\nContent-Type: text/html\n\n", $r->headers->www_authenticate);
- printf("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">
+ printf("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">
<html>
<head>
<title>401 Authorization Required</title>
@@ -1008,7 +1029,7 @@ sub time_diff() {
for ($start[1], $stop[1]) {
$_ /= 1_000_000;
}
- return(sprintf("%.2f", ($stop[0]+$stop[1])-($start[0]+$start[1])));
+ return(sprintf("%.1f", ($stop[0]+$stop[1])-($start[0]+$start[1])));
}
########################
@@ -1369,6 +1390,17 @@ sub links_summary {
###############################################################################
+################
+# Global stats #
+################
+
+sub global_stats() {
+ my $stop = &get_timestamp();
+ return sprintf("Checked %d document(s) in %ss.",
+ ($doc_count<=$_max_documents? $doc_count : $_max_documents),
+ &time_diff($timestamp, $stop));
+}
+
##################
# HTML interface #
##################
@@ -1379,9 +1411,12 @@ sub html_header() {
if (defined($_[1])) {
print "Cache-Control: no-cache\nPragma: no-cache\n";
}
- print "Content-type: text/html
+ if (! $_cl) {
+ print 'Content-type: text/html';
+ }
+ print "
-<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">
+<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">
<html>
<head>
<title>W3C Link Ckecker: $uri</title>
@@ -1451,6 +1486,11 @@ sub show_url() {
}
sub html_footer() {
+
+ if (($doc_count > 0) && !$_quiet) {
+ printf("<p>%s</p>\n", &global_stats());
+ }
+
print "
<hr>
<address>
@@ -1487,7 +1527,7 @@ sub print_form() {
<br>
<input type=\"checkbox\" name=\"hide_dir_redirects\"> Hide directory redirects
<br>
- <input type=\"checkbox\" name=\"recursive\"> Check linked documents recursively
+ <input type=\"checkbox\" name=\"recursive\"> Check linked documents recursively <small>(maximum: $_max_documents documents; sleeping $_sleep_time\s between each document)</small>
</p>
<p><input type=\"submit\" name=\"submit\" value=\"Check\"></p>
</form>