summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhugo <hugo@localhost>2000-02-08 21:19:19 +0000
committerhugo <hugo@localhost>2000-02-08 21:19:19 +0000
commit6a4d0825bf6f408524d3d4047b7a74a9c44a87f3 (patch)
treee3d3e0d2ced2ea645e107eb1fcae8ec1305410d9
parente791758e3a15ff895d2a68753fd89b7994450389 (diff)
downloadmarkup-validator-6a4d0825bf6f408524d3d4047b7a74a9c44a87f3.zip
markup-validator-6a4d0825bf6f408524d3d4047b7a74a9c44a87f3.tar.gz
markup-validator-6a4d0825bf6f408524d3d4047b7a74a9c44a87f3.tar.bz2
Added comments
Now correctly reports broken fragments with redirects
-rwxr-xr-xhttpd/cgi-bin/LinkChecker.pl72
-rwxr-xr-xhttpd/cgi-bin/checklink.pl72
2 files changed, 94 insertions, 50 deletions
diff --git a/httpd/cgi-bin/LinkChecker.pl b/httpd/cgi-bin/LinkChecker.pl
index 3c03c66..5bafbeb 100755
--- a/httpd/cgi-bin/LinkChecker.pl
+++ b/httpd/cgi-bin/LinkChecker.pl
@@ -5,7 +5,7 @@
# (c) 1999-2000 World Wide Web Consortium
# based on Renaud Bruyeron's checklink.pl
#
-# $Id: LinkChecker.pl,v 1.26 2000-02-03 18:30:29 hugo Exp $
+# $Id: LinkChecker.pl,v 1.27 2000-02-08 21:19:19 hugo Exp $
#
# This program is licensed under the W3C(r) License.
#
@@ -27,7 +27,7 @@ $| = 1;
# Version info
my $PROGRAM = 'W3C LinkChecker';
-my $VERSION = q$Revision: 1.26 $ . '(c) 1999-2000 W3C';
+my $VERSION = q$Revision: 1.27 $ . '(c) 1999-2000 W3C';
my $REVISION; ($REVISION = $VERSION) =~ s/Revision: (\d+\.\d+) .*/$1/;
# State of the program
@@ -113,6 +113,7 @@ sub parse_arguments() {
} elsif (m/^-[^-upytdc]/) {
if (m/q/) {
$_quiet = 1;
+ $_summary = 1;
}
if (m/s/) {
$_summary = 1;
@@ -268,12 +269,15 @@ sub check_uri() {
$response->header('Content-type'));
return(-1);
}
+ # URL manipulation
my $base_uri = URI->new($response->base());
my $uri_uri = URI->new($uri);
my $absolute_base = $base_uri->abs($uri_uri);
my $p = &parse_document($absolute_base->as_string(), $response->content(), 1);
my $base = URI->new($p->{base});
+
# Check anchors
+ ###############
if (! $_summary) {
print("Checking anchors:\n");
}
@@ -291,8 +295,11 @@ sub check_uri() {
if (! $_summary) {
print(" done.\n");
}
+
# Check links
+ #############
my %links;
+ # Record all the links
foreach $link (keys %{$p->{Links}}) {
my $link_uri = URI->new($link);
my $abs_link_uri = URI->new_abs($link_uri, $base);
@@ -316,6 +323,7 @@ sub check_uri() {
$results{$uri}{$uri}{orig} = $results{$uri}{$uri}{code};
$results{$uri}{$uri}{message} = 'Page tested';
$results{$uri}{$uri}{success} = 1;
+ # Build the list of broken URI's
my %broken;
foreach $u (keys %links) {
# Don't check mailto: URI's
@@ -336,11 +344,13 @@ sub check_uri() {
($results{$u}{$fragment}?'OK':'Not found'),
join(',', keys %{$links{$u}{$fragment}}));
}
+ # A broken fragment?
if ($results{$u}{$fragment} == 0) {
$broken{$u}{$fragment} += 2;
}
}
} else {
+ # Couldn't find the document
$broken{$u}{$u} = 1;
foreach $fragment (keys %{$links{$u}}) {
$broken{$u}{$fragment}++;
@@ -354,10 +364,12 @@ sub check_uri() {
# Display results
if ($_html) {
if (! $_summary) {
- print '</pre><hr>';
+ print "</pre><hr>\n";
}
}
- print "\n";
+ if (! $_quiet) {
+ print "\n";
+ }
&anchors_summary($p->{Anchors}, \%errors);
&links_summary(\%links, \%results, \%broken, \%redirects);
if ($_html) {
@@ -626,27 +638,29 @@ sub check_validity($, $, \%, \%, \%, \%, $) {
}
my $response;
if ($testing eq $uri) {
- # Mimic an HTTP::Response object
+ # Mimic an HTTP::Response object if we already have the document
$results->{$uri}{$uri}{code} = $testing_code;
$results->{$uri}{$uri}{success} = 1;
} else {
$response = &get_uri($method, $uri);
# Record the redirects
&record_redirects($redirects, $response->{Redirects});
- # Parse the document if necessary and possible
+ # Get the information back from get_uri()
$results->{$uri}{$uri}{code} = $response->code();
$results->{$uri}{$uri}{display} = $results->{$uri}{$uri}{code};
$results->{$uri}{$uri}{orig} = $response->{OriginalCode};
+ # Did we get a redirect?
if ($response->{OriginalCode} != $response->code()) {
- $results->{$uri}{$uri}{success} = 0;
$results->{$uri}{$uri}{orig_message} = $response->{OriginalMessage};
- } else {
- $results->{$uri}{$uri}{success} = $response->is_success();
+ $results->{$uri}{$uri}{redirected} = 1;
}
+ $results->{$uri}{$uri}{success} = $response->is_success();
+ # Stores the authentication information
if (defined($response->{Realm})) {
$results->{$uri}{$uri}{realm} = $response->{Realm};
$results->{$uri}{$uri}{display} = 401;
}
+ # Did it fail?
if (! $results->{$uri}{$uri}{success}) {
$results->{$uri}{$uri}{message} = $response->message();
if ($_verbose) {
@@ -660,6 +674,7 @@ sub check_validity($, $, \%, \%, \%, \%, $) {
if ($#fragments == 0) {
return;
}
+ # There are fragments. Parse the document.
my $p;
if ($testing ne $uri) {
if (!(($results->{$uri}{$uri}{type} = $response->header('Content-type')) =~ m/text\/html/i)) {
@@ -749,6 +764,7 @@ sub time_diff() {
# Handle the redirects #
########################
+# Record the redirects in a hash
sub record_redirects(\%, \%) {
my ($redirects, $sub) = @_;
foreach $r (keys %$sub) {
@@ -756,11 +772,13 @@ sub record_redirects(\%, \%) {
}
}
+# Determine if a request is redirected
sub is_redirected($, %) {
my ($uri, %redirects) = @_;
return(defined($redirects{$uri}));
}
+# Get a list of redirects for a URI
sub get_redirects($, %) {
my ($uri, %redirects) = @_;
my @history = ($uri);
@@ -889,17 +907,24 @@ sub links_summary(\%,\%,\%) {
# List of the broken links
@urls = keys %{$broken};
+ if ($_redirects) {
+ # Add the redirected URI's to the report
+ for $l (keys %$redirects) {
+ next unless (defined($results->{$l})
+ && !defined($broken->{$l}));
+ push(@urls, $l);
+ }
+ }
if ($#urls < 0) {
if (! $_quiet && $_html) {
print "<p>Valid links!</p>\n";
}
return;
}
- print "\n";
if ($_html) {
print('<p>');
}
- print('List of broken ');
+ print("\nList of broken ");
if ($_redirects) {
print('and redirected ');
}
@@ -911,13 +936,6 @@ sub links_summary(\%,\%,\%) {
print("</p>\n<table border=\"1\">\n<tr><td><b>Location</b></td><td><b>Code</b></td><td><b>Fragment</b></td><td><b>Lines</b></td></tr>");
}
print("\n");
- if ($_redirects) {
- for $l (keys %$redirects) {
- next unless (defined($results->{$l})
- && !defined($broken->{$l}));
- push(@urls, $l);
- }
- }
foreach $u (@urls) {
my @fragments = keys %{$broken->{$u}};
# Count the rows displayed
@@ -928,13 +946,17 @@ sub links_summary(\%,\%,\%) {
# Did we get a redirect?
my $redirected = &is_redirected($u, %$redirects);
# List of lines
- my $lines_list;
- if (defined($links->{$u}{$u}{-1})) {
- $lines_list = '-';
- } else {
- $lines_list = join(', ',
- sort {$a <=> $b} keys %{$links->{$u}{$u}});
+ my @total_lines;
+ foreach $f (keys %{$links->{$u}}) {
+ if ($f eq $u) {
+ next if (defined($links->{$u}{$u}{-1}));
+ }
+ foreach $l (keys %{$links->{$u}{$f}}) {
+ push (@total_lines, $l);
+ }
}
+ my $lines_list = join(', ',
+ sort {$a <=> $b} @total_lines);
if ($_html) {
# Main info
for (@redirects_urls = &get_redirects($u, %$redirects)) {
@@ -1007,7 +1029,7 @@ sub links_summary(\%,\%,\%) {
join(', ',
sort {$a <=> $b} keys %{$links->{$u}{$f}}));
} else {
- printf("\t%s\tLines: %s\n",
+ printf("\t%-30s\tLines: %s\n",
# Fragment
$f,
# List of lines
diff --git a/httpd/cgi-bin/checklink.pl b/httpd/cgi-bin/checklink.pl
index 9e9c563..af907ee 100755
--- a/httpd/cgi-bin/checklink.pl
+++ b/httpd/cgi-bin/checklink.pl
@@ -5,7 +5,7 @@
# (c) 1999-2000 World Wide Web Consortium
# based on Renaud Bruyeron's checklink.pl
#
-# $Id: checklink.pl,v 2.26 2000-02-03 18:30:29 hugo Exp $
+# $Id: checklink.pl,v 2.27 2000-02-08 21:19:19 hugo Exp $
#
# This program is licensed under the W3C(r) License.
#
@@ -27,7 +27,7 @@ $| = 1;
# Version info
my $PROGRAM = 'W3C checklink';
-my $VERSION = q$Revision: 2.26 $ . '(c) 1999-2000 W3C';
+my $VERSION = q$Revision: 2.27 $ . '(c) 1999-2000 W3C';
my $REVISION; ($REVISION = $VERSION) =~ s/Revision: (\d+\.\d+) .*/$1/;
# State of the program
@@ -113,6 +113,7 @@ sub parse_arguments() {
} elsif (m/^-[^-upytdc]/) {
if (m/q/) {
$_quiet = 1;
+ $_summary = 1;
}
if (m/s/) {
$_summary = 1;
@@ -268,12 +269,15 @@ sub check_uri() {
$response->header('Content-type'));
return(-1);
}
+ # URL manipulation
my $base_uri = URI->new($response->base());
my $uri_uri = URI->new($uri);
my $absolute_base = $base_uri->abs($uri_uri);
my $p = &parse_document($absolute_base->as_string(), $response->content(), 1);
my $base = URI->new($p->{base});
+
# Check anchors
+ ###############
if (! $_summary) {
print("Checking anchors:\n");
}
@@ -291,8 +295,11 @@ sub check_uri() {
if (! $_summary) {
print(" done.\n");
}
+
# Check links
+ #############
my %links;
+ # Record all the links
foreach $link (keys %{$p->{Links}}) {
my $link_uri = URI->new($link);
my $abs_link_uri = URI->new_abs($link_uri, $base);
@@ -316,6 +323,7 @@ sub check_uri() {
$results{$uri}{$uri}{orig} = $results{$uri}{$uri}{code};
$results{$uri}{$uri}{message} = 'Page tested';
$results{$uri}{$uri}{success} = 1;
+ # Build the list of broken URI's
my %broken;
foreach $u (keys %links) {
# Don't check mailto: URI's
@@ -336,11 +344,13 @@ sub check_uri() {
($results{$u}{$fragment}?'OK':'Not found'),
join(',', keys %{$links{$u}{$fragment}}));
}
+ # A broken fragment?
if ($results{$u}{$fragment} == 0) {
$broken{$u}{$fragment} += 2;
}
}
} else {
+ # Couldn't find the document
$broken{$u}{$u} = 1;
foreach $fragment (keys %{$links{$u}}) {
$broken{$u}{$fragment}++;
@@ -354,10 +364,12 @@ sub check_uri() {
# Display results
if ($_html) {
if (! $_summary) {
- print '</pre><hr>';
+ print "</pre><hr>\n";
}
}
- print "\n";
+ if (! $_quiet) {
+ print "\n";
+ }
&anchors_summary($p->{Anchors}, \%errors);
&links_summary(\%links, \%results, \%broken, \%redirects);
if ($_html) {
@@ -626,27 +638,29 @@ sub check_validity($, $, \%, \%, \%, \%, $) {
}
my $response;
if ($testing eq $uri) {
- # Mimic an HTTP::Response object
+ # Mimic an HTTP::Response object if we already have the document
$results->{$uri}{$uri}{code} = $testing_code;
$results->{$uri}{$uri}{success} = 1;
} else {
$response = &get_uri($method, $uri);
# Record the redirects
&record_redirects($redirects, $response->{Redirects});
- # Parse the document if necessary and possible
+ # Get the information back from get_uri()
$results->{$uri}{$uri}{code} = $response->code();
$results->{$uri}{$uri}{display} = $results->{$uri}{$uri}{code};
$results->{$uri}{$uri}{orig} = $response->{OriginalCode};
+ # Did we get a redirect?
if ($response->{OriginalCode} != $response->code()) {
- $results->{$uri}{$uri}{success} = 0;
$results->{$uri}{$uri}{orig_message} = $response->{OriginalMessage};
- } else {
- $results->{$uri}{$uri}{success} = $response->is_success();
+ $results->{$uri}{$uri}{redirected} = 1;
}
+ $results->{$uri}{$uri}{success} = $response->is_success();
+ # Stores the authentication information
if (defined($response->{Realm})) {
$results->{$uri}{$uri}{realm} = $response->{Realm};
$results->{$uri}{$uri}{display} = 401;
}
+ # Did it fail?
if (! $results->{$uri}{$uri}{success}) {
$results->{$uri}{$uri}{message} = $response->message();
if ($_verbose) {
@@ -660,6 +674,7 @@ sub check_validity($, $, \%, \%, \%, \%, $) {
if ($#fragments == 0) {
return;
}
+ # There are fragments. Parse the document.
my $p;
if ($testing ne $uri) {
if (!(($results->{$uri}{$uri}{type} = $response->header('Content-type')) =~ m/text\/html/i)) {
@@ -749,6 +764,7 @@ sub time_diff() {
# Handle the redirects #
########################
+# Record the redirects in a hash
sub record_redirects(\%, \%) {
my ($redirects, $sub) = @_;
foreach $r (keys %$sub) {
@@ -756,11 +772,13 @@ sub record_redirects(\%, \%) {
}
}
+# Determine if a request is redirected
sub is_redirected($, %) {
my ($uri, %redirects) = @_;
return(defined($redirects{$uri}));
}
+# Get a list of redirects for a URI
sub get_redirects($, %) {
my ($uri, %redirects) = @_;
my @history = ($uri);
@@ -889,17 +907,24 @@ sub links_summary(\%,\%,\%) {
# List of the broken links
@urls = keys %{$broken};
+ if ($_redirects) {
+ # Add the redirected URI's to the report
+ for $l (keys %$redirects) {
+ next unless (defined($results->{$l})
+ && !defined($broken->{$l}));
+ push(@urls, $l);
+ }
+ }
if ($#urls < 0) {
if (! $_quiet && $_html) {
print "<p>Valid links!</p>\n";
}
return;
}
- print "\n";
if ($_html) {
print('<p>');
}
- print('List of broken ');
+ print("\nList of broken ");
if ($_redirects) {
print('and redirected ');
}
@@ -911,13 +936,6 @@ sub links_summary(\%,\%,\%) {
print("</p>\n<table border=\"1\">\n<tr><td><b>Location</b></td><td><b>Code</b></td><td><b>Fragment</b></td><td><b>Lines</b></td></tr>");
}
print("\n");
- if ($_redirects) {
- for $l (keys %$redirects) {
- next unless (defined($results->{$l})
- && !defined($broken->{$l}));
- push(@urls, $l);
- }
- }
foreach $u (@urls) {
my @fragments = keys %{$broken->{$u}};
# Count the rows displayed
@@ -928,13 +946,17 @@ sub links_summary(\%,\%,\%) {
# Did we get a redirect?
my $redirected = &is_redirected($u, %$redirects);
# List of lines
- my $lines_list;
- if (defined($links->{$u}{$u}{-1})) {
- $lines_list = '-';
- } else {
- $lines_list = join(', ',
- sort {$a <=> $b} keys %{$links->{$u}{$u}});
+ my @total_lines;
+ foreach $f (keys %{$links->{$u}}) {
+ if ($f eq $u) {
+ next if (defined($links->{$u}{$u}{-1}));
+ }
+ foreach $l (keys %{$links->{$u}{$f}}) {
+ push (@total_lines, $l);
+ }
}
+ my $lines_list = join(', ',
+ sort {$a <=> $b} @total_lines);
if ($_html) {
# Main info
for (@redirects_urls = &get_redirects($u, %$redirects)) {
@@ -1007,7 +1029,7 @@ sub links_summary(\%,\%,\%) {
join(', ',
sort {$a <=> $b} keys %{$links->{$u}{$f}}));
} else {
- printf("\t%s\tLines: %s\n",
+ printf("\t%-30s\tLines: %s\n",
# Fragment
$f,
# List of lines