summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xhtdocs/favlets.html128
-rwxr-xr-xhtdocs/p3p.html119
-rwxr-xr-xhttpd/cgi-bin/LinkChecker.pl1513
-rwxr-xr-xhttpd/cgi-bin/p3p6
-rwxr-xr-xhttpd/cgi-bin/referers48
-rwxr-xr-xhttpd/cgi-bin/traceroute106
6 files changed, 0 insertions, 1920 deletions
diff --git a/htdocs/favlets.html b/htdocs/favlets.html
deleted file mode 100755
index 309379e..0000000
--- a/htdocs/favlets.html
+++ /dev/null
@@ -1,128 +0,0 @@
-<!--#set var="revision" value="\$Id: favlets.html,v 1.6 2002-10-30 16:46:59 link Exp $"
---><!--#set var="date" value="\$Date: 2002-10-30 16:46:59 $"
---><!--#set var="title" value="Favlets For The W3C MarkUp Validation Service"
---><!--#set var="relroot" value="./"
---><!--#include virtual="header.html" -->
-
- <script type="text/javascript">
-<!--
- var xmlhttp
- /*@cc_on @*/
- /*@if (@_jscript_version >= 5)
- try {
- xmlhttp=new ActiveXObject("Msxml2.XMLHTTP")
- } catch (e) {
- try {
- xmlhttp=new ActiveXObject("Microsoft.XMLHTTP")
- } catch (E) {
- xmlhttp=false
- }
- }
- @else
- xmlhttp=false
- @end @*/
- if (!xmlhttp) {
- try {
- xmlhttp = new XMLHttpRequest()
- } catch (e) {
- xmlhttp=false
- }
- }
- if (xmlhttp && location.hash && location.hash.substr) {
- var hsh=location.hash.substr(1)
- if (hsh.indexOf('http://')==0) {
- url="http://validator.w3.org:8001/check?uri="+hsh+";output=xml";
- xmlhttp.open("HEAD",url,true);
- xmlhttp.onreadystatechange=function() {
- if (xmlhttp.readyState==4) {
- if (xmlhttp.getResponseHeader('X-W3C-Validator-Status')!='') {
- alert(hsh+' is '+xmlhttp.getResponseHeader('X-W3C-Validator-Status')+'\nErrors: '+xmlhttp.getResponseHeader('X-W3C-Validator-Errors'));
- } else {
- alert("Validation result not found, this may be for a number of reasons, including url not being available, or a character coding not detected.")
- }
- }
- }
- xmlhttp.setRequestHeader('User-Agent','Validator+Favlet');
- xmlhttp.send("")
- }
- }
-// -->
- </script>
-
- <div id="skip">
- <h2><a href="http://Favlets.com/">Favlets</a>
- For The Validator</h2>
- <div class="intro">
- <p>
- Favlets are small snippets of JavaScript embedded in a Bookmark
- <acronym title="Universal Resource Identifier">URL</acronym>
- that allows Bookmarks in browsers do various advanced things.
- Popular Favlets include variants that prompts the user for a
- phrase and searches the web for that phrase, or that finds older
- versions of the currently viewed page in the
- <a class="offsite" title="The Internet Archive WayBack Machine"
- href="http://www.archive.org/">WayBack Machine</a>.
- </p>
- <p>
- Favlets depend on support for <code>javascript:</code>
- <acronym title="Universal Resource Identifiers">URLs</acronym>
- in your browser's Bookmarks feature, and each Favlet may depend on
- support for a specific part of the JavaScript specification to work
- properly. <acronym title="Microsoft Internet Explorer">MSIE</acronym>
- versions more recent then 5.0, and Mozilla 1.0 and later &mdash; this
- includes browsers using the embedded version of Mozilla, such as
- Netscape 7.0 &mdash; are known to support most Favlets.
- <acronym title="Microsoft Internet Explorer for Mac OS">MSIE:mac</acronym>
- supports basic Favlets, but it's general support for JavaScript
- may render certain advanced Favlets inoperable. Netscape 4.x is
- a lost cause in this regard, and with it's poor support for standards
- in general it is probably better to avoid it altogether.
- </p>
- </div>
- <div>
- <dl>
- <dt>"<a href="javascript:void(location='http://validator.w3.org/check?uri='+location)"
- title="Validate This Page">Validate <em>This</em> Page</a>"</dt>
- <dd>
- This is the basic "Validate This Page" Favlet.
- It simply submits the URL for the currently viewed page to the
- Validator for processing. Results appear in the same window.
- </dd>
- <dt>"<a href="javascript:window.open('http://validator.w3.org/check?uri='+location);void%200"
- title="Validate This Page In New Window">Validate <em>This</em> Page In New Window</a>"</dt>
- <dd>
- Like the last Favlet, this also submits the URL of the current
- page to the Validator for processing, but this version will show
- the results in a new window.
- </dd>
- <dt>"<a href="javascript:void(q=prompt('Validate Page:',''));if(q)void(location='http://validator.w3.org/check?uri='+escape(q))"
- title="Validate Page...">Validate Page...</a>"</dt>
- <dd>
- Puts up a dialog with a text entry field where you can type in the
- URL of a page you would like to Validate. The results appear in the
- current window.
- </dd>
- <dt>"<a href="javascript:void(q=prompt('Validate Page:',''));if(q)window.open('http://validator.w3.org/check?uri='+escape(q));void%200"
- title="Validate Page In New Window...">Validate Page In New Window...</a>"</dt>
- <dd>Same as above but shows results in a new window.</dd>
- <dt>"<a href="javascript:_is=document.createElement('iframe');_is.setAttribute('src','http://validator.w3.org:8001/favlets.html#'+location.href);_is.setAttribute('height','0');_is.setAttribute('width','0');_is.setAttribute('style','border:0;');document.body.appendChild(_is);void 0"
- title="Is This Page Valid?">Is <em>This</em> Page Valid?</a>"</dt>
- <dd>
- From the Head Of The "JavaScript Juju" Department,
- <a href="http://jibbering.com/">Jim Ley</a>, comes this gem.
- When invoked, this Favlet will submit the
- <acronym title="Universal Resource Identifier">URL</acronym>
- for the current page to the Validator for processing and pop up
- a dialog that shows whether the page is Valid and how many errors
- where found in the page. As with all Jim does, this Black Magic is
- so deep I'm <em>afraid</em> to ask him how it works.
- <q>Here There Be Dragons!</q>
- </dd>
- </dl>
- </div>
- </div>
-
-<!--#include virtual="footer.html" -->
- </body>
-</html>
-
diff --git a/htdocs/p3p.html b/htdocs/p3p.html
deleted file mode 100755
index d23621e..0000000
--- a/htdocs/p3p.html
+++ /dev/null
@@ -1,119 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-
-<html xmlns="http://www.w3.org/1999/xhtml">
-
-<head>
-<title>About the W3C HTML Validation Service</title>
-<meta name="revision"
- content="$Id: p3p.html,v 1.3 2002-12-08 14:39:25 ville Exp $" />
-</head>
-
-<body bgcolor="#FFFFFF" text="#000000" link="#0000ee" vlink="#551a8b">
-
-<p>
- <a href="http://www.w3.org/"><img
- src="http://www.w3.org/Icons/WWW/w3c_home" height="48" border="0"
- alt="W3C" /></a>
-</p>
-
- <h2>About the W3C HTML Validation Service</h2>
-
- <p>
- <a href="./">This service</a> checks HTML documents for
- conformance to W3C HTML and XHTML Recommendations and other
- HTML standards. Related resources include:
- </p>
-
- <ul>
- <li><a href="source/">Information on the source code availability</a></li>
- <li><a href="todo.html">The to-do list for the service</a></li>
- <li><a href="sgml-lib/catalog">DTDs (document types) supported by this
- service</a> (the SGML catalog)</li>
- <li><a href="http://lists.w3.org/Archives/Public/www-validator/">The
- www-validator mailing list</a></li>
- <li><a href="feedback.html">How to provide feedback on this service</a></li>
- <!--
- <li>@@ conneg'd icons
- -->
- </ul>
-
- <h2><a name="others">Other resources</a></h2>
-
- <ul>
- <li><a href="http://www.w3.org/MarkUp/">W3C's HTML home page</a></li>
- <li><a href="http://www.w3.org/TR/html401/">The W3C HTML 4.01
- Recommendation</a></li>
- <li><a href="http://www.w3.org/TR/xhtml1/">The W3C XHTML 1.0
- Recommendation</a></li>
- <li>The <a
- href="http://www.htmlhelp.com/tools/validator/">WDG HTML
- validator</a> is another excellent online validation
- service</li>
- <li><a href="http://arealvalidator.com/">A Real Validator</a>
- is a shareware HTML syntax checker for Windows systems,
- from the author of the WDG validator</li>
- <!--
- <li>@@ bookmarklets</li>
- -->
- </ul>
-
-<h2><a name="credits">Credits</a></h2>
-
- <p>
- This service uses:
- </p>
-
- <ul>
-
- <li>
- <a href="http://www.jclark.com/">James Clark</a>'s excellent <a
- href="http://www.jclark.com/sp/">SGML parser</a>,
- </li>
-
- <li>
- The <a href="http://www.linpro.no/lwp/">libwww-perl library</a>
- by Gisle Aas and Martijn Koster for retrieving documents.
- </li>
-
- <!--
- <li>
- <a href="http://www.weblint.org/~neilb/">Neil Bowers</a>' HTML
- style checker, <a
- href="http://www.weblint.org/">Weblint</a>.
- </li>
-
- <li>
- SGML error explanations written by Scott Bigham.
- </li>
- -->
-
- <li>
- Patches and ideas from Terje Bless, <a
- href="http://www.htmlhelp.com/%7Eliam/">Liam Quinn</a>, and
- others.
- </li>
-
- </ul>
-
-<p>
- The <a
- href="http://lists.w3.org/Archives/Public/www-html/1994Jul/0015">first
- online HTML validation service</a> was created by <a
- href="http://www.w3.org/People/Connolly/">Dan Connolly</a> and <a
- href="http://www.gaither.com/mark/">Mark Gaither</a>.
-</p>
-
-<hr />
-
-<address>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://validator.w3.org/images/vxhtml10" height="31" width="88"
- align="right" border="0" alt="Valid XHTML 1.0!" /></a>
- <a href="/feedback.html">Gerald Oskoboiny</a><br />
- $Date: 2002-12-08 14:39:25 $
-</address>
-
-</body>
-
-</html>
diff --git a/httpd/cgi-bin/LinkChecker.pl b/httpd/cgi-bin/LinkChecker.pl
deleted file mode 100755
index 90cc39e..0000000
--- a/httpd/cgi-bin/LinkChecker.pl
+++ /dev/null
@@ -1,1513 +0,0 @@
-#! /usr/bin/perl -w
-#
-# W3C Link Checker
-# by Hugo Haas
-# (c) 1999-2000 World Wide Web Consortium
-# based on Renaud Bruyeron's checklink.pl
-#
-# $Id: LinkChecker.pl,v 1.33 2000-02-24 22:22:05 hugo Exp $
-#
-# This program is licensed under the W3C(r) License:
-# http://www.w3.org/Consortium/Legal/copyright-software
-#
-# See the CVSweb interface at:
-# http://dev.w3.org/cvsweb/validator/httpd/cgi-bin/LinkChecker.pl
-#
-# An online version is available at:
-# http://validator.w3.org/checklink
-
-use strict;
-
-package W3C::UserAgent;
-require LWP::UserAgent;
-@W3C::UserAgent::ISA = qw(LWP::UserAgent);
-
-package W3C::LinkChecker;
-require HTML::Parser;
-@W3C::LinkChecker::ISA = qw(HTML::Parser);
-
-# Autoflush
-$| = 1;
-
-# Version info
-my $PROGRAM = 'W3C LinkChecker';
-my $VERSION = q$Revision: 1.33 $ . '(c) 1999-2000 W3C';
-my $REVISION; ($REVISION = $VERSION) =~ s/Revision: (\d+\.\d+) .*/$1/;
-
-# Different options specified by the user
-my $_cl;
-my $_quiet = 0;
-my $_summary = 0;
-my $_verbose = 0;
-my $_progress = 0;
-my $_html = 0;
-my $_timeout = 60;
-my $_chunksize = 1024;
-my $_redirects = 1;
-my $_dir_redirects = 1;
-my $_user;
-my $_password;
-my $_trusted = '\.w3\.org';
-my $_http_proxy;
-my $_recursive = 0;
-my $_base_location = '.';
-
-# Global variables
-# Used for the output
-my $first = 2;
-# What is our query?
-my $query;
-# What URI's did we process? (used for $_recursive == 1)
-my %processed;
-# Result of the HTTP query
-my %results;
-# List of redirects
-my %redirects;
-
-if ($#ARGV >= 0) {
- $_cl = 1;
-# Parse command line
- my @uris = &parse_arguments();
- if ($_user && (! $_password)) {
- &ask_password();
- }
- my $uri;
- foreach $uri (@uris) {
- if (! $_summary) {
- printf("%s %s\n", $PROGRAM ,$VERSION);
- } else {
- $_verbose = 0;
- $_progress = 0;
- }
- $uri = urize($uri);
- &check_uri($uri);
- }
-} else {
- use CGI;
- use CGI::Carp qw(fatalsToBrowser);
- $query = new CGI;
- $_cl = 0;
- $_verbose = 0;
- $_progress = 0;
- if ($query->param('summary')) {
- $_summary = 1;
- } else {
- }
- if ($query->param('hide_redirects')) {
- $_redirects = 0;
- }
- if ($query->param('hide_dir_redirects')) {
- $_dir_redirects = 0;
- }
- if ($query->param('recursive')) {
- $_recursive = 1;
- }
- $_html = 1;
- my $uri;
- if ($query->param('uri')) {
- $uri = $query->param('uri');
- } elsif ($query->param('url')) {
- $uri = $query->param('url');
- } else {
- &print_form($query);
- }
- $uri =~ s/^\s+//g;
- if ($uri =~ m/^file:/) {
- &file_uri($uri);
- } elsif (!($uri =~ m/:/)) {
- $uri = 'http://'.$uri;
- }
- &check_uri($uri, 1);
-}
-
-###############################################################################
-
-################################
-# Command line and usage stuff #
-################################
-
-sub parse_arguments() {
- my @uris;
- my $uris = 0;
- while (@ARGV) {
- $_ = shift(@ARGV);
- if ($uris) {
- push(@uris, $_);
- } elsif (m/^--$/) {
- $uris = 1;
- } elsif (m/^-[^-upytdcl]/) {
- if (m/q/) {
- $_quiet = 1;
- $_summary = 1;
- }
- if (m/s/) {
- $_summary = 1;
- }
- if (m/b/) {
- $_redirects = 0;
- }
- if (m/e/) {
- $_dir_redirects = 0;
- }
- if (m/v/) {
- $_verbose = 1;
- }
- if (m/i/) {
- $_progress = 1;
- }
- if (m/h/) {
- $_html = 1;
- }
- if (m/r/) {
- $_recursive = 1;
- }
- } elsif (m/^--help$/) {
- &usage();
- } elsif (m/^--quiet$/) {
- $_quiet = 1;
- } elsif (m/^--summary$/) {
- $_summary = 1;
- } elsif (m/^--broken$/) {
- $_redirects = 0;
- } elsif (m/^--dir-redirects$/) {
- $_dir_redirects = 0;
- } elsif (m/^--verbose$/) {
- $_verbose = 1;
- } elsif (m/^--indicator$/) {
- $_progress = 1;
- } elsif (m/^--html$/) {
- $_html = 1;
- } elsif (m/^--recursive$/) {
- $_recursive = 1;
- } elsif (m/^-l|--location$/) {
- $_base_location = shift(@ARGV);
- } elsif (m/^-u|--user$/) {
- $_user = shift(@ARGV);
- } elsif (m/^-p|--password$/) {
- $_password = shift(@ARGV);
- } elsif (m/^-t|--timeout$/) {
- $_timeout = shift(@ARGV);
- } elsif (m/^-d|--domain$/) {
- $_trusted = shift(@ARGV);
- } elsif (m/^-y|--proxy$/) {
- $_http_proxy = shift(@ARGV);
- } elsif (m/^-c|--chunksize$/) {
- $_chunksize = shift(@ARGV);
- } else {
- push(@uris, $_);
- }
- }
- return(@uris);
-}
-
-sub usage() {
- print STDERR "$PROGRAM $VERSION
-Usage: LinkCheck.pl <options> <uris>
-Options:
- -s/--summary Result summary only.
- -b/--broken Show only the broken links, not the redirects.
- -e/--directory Hide directory redirects - e.g.
- http://www.w3.org/TR -> http://www.w3.org/TR/
- -r/--recursive Check the documents linked from the first one.
- -l/--location uri Scope of the documents checked.
- By default, for
- http://www.w3.org/TR/html4/Overview.html
- for example, it would be:
- http://www.w3.org/TR/html4/
- -q/--quiet No output if no errors are found.
- -v/--verbose Verbose mode.
- -i/--indicator Show progress while parsing.
- -u/--user username Specify a username for authentication.
- -p/--password password Specify a password.
- -t/--timeout value Timeout for the HTTP requests.
- -d/--domain domain Regular expression describing the domain to
- which the authetication information will be
- sent (default: $_trusted).
- -y/--proxy proxy Specify an HTTP proxy server.
- -c/--chunk-size size Size of the blocks parsed (default: $_chunksize).
- -h/--html HTML output.
- --help Show this message.
-";
- exit(0);
-}
-
-sub ask_password() {
- print(STDERR 'Enter your password: ');
- system('stty -echo');
- chomp($_password = <STDIN>);
- system('stty echo');
- print(STDERR "ok.\n");
-}
-
-###############################################################################
-
-###########################################
-# Transform foo into file://localhost/foo #
-###########################################
-
-sub urize() {
- use URI;
- $_ = URI::Escape::uri_unescape($_[0]);
- my $base;
- my $res = $_;
- if (m/:/) {
- $base = URI->new($_);
- } elsif (m/^\//) {
- $base = URI->new('file://localhost');
- } else {
- my $pwd;
- chop($pwd = `pwd`);
- $base = URI->new('file://localhost'.$pwd.'/');
- }
- my $u = URI->new($res);
- my $result = $u->abs($base);
- return($result->as_string());
-}
-
-#######################
-# Do the job on a URI #
-#######################
-
-sub check_uri() {
- my ($uri, $html_stuff) = @_;
-
- if ($_html && $html_stuff) {
- $first = 1;
- } else {
- $first = 0;
- }
-
- my $start;
- if (! $_quiet) {
- $start = &get_timestamp();
- }
-
- # Get the document
- my $response = &get_document('GET', $uri, 1, \%redirects);
-
- if (defined($response->{Stop})) {
- if ($html_stuff) {
- &html_header($uri);
- }
- &hprintf("\nError: %d %s\n",
- $response->code(), $response->message());
- if ($html_stuff) {
- &html_footer();
- }
- return(-1);
- }
-
- if ($_html) {
- if ($html_stuff) {
- &html_header($uri);
- }
- print('<h2>');
- }
-
- my $absolute_uri = $response->{absolute_uri}->as_string();
-
- printf("\nProcessing\t%s\n\n", $absolute_uri);
-
- if ($_html) {
- print("</h2>\n");
- if (! $_summary) {
- print "<pre>\n";
- }
- }
-
- $processed{$absolute_uri} = 1;
- my $p = &parse_document($uri, $absolute_uri,
- $response->content(), 1);
- my $base = URI->new($p->{base});
-
- # Check anchors
- ###############
-
- if (! $_summary) {
- print("Checking anchors:\n");
- }
- my %errors;
- my $anchor;
- foreach $anchor (keys %{$p->{Anchors}}) {
- my @lines = keys %{$p->{Anchors}{$anchor}};
- my $times = $#lines + 1;
- if ($times > 1) {
- $errors{$anchor} = 1;
- }
- if ($anchor eq '') {
- $errors{$anchor} = 1;
- }
- }
- if (! $_summary) {
- print(" done.\n");
- }
-
- # Check links
- #############
-
- my %links;
- # Record all the links
- my $link;
- foreach $link (keys %{$p->{Links}}) {
- my $link_uri = URI->new($link);
- my $abs_link_uri = URI->new_abs($link_uri, $base);
- my $lines;
- foreach $lines (keys %{$p->{Links}{$link}}) {
- my $canonical = URI->new($abs_link_uri->canonical());
- my $url = $canonical->scheme().':'.$canonical->opaque();
- my $fragment = $canonical->fragment();
- if (! $fragment) {
- $links{$url}{location}{$lines} = 1;
- } else {
- $links{$url}{fragments}{$fragment}{$lines} = 1;
- }
- }
- }
-
- # Build the list of broken URI's
- my %broken;
- my $u;
- foreach $u (keys %links) {
- # Don't check mailto: URI's
- next if ($u =~ m/^mailto:/);
- if (! $_summary) {
- &hprintf("Checking link %s\n", $u);
- }
- &check_validity($uri, $u, \%links, \%redirects);
- if ($_verbose) {
- &hprintf("\tReturn code: %s\n", $results{$u}{location}{code});
- }
- if ($results{$u}{location}{success}) {
- my $fragment;
- if ($results{$u}{location}{display} >= 400) {
- $broken{$u}{location} = 1;
- }
- foreach $fragment (keys %{$links{$u}{fragments}}) {
- if ($_verbose) {
- &hprintf("\t\t%s %s - Lines: %s\n",
- $fragment,
- ($results{$u}{fragments}{$fragment}
- ? 'OK' : 'Not found'),
- join(',',
- keys %{$links{$u}{fragments}{$fragment}})
- );
- }
- # A broken fragment?
- if ($results{$u}{fragments}{$fragment} == 0) {
- $broken{$u}{fragments}{$fragment} += 2;
- }
- }
- } else {
- # Couldn't find the document
- $broken{$u}{location} = 1;
- my $fragment;
- foreach $fragment (keys %{$links{$u}{fragments}}) {
- $broken{$u}{fragments}{$fragment}++;
- }
- }
- }
- if (! $_summary) {
- my $stop = &get_timestamp();
- &hprintf("Processed in %ss.\n", &time_diff($start, $stop));
- }
-
- # Display results
- if ($_html) {
- if (! $_summary) {
- print "</pre>\n";
- }
- }
- if (! $_quiet) {
- print "\n";
- }
- &anchors_summary($p->{Anchors}, \%errors);
- &links_summary(\%links, \%results, \%broken, \%redirects);
-
- # Do we want to process other documents?
- if ($_recursive) {
- if ($_base_location eq '.') {
- # Get the name of the original directory
- # e.g. http://www.w3.org/TR/html4/Overview.html
- # should return http://www.w3.org/TR/html4/
- $results{$uri}{parsing}{base} =~ m/^(.*\/)[^\/]*/;
- $_base_location = $1;
- }
- foreach $u (keys %links) {
- next if (! (# Check if it's in our scope for recursion
- ($u =~ m/^$_base_location/) &&
- # and the link is not broken
- $results{$u}{location}{success})
- );
- # Check if we have already processed the URI
- next if (&already_processed($u) != 0);
- # Do the job
- print "\n";
- if (! $_html) {
- my $i = 40;
- while ($i--) {
- print('-');
- }
- } else {
- print('<hr>');
- # For the online version, wait for a while to avoid abuses
- sleep(3);
- }
- print "\n";
- &check_uri($u, 0);
- }
- }
-
- if ($_html && $html_stuff) {
- &html_footer();
- }
-}
-
-#############################
-# Get a document to process #
-#############################
-
-sub get_document() {
- my ($method, $uri, $in_recursion, $redirects) = @_;
-
- # Get the document
- my $response = &get_uri($method, $uri);
- &record_results($uri, $method, $response);
- $first = 0;
- if (! $response->is_success()) {
- if (! $in_recursion) {
- if ($response->code() == 401) {
- &authentication($response);
- } else {
- if ($_html) {
- &html_header($uri);
- }
- &hprintf("Error: %d %s\n",
- $response->code(), $response->message());
- if ($_html) {
- &html_footer();
- }
- }
- }
- $response->{Stop} = 1;
- return($response);
- }
-
- &record_redirects($redirects, $response->{Redirects});
-
- # What URI are we processing by the way?
- my $base_uri = URI->new($response->base());
- my $request_uri = URI->new($response->request->url);
- $response->{absolute_uri} = $base_uri->abs($request_uri);
-
- # Parse the document
- if (! ($response->header('Content-type') =~ m/text\/html/)) {
- if (! $in_recursion) {
- &hprintf("Can't check link: Content-type is '%s'.\n",
- $response->header('Content-type'));
- }
- $response->{Stop} = 1;
- }
-
- # Ok, return the information
- return($response);
-}
-
-##################################################
-# Check whether a URI has already been processed #
-##################################################
-
-sub already_processed($) {
- my ($uri, %redirects) = @_;
- # Don't be verbose for that part
- my $summary_value = $_summary;
- $_summary = 1;
- # Do a HEAD
- my $response = &get_document('HEAD', $uri, 1);
- $_summary = $summary_value;
- # Have we already processed it?
- return(-1) if (defined($response->{Stop}));
- return(1) if (defined($processed{$response->{absolute_uri}->as_string()}));
- return(0);
-}
-
-############################
-# Get the content of a URI #
-############################
-
-sub W3C::UserAgent::simple_request() {
- my $self = shift;
- my $response = $self->W3C::UserAgent::SUPER::simple_request(@_);
- if (! defined($self->{FirstResponse})) {
- $self->{FirstResponse} = $response->code();
- $self->{FirstMessage} = $response->message();
- }
- return $response;
-}
-
-sub W3C::UserAgent::redirect_ok {
- my ($self, $request) = @_;
-
- if (! ($_summary || $first)) {
- &hprintf("\n%s %s ", $request->method(), $request->uri());
- }
-
- $self->{Redirects}{$self->{fetching}} = $request->uri();
- $self->{fetching} = $request->uri();
-
- return 0 if $request->method() eq "POST";
- return 1;
-}
-
-sub get_uri() {
- # Here we have a lot of extra parameters in order not to lose information
- # if the function is called several times (401's)
- my ($method, $uri, $start, $redirects, $code, $realm, $message, $tested) = @_;
- if (! defined($start)) {
- $start = &get_timestamp();
- }
- # Prepare the query
- my $ua = new W3C::UserAgent;
- $ua->timeout($_timeout);
- $ua->agent('W3CLinkChecker/'.$REVISION.' '.$ua->agent());
- if ($_http_proxy) {
- $ua->proxy('http', 'http://'.$_http_proxy);
- }
- $ua->{uri} = $uri;
- $ua->{fetching} = $uri;
- if (defined($redirects)) {
- $ua->{Redirects} = $redirects;
- }
- my $count = 0;
- my $response;
- if (! ($_summary || $first)) {
- &hprintf("%s %s ", $method, $uri);
- }
- my $request = new HTTP::Request($method, $uri);
- # Are we providing authentication info?
- if (defined($tested)
- && ($request->url->netloc =~ /$_trusted$/)) {
- if (defined($ENV{HTTP_AUTHORIZATION})) {
- $request->headers->header(Authorization => $ENV{HTTP_AUTHORIZATION});
- } elsif (defined($_user) && defined($_password)) {
- use MIME::Base64;
- my $authorization = encode_base64($_user.':'.$_password);
- $request->headers->header(Authorization => 'Basic '.$authorization);
- }
- }
- # Do the query
- $response = $ua->request($request);
- # Get the results
- if (! defined($code)) {
- $code = $ua->{FirstResponse};
- }
- if (! defined($message)) {
- $message = $ua->{FirstMessage};
- }
- # Authentication requested?
- if (($response->code() == 401)
- && (defined($ENV{HTTP_AUTHORIZATION})
- || (defined($_user) && defined($_password)))
- && !defined ($tested)) {
- # Deal with authentication and avoid loops
- if (! defined ($realm)) {
- $response->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/;
- $realm = $1;
- }
- if (! ($_summary || $first)) {
- print "\n";
- }
- return &get_uri($method, $response->request->url,
- $start, $ua->{Redirects},
- $code, $realm, $message, 1);
- }
- # Record the redirects
- $response->{Redirects} = $ua->{Redirects};
- my $stop = &get_timestamp();
- if (! ($_summary || $first)) {
- &hprintf(" fetched in %ss\n", &time_diff($start,$stop));
- }
- $response->{OriginalCode} = $code;
- $response->{OriginalMessage} = $message;
- if (defined($realm)) {
- $response->{Realm} = $realm;
- }
- return $response;
-}
-
-#########################################
-# Record the results of an HTTP request #
-#########################################
-
-sub record_results() {
- my ($uri, $method, $response) = @_;
- $results{$uri}{method} = $method;
- $results{$uri}{location}{code} = $response->code();
- $results{$uri}{location}{type} = $response->header('Content-type');
- $results{$uri}{location}{display} = $results{$uri}{location}{code};
- $results{$uri}{location}{orig} = $response->{OriginalCode};
- # Did we get a redirect?
- if ($response->{OriginalCode} != $response->code()) {
- $results{$uri}{location}{orig_message} = $response->{OriginalMessage};
- $results{$uri}{location}{redirected} = 1;
- }
- $results{$uri}{location}{success} = $response->is_success();
- # Stores the authentication information
- if (defined($response->{Realm})) {
- $results{$uri}{location}{realm} = $response->{Realm};
- $results{$uri}{location}{display} = 401;
- }
- if (($results{$uri}{location}{display} == 401)
- && ($results{$uri}{location}{code} == 404)) {
- $results{$uri}{location}{record} = 404;
- } else {
- $results{$uri}{location}{record} = $results{$uri}{location}{display};
- }
- # Did it fail?
- if (! $results{$uri}{location}{success}) {
- $results{$uri}{location}{message} = $response->message();
- if ($_verbose) {
- &hprintf("Error: %d %s\n",
- $results{$uri}{location}{code},
- $results{$uri}{location}{message});
- }
- return;
- }
-}
-
-####################
-# Parse a document #
-####################
-
-sub parse_document() {
- my ($uri, $location, $document, $links) = @_;
-
- my $p;
-
- if (defined($results{$uri}{parsing})) {
- # We have already done the job. Woohoo!
- $p->{base} = $results{$uri}{parsing}{base};
- $p->{Anchors} = $results{$uri}{parsing}{Anchors};
- $p->{Links} = $results{$uri}{parsing}{Links};
- return($p);
- }
-
- my $start;
- $p = W3C::LinkChecker->new();
- # Loose interpretation of the HTML comments since browsers will do the same
- $p->strict_comment(0);
- $p->{base} = $location;
- if (! $_summary) {
- $start = &get_timestamp();
- print("Parsing...\n");
- }
- if (!$_summary || $_progress) {
- $p->{Total} = ($document =~ tr/\n//);
- }
- # We only look for anchors if we are not interested in the links
- # obviously, or if we are running a recursive checking because we
- # might need this information later
- $p->{only_anchors} = !($links || $_recursive);
-
- # Parse small chunks: much faster
- my @chunks = unpack("a$_chunksize"x(length($document)/$_chunksize).'a*',
- $document);
- for (@chunks) {
- $p->parse($_);
- }
-
- if (! $_summary) {
- my $stop = &get_timestamp();
- if ($_progress) {
- print "\r";
- }
- &hprintf(" done (%d lines in %ss).\n",
- $p->{Total}, &time_diff($start, $stop));
- }
-
- # Save the results before exiting
- $results{$uri}{parsing}{base} = $p->{base};
- $results{$uri}{parsing}{Anchors} = $p->{Anchors};
- $results{$uri}{parsing}{Links} = $p->{Links};
-
- return($p);
-}
-
-####################################
-# Constructor for W3C::Linkchecker #
-####################################
-
-sub W3C::LinkChecker::new() {
- my $p = HTML::Parser::new(@_);
-
- # Line count
- $p->{Line} = 1;
- # Attribute for ids in element a
- # Up to XHTML 1.0, it is 'name'. After that it is 'id'.
- $p->{check_name} = 1;
-
- return $p;
-}
-
-#################################################
-# Record or return the doctype of the document #
-#################################################
-
-sub W3C::LinkChecker::doctype() {
- my ($self, $dc) = @_;
- if (! $dc) {
- return $self->{doctype};
- }
- $self->{doctype} = $dc;
- # Check if we should check <a name="..."> or not
- # The good way to do that is to get the DTD and parse it but it is
- # much more complex
- if ($dc eq '-//W3C//DTD XHTML Basic 1.0//EN') {
- $self->{check_name} = 0;
- }
-}
-
-#######################################
-# Count the number of lines in a file #
-#######################################
-
-sub W3C::LinkChecker::new_line() {
- my ($self, $string) = @_;
- my $count = ($string =~ tr/\n//);
- $self->{Line} = $self->{Line} + $count;
- if ($_progress) {
- printf("\r%4d%%", int($self->{Line}/$self->{Total}*100));
- }
-}
-
-#######################################
-# start function used by HTML::Parser #
-#######################################
-
-sub W3C::LinkChecker::start() {
- my ($self, $tag, $attr, $attrseq, $text) = @_;
- my $anchor;
- # Links
- if (!$self->{only_anchors}) {
- my $link;
- $anchor = $attr->{id};
- if (($tag eq 'a')) {
- $link = $attr->{href};
- if ($self->{check_name}) {
- $anchor = $attr->{name};
- }
- } elsif ($tag eq 'img') {
- $link = $attr->{src};
- } elsif (($tag eq 'frame') || ($tag eq 'link')) {
- $link = $attr->{href};
- }
- if (defined($link)) {
- $self->{Links}{$link}{$self->{Line}}++;
- }
- # Just anchors
- } else {
- $anchor = $attr->{id};
- if (($tag eq 'a') && $self->{check_name}) {
- $anchor = $attr->{name};
- }
- }
- if (defined($anchor)) {
- $self->{Anchors}{$anchor}{$self->{Line}}++;
- }
- # Line counting
- if ($text =~ m/\n/) {
- $self->new_line($text);
- }
-}
-
-####################################################
-# Overloading functions for line counting purposes #
-####################################################
-
-# W3C::LinkChecker::text() is called by end(), declaration() and comment()
-sub W3C::LinkChecker::text() {
- my ($self, $text) = @_;
- if (!$_progress) {
- # If we are just extracting information about anchors,
- # parsing this part is only cosmetic (progress indicator)
- return unless !$self->{only_anchors};
- }
- if ($text =~ /\n/) {
- $self->new_line($text);
- }
-}
-
-sub W3C::LinkChecker::end() {
- my $self = shift;
- return unless !$self->{only_anchors};
- shift;
- my $text = shift;
- $self->text($text);
-}
-
-sub W3C::LinkChecker::declaration() {
- my ($self, $text) = @_;
- # Extract the doctype
- my @declaration = split(/\s+/, $text, 4);
- if (($#declaration >= 3) &&
- ($declaration[0] eq 'DOCTYPE') &&
- (lc($declaration[1]) eq 'html')) {
- # Parse the doctype declaration
- $text =~ m/^DOCTYPE\s+html\s+PUBLIC\s+\"([^\"]*)\"(\s+\"([^\"]*)\")?\s*$/i;
- # Store the doctype
- if ($1) {
- $self->doctype($1);
- }
- # If there is a link to the DTD, record it
- if (!$self->{only_anchors} && $3) {
- $self->{Links}{$3}{$self->{Line}}++;
- }
- }
- return unless !$self->{only_anchors};
- $self->text($text);
-}
-
-sub W3C::LinkChecker::comment() {
- my $self = shift;
- return unless !$self->{only_anchors};
- my $text = shift;
- $self->text($text);
-}
-
-################################
-# Check the validity of a link #
-################################
-
-sub check_validity() {
- use HTTP::Status;
- my ($testing, $uri, $links, $redirects) = @_;
-
- # Checking file: URI's is not allowed with a CGI
- if ($testing ne $uri) {
- if ((! $_cl) && (!($testing =~ m/^file:/)) && ($uri =~ m/^file:/)) {
- # Can't test? Return 400 Bad request.
- $results{$uri}{location}{code} = 400;
- $results{$uri}{location}{success} = 0;
- $results{$uri}{location}{message} = 'Error: \'file:\' URI not allowed';
- if ($_verbose) {
- &hprintf("Error: %d %s\n",
- $results{$uri}{location}{code},
- $results{$uri}{location}{message});
- }
- return;
- }
- }
-
- # Get the document with the appropriate method
- my $method;
- my @fragments = keys %{$links->{$uri}{fragments}};
- if ($#fragments == -1) {
- $method = 'HEAD';
- } else {
- $method = 'GET';
- }
-
- my $response;
- my $being_processed = 0;
- if ((! defined($results{$uri}))
- || (($method eq 'GET') && ($results{$uri}{method} eq 'HEAD'))) {
- $being_processed = 1;
- $response = &get_uri($method, $uri);
- # Record the redirects
- &record_redirects($redirects, $response->{Redirects});
- # Get the information back from get_uri()
- &record_results($uri, $method, $response);
- }
-
- if ($#fragments == -1) {
- return;
- }
- # There are fragments. Parse the document.
- my $p;
- if ($being_processed) {
- # Can we really parse the document?
- if (! ($results{$uri}{location}{type} =~ m/text\/html/i)) {
- if ($_verbose) {
- &hprintf("Can't check content: Content-type is '%s'.\n",
- $response->header('Content-type'));
- }
- return;
- }
- # Do it then
- $p = &parse_document($uri, $response->base(),
- $response->as_string(), 0);
- } else {
- # We already had the information
- $p->{Anchors} = $results{$uri}{parsing}{Anchors};
- }
- # Check that the fragments exist
- my $fragment;
- foreach $fragment (keys %{$links->{$uri}{fragments}}) {
- if (defined($p->{Anchors}{$fragment})
- || &escape_match($fragment, $p->{Anchors})) {
- $results{$uri}{fragments}{$fragment} = 1;
- } else {
- $results{$uri}{fragments}{$fragment} = 0;
- }
- }
-}
-
-sub escape_match($, \%) {
- use URI::Escape;
- my ($a, $hash) = (uri_unescape($_[0]), $_[1]);
- foreach $b (keys %$hash) {
- if ($a eq uri_unescape($b)) {
- return(1);
- }
- }
- return(0);
-}
-
-##########################
-# Ask for authentication #
-##########################
-
-sub authentication() {
- my $r = $_[0];
- $r->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/;
- my $realm = $1;
- my $authHeader = $r->headers->www_authenticate;
- if ($_cl) {
- printf(STDERR "\nAuthentication is required for %s.\n", $r->request->url);
- printf(STDERR "The realm is %s.\n", $realm);
- print(STDERR "Use the -u and -p options to specify a username and password.\n");
- } else {
- printf("Status: 401 Authorization Required\nWWW-Authenticate: %s\nConnection: close\nContent-Type: text/html\n\n", $r->headers->www_authenticate);
- printf("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">
-<html>
-<head>
-<title>401 Authorization Required</title>
-</head>
-<body>
-<h1>Authorization Required</h1>
-<p>You need %s access to %s to perform Link Checking.</p>
-</body>
-</html>
-", $realm, $r->request->url);
- }
-}
-
-##################
-# Get statistics #
-##################
-
-sub get_timestamp() {
- require 'sys/syscall.ph';
- my $timestamp = pack('LL', ());
- syscall(&SYS_gettimeofday, $timestamp, 0) != -1 or $timestamp = 0;
- return($timestamp);
-}
-
-sub time_diff() {
- my @start = unpack('LL', $_[0]);
- my @stop = unpack('LL', $_[1]);
- for ($start[1], $stop[1]) {
- $_ /= 1_000_000;
- }
- return(sprintf("%.2f", ($stop[0]+$stop[1])-($start[0]+$start[1])));
-}
-
-########################
-# Handle the redirects #
-########################
-
-# Record the redirects in a hash
-sub record_redirects(\%, \%) {
- my ($redirects, $sub) = @_;
- my $r;
- foreach $r (keys %$sub) {
- $redirects->{$r} = $sub->{$r};
- }
-}
-
-# Determine if a request is redirected
-sub is_redirected($, %) {
- my ($uri, %redirects) = @_;
- return(defined($redirects{$uri}));
-}
-
-# Get a list of redirects for a URI
-sub get_redirects($, %) {
- my ($uri, %redirects) = @_;
- my @history = ($uri);
- my $origin = $uri;
- my %seen;
- while ($redirects{$uri}) {
- $uri = $redirects{$uri};
- push(@history, $uri);
- last if ($uri eq $origin);
- }
- return(@history);
-}
-
-####################################################
-# Tool for sorting the unique elements of an array #
-####################################################
-
-sub sort_unique() {
- my %saw;
- @saw{@_} = ();
- return (sort { $a <=> $b } keys %saw);
-}
-
-#####################
-# Print the results #
-#####################
-
-sub anchors_summary(\%, \%) {
- my ($anchors, $errors) = @_;
- # Number of anchors found.
- if (! $_quiet) {
- if ($_html) {
- print('<p>');
- }
- my @anchors = keys %{$anchors};
- &hprintf("Found %d anchors.", $#anchors+1);
- if ($_html) {
- print('</p>');
- }
- print("\n");
- }
- # List of the duplicates, if any.
- my @errors = keys %{$errors};
- if ($#errors < 0) {
- if (! $_quiet && $_html) {
- print "<p>Valid anchors!</p>\n";
- }
- return;
- }
- if ($_html) {
- print('<p>');
- }
- print('List of duplicate and empty anchors:');
- if ($_html) {
- print("</p>\n<table border=\"1\">\n<tr><td><b>Anchors</b></td><td><b>Lines</b></td></tr>");
- }
- print("\n");
- my $anchor;
- foreach $anchor (@errors) {
- my $format;
- if ($_html) {
- $format = "<tr><td>%s</td><td>%s</td></tr>\n";
- } else {
- $format = "\t%s\tLines: %s\n";
- }
- printf($format,
- &encode($anchor eq '' ? 'Empty anchor' : $ anchor),
- join(', ', &sort_unique(keys %{$anchors->{$anchor}})));
- }
- if ($_html) {
- print("</table>\n");
- }
-}
-
-sub show_link_report {
- my ($links, $results, $broken, $redirects, $urls) = @_;
-
- # Head of the table
- if ($_html) {
- print("\n<table border=\"1\">\n<tr><td><b>Location</b></td><td><b>Code</b></td><td><b>Fragment</b></td><td><b>Lines</b></td></tr>");
- }
- print("\n");
-
- # Process each URL
- my $u;
- foreach $u (@{$urls}) {
- my @fragments = keys %{$broken->{$u}{fragments}};
- # Count the rows displayed
- my $n_fragments = $#fragments+1;
- if (!defined($broken->{$u}{location})) {
- $n_fragments++;
- }
- # Did we get a redirect?
- my $redirected = &is_redirected($u, %$redirects);
- # List of lines
- my @total_lines;
- my ($f, $l);
- foreach $l (keys %{$links->{$u}{location}}) {
- push (@total_lines, $l);
- }
- foreach $f (keys %{$links->{$u}{fragments}}) {
- if ($f eq $u) {
- next if (defined($links->{$u}{$u}{-1}));
- }
- my $l;
- foreach $l (keys %{$links->{$u}{fragments}{$f}}) {
- push (@total_lines, $l);
- }
- }
- my $lines_list = join(', ',
- &sort_unique(@total_lines));
- if ($_html) {
- # Main info
- my @redirects_urls = &get_redirects($u, %$redirects);
- for (@redirects_urls) {
- $_ = &show_url($_);
- }
- printf("<tr><th rowspan=\"%d\"%s>%s</th><th rowspan=\"%d\"%s>%d%s%s%s%s</th><td>%s</td><td%s>%s</td></tr>\n",
- # Number of fragments (for rowspan)
- $n_fragments,
- # Color
- &bgcolor($results->{$u}{location}{record}),
- # List of redirects
- $redirected ? join('<br>-&gt; ',
- @redirects_urls) : &show_url($u),
- # Number of fragments (for rowspan)
- $n_fragments,
- # Color
- &bgcolor($results->{$u}{location}{orig}),
- # Original HTTP reply
- $results->{$u}{location}{orig},
- # Final HTTP reply
- ($results->{$u}{location}{code} != $results->{$u}{location}{orig})
- ? '-&gt; '.&encode($results->{$u}{location}{code})
- : '',
- # Realm
- defined($results->{$u}{location}{realm})
- ? '<br>Realm: '.&encode($results->{$u}{location}{realm})
- : '',
- # HTTP original message
- defined($results->{$u}{location}{orig_message})
- ? '<br>'.&encode($results->{$u}{location}{orig_message}).' -&gt;'
- : '',
- # HTTP final message
- $results->{$u}{location}{message}
- ? '<br>'.&encode($results->{$u}{location}{message})
- : '',
- '',
- # Color again
- &bgcolor($results->{$u}{location}{code}),
- # List of lines
- $lines_list);
- } else {
- printf("\n%s\t%s\n Code: %d%s %s\n",
- # List of redirects
- $redirected ? join("\n-> ",
- &get_redirects($u, %$redirects)) : $u,
- # List of lines
- $lines_list ? 'Lines: '.$lines_list : '' ,
- # Original HTTP reply
- $results->{$u}{location}{orig},
- # Final HTTP reply
- ($results->{$u}{location}{code} != $results->{$u}{location}{orig})
- ? ' -> '.$results->{$u}{location}{code}
- : '',
- # HTTP message
- $results->{$u}{location}{message} ? $results->{$u}{location}{message} : '');
- }
- # Fragments
- foreach $f (@fragments) {
- next if ($f eq $u);
- if ($_html) {
- my $color = ($broken->{$u}{fragments}{$f} > 1) ? &bgcolor(404) : &bgcolor($results->{$u}{location}{code});
- printf("<tr><td%s>%s</td><td%s>%s</td></tr>\n",
- # Color
- $color,
- # Broken fragment
- &show_url($u, $f),
- # Color
- $color,
- # List of lines
- join(', ',
- &sort_unique(keys %{$links->{$u}{fragments}{$f}})));
- } else {
- printf("\t%-30s\tLines: %s\n",
- # Fragment
- $f,
- # List of lines
- join(', ',
- &sort_unique(keys %{$links->{$u}{fragments}{$f}})));
- }
- }
- }
- # End of the table
- if ($_html) {
- print("</table>\n");
- }
-}
-
-sub links_summary {
- # Advices to fix the problems
-
- my %todo = ( 200 => 'nothing !',
- 300 => 'it usually means that there is a typo in a link that triggers <strong>mod_speling</strong> action - this should be fixed',
- 301 => 'usually nothing, unless the end point of the redirect is broken (in which case, the <B>Code</B> column is RED)',
- 302 => 'usually nothing, unless the end point of the redirect is broken (in which case, the <B>Code</B> column is RED)',
- 400 => 'Usually the sign of a malformed URL that cannot be parsed by the server',
- 401 => 'The link is not public. You had better specify it.',
- 403 => 'The link is forbidden ! This needs fixing. Usual suspect: a missing Overview.html or index.html, or bad access control',
- 404 => 'The link is broken. Fix it <B>NOW</B>',
- 405 => 'The server does not allow HEAD requests. How liberal. Go ask the guys who run this server why.',
- 407 => 'The link is a proxy, but requires Authentication.',
- 408 => 'The request timed out',
- 415 => 'The media type is not supported.',
- 500 => 'The server failed. It is a server side problem.',
- 501 => 'HEAD or GET is not implemented on this server... What kind of server is that?',
- 503 => 'The server cannot service the request, for some unknown reason');
-
- my ($links, $results, $broken, $redirects) = @_;
-
- # Count the links. Useless but interesting.
- if (! $_quiet) {
- if ($_html) {
- print("\n\n<p>");
- }
- my @links = keys %$links;
- my $n_fragments = 0;
- my $n_total = 0;
- my $u;
- # Give a few stats
- foreach $u (@links) {
- my @fragments = keys %{$links->{$u}{fragments}};
- $n_fragments += $#fragments + 1;
- if (defined($links->{$u}{location})) {
- $n_fragments++;
- }
- my ($f, $l);
- foreach $l (keys %{$links->{$u}{location}}) {
- $n_total += $links->{$u}{location}{$l};
- }
- foreach $f (@fragments) {
- my @lines = keys %{$links->{$u}{fragments}{$f}};
- foreach $l (@lines) {
- $n_total += $links->{$u}{fragments}{$f}{$l};
- }
- }
- }
- &hprintf("Found %d locations for %d unique URI's (%d total).",
- $#links+1, $n_fragments, $n_total);
- if ($_html) {
- print('</p>');
- }
- print("\n");
- }
-
- # Print a summary
- if ($_html) {
- my %code_summary;
- my $u;
- foreach $u (keys %$links) {
- if (defined($results->{$u}{location}{record})) {
- my $c;
- if ($results->{$u}{location}{record} == 200) {
- $c = $results->{$u}{location}{orig};
- } else {
- $c = $results->{$u}{location}{record};
- }
- $code_summary{$c}++;
- }
- }
- print "<table border=\"1\">\n<tr><td><b>Code</b></td><td><b>Occurences</b></td><td><b>What to do</b></td></tr>\n";
- my $code;
- foreach $code (sort(keys(%code_summary))) {
- printf("<tr%s>", &bgcolor($code));
- printf("<td>%s</td>", $code);
- printf("<td>%s</td>", $code_summary{$code});
- printf("<td>%s</td>", $todo{$code});
- print "</tr>\n";
- }
- print "</table>\n";
- }
-
- # List of the broken links
- my @urls = keys %{$broken};
- my @dir_redirect_urls = ();
- if ($_redirects) {
- # Add the redirected URI's to the report
- my $l;
- for $l (keys %$redirects) {
- next unless (defined($results->{$l})
- && defined($links->{$l})
- && !defined($broken->{$l}));
- # Check whether we have a "directory redirect"
- # e.g. http://www.w3.org/TR -> http://www.w3.org/TR/
- my @redirects = &get_redirects($l, %$redirects);
- if (($#redirects == 1)
- && (($redirects[0].'/') eq $redirects[1])) {
- push(@dir_redirect_urls, $l);
- next;
- }
- push(@urls, $l);
- }
- }
-
- # Broken links and redirects
- if ($#urls < 0) {
- if (! $_quiet && $_html) {
- print "<p>Valid links!</p>\n";
- }
- } else {
- if ($_html) {
- print('<p>');
- }
- print("\nList of broken links");
- if ($_redirects) {
- print(' and redirects');
- }
- print(':');
- if ($_html) {
- print('<br>Broken fragments appear in red</p>');
- }
- &show_link_report($links, $results, $broken, $redirects,
- \@urls);
- }
-
- # Show directory redirects
- if ($_redirects && $_dir_redirects && ($#dir_redirect_urls > -1)) {
- if ($_html) {
- print('<p>');
- }
- print("\nList of directory redirects:");
- &show_link_report($links, $results, $broken, $redirects,
- \@dir_redirect_urls);
- }
-}
-
-###############################################################################
-
-##################
-# HTML interface #
-##################
-
-sub html_header() {
- my $uri = &encode($_[0]);
- # Cache control?
- if (defined($_[1])) {
- print "Cache-Control: no-cache\nPragma: no-cache\n";
- }
- print "Content-type: text/html
-
-<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">
-<html>
-<head>
-<title>W3C Link Ckecker: $uri</title>
-<style type=\"text/css\">
-
-BODY {
- font-family: sans-serif;
- color: black;
- background: white;
-}
-
-A:link, A:active {
- color: #00E;
- background: transparent;
-}
-
-A:visited {
- color: #529;
- background: transparent;
-}
-
-PRE {
- font-family: monospace
-}
-
-IMG {
- color: white;
- border: none;
-}
-
-</style>
-</head>
-<body>
-<a href=\"http://www.w3.org/\"><img alt=\"W3C\" src=\"http://www.w3.org/Icons/w3c_home\" height=\"48\" width=\"72\"></a>
-<h1>W3C<sup>&reg;</sup> Link Checker: $uri</h1>
-\n";
-}
-
-sub bgcolor() {
- my ($code) = @_;
- my $color;
- my $r = HTTP::Response->new($code);
- if ($r->is_success()) {
- return '';
- }
- if ($code == 300) {
- return ' bgcolor="magenta"';
- }
- if ($code == 401) {
- return ' bgcolor="aqua"';
- }
- if ($r->is_redirect()) {
- return ' bgcolor="yellow"';
- }
- if ($r->is_error()) {
- return ' bgcolor="red"';
- }
- return ' bgcolor="grey"';
-}
-
-sub show_url() {
- my ($url, $fragment) = @_;
- if (defined($fragment)) {
- $url .= '#'.$fragment;
- }
- return('<a href="'.$url.'">'.&encode(defined($fragment) ? $fragment : $url).'</a>');
-}
-
-sub html_footer() {
- print "
-<hr>
-<address>
-$PROGRAM $VERSION<br>
-Report bugs to <a href=\"mailto:hugo\@w3.org\">Hugo Haas</a>.
-Check out the <a href=\"http://dev.w3.org/cvsweb/validator/httpd/cgi-bin/LinkChecker.pl\">source code</a>.
-</address>
-</body>
-</html>
-";
-}
-
-sub file_uri() {
- my $uri = $_[0];
- &html_header($uri);
- print "<h2>Forbidden</h2>
-<p>You cannot check such a URI (<code>$uri</code>).</p>
-";
- &html_footer();
- exit;
-}
-
-sub print_form() {
- my ($q) = @_;
- &html_header($VERSION, 1);
- print "<form action=\"".$q->self_url()."\" method=\"get\">
-<p>Enter the URI that you want to check:</p>
-<p><input type=\"text\" size=\"50\" name=\"uri\"></p>
-<p>Options:</p>
-<p>
- <input type=\"checkbox\" name=\"summary\"> Summary only
- <br>
- <input type=\"checkbox\" name=\"hide_redirects\"> Hide redirects
- <br>
- <input type=\"checkbox\" name=\"hide_dir_redirects\"> Hide directory redirects
- <br>
- <input type=\"checkbox\" name=\"recursive\"> Check linked documents recursively
-</p>
-<p><input type=\"submit\" name=\"submit\" value=\"Check\"></p>
-</form>
-";
- &html_footer();
- exit;
-}
-
-sub encode() {
- if (! $_html) {
- return @_;
- } else {
- return HTML::Entities::encode(@_);
- }
-}
-
-sub hprintf() {
- if (! $_html) {
- printf(@_);
- } else {
- print HTML::Entities::encode(sprintf($_[0], @_[1..@_-1]));
- }
-}
diff --git a/httpd/cgi-bin/p3p b/httpd/cgi-bin/p3p
deleted file mode 100755
index 5e272fd..0000000
--- a/httpd/cgi-bin/p3p
+++ /dev/null
@@ -1,6 +0,0 @@
-#! perl
-print "Content-type: text/html
-
-<html><body>koike</body></html>
-";
-
diff --git a/httpd/cgi-bin/referers b/httpd/cgi-bin/referers
deleted file mode 100755
index e8abf54..0000000
--- a/httpd/cgi-bin/referers
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/sh
-#
-# referers: CGI script to show the top referers of the 50,000 most recent
-# hits to this server.
-#
-# This source code is available under the license at:
-# http://www.w3.org/Consortium/Legal/copyright-software
-#
-# $Id: referers,v 1.8 2000-05-08 02:12:31 gerald Exp $
-
-LOG=/usr/local/apache/logs/validator_log; export LOG
-
-cat <<EOHD
-Content-Type: text/html
-
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
-
-<title>top referers to validator.w3.org</title>
-
-<body bgcolor="#FFFFFF" text="#000000" link="#0000ee" vlink="#551a8b">
-
-<p>
- This page lists the top 100 referers to <a href="/">the W3C Validation
- Service</a> (taken from the most recent 50,000 log entries.)
-</p>
-
-<table>
-EOHD
-
-tail -50000 $LOG | egrep -v image/png | egrep -v image/gif | \
- cut -d\" -f4 | sort | uniq -c | \
- grep -v ' \-$' | sort -rn | head -100 | sed 's/\&/\&amp;/g; s/</\&lt;/g' | \
- awk '{printf("<tr><td>%s</td><td><a href=\"%s\">%s</a></td></tr>\n",\
- $1, $2, $2)}'
-
-cat <<EOHD
-</table>
-
-<hr>
-<address>
-<a href="http://www.w3.org/People/Gerald/">Gerald Oskoboiny</a><br>
-\$Date: 2000-05-08 02:12:31 $ \
-</address>
-
-EOHD
-
-exit
-
diff --git a/httpd/cgi-bin/traceroute b/httpd/cgi-bin/traceroute
deleted file mode 100755
index 9816c0b..0000000
--- a/httpd/cgi-bin/traceroute
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/local/bin/perl
-#
-# traceroute: a CGI script that provides a Web interface to traceroute
-#
-# Copyright 1998 Gerald Oskoboiny <gerald@w3.org>
-#
-# This source code is available under the license at:
-# http://www.w3.org/Consortium/Legal/copyright-software
-#
-# $Id: traceroute,v 1.3 2001-12-21 23:39:27 gerald Exp $
-#
-
-print <<"EOF";
-Content-Type: text/html
-
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
-<html>
-
-<head>
- <title>service discontinued</title>
-</head>
-
-<body>
-
-<p>
- Sorry, the traceroute service that used to be here has been turned off due to
- abuse. There are a number of other <a
- href="http://www.google.com/search?q=traceroute">traceroute gateways</a>
- online, please use one of those instead. (if you are specifically interested
- in the route from MIT, see <a
- href="http://jis.mit.edu:8001/cgi-bin/traceroute">TraceRoute from MIT</a>.)
-</p>
-
-<hr>
-<address>
-<a href="http://www.w3.org/People/Gerald/">Gerald Oskoboiny</a><br>
-\$Date: 2001-12-21 23:39:27 $ \
-</address>
-
-</body>
-EOF
-
-exit; # turned off due to abuse; the real script follows:
-
-$| = 1;
-
-# accept either traceroute/foo or traceroute?foo; default to REMOTE_ADDR
-# if nothing else is specified
-$addr = $ENV{PATH_INFO} || $ENV{QUERY_STRING} || $ENV{REMOTE_ADDR};
-
-$addr =~ tr/+/ /;
-$addr =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
-$addr =~ s,^addr=,,;
-$addr =~ s/[^A-Za-z0-9\.-]//g; # for security
-
-print <<"EOF";
-Content-Type: text/html
-
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
-<html>
-
-<head>
- <title>Traceroute from validator.w3.org to $addr</title>
- <link rev="made" href="mailto:gerald\@w3.org">
-</head>
-
-<body bgcolor="#FFFFFF" text="#000000" link="#0000ee" vlink="#551a8b">
-
-<p>
- Here is the result of a traceroute from <code>validator.w3.org</code>
- to <code>$addr</code>:
-</p>
-
-<form action="/traceroute">
- <input type=text name=addr size=40 value="$addr">
- <input type=submit value="Traceroute">
- <input type=reset>
-</form>
-
-<pre>
-EOF
-
-open( TRACEROUTE, "/usr/sbin/traceroute $addr | " ) ||
- die "couldn't open pipe to traceroute! $!";
-
-while (<TRACEROUTE>) {
- chomp;
- s/\&/\&amp;/g;
- s/\</\&lt;/g;
- print "$_\n";
-}
-close( TRACEROUTE ) || die "couldn't close pipe to traceroute! $!";
-
-print <<"EOF";
-</pre>
-
-<hr>
-<address>
-<a href="http://www.w3.org/People/Gerald/">Gerald Oskoboiny</a><br>
-\$Date: 2001-12-21 23:39:27 $ \
-</address>
-
-EOF
-
-exit;
-