diff options
-rwxr-xr-x | htdocs/favlets.html | 128 | ||||
-rwxr-xr-x | htdocs/p3p.html | 119 | ||||
-rwxr-xr-x | httpd/cgi-bin/LinkChecker.pl | 1513 | ||||
-rwxr-xr-x | httpd/cgi-bin/p3p | 6 | ||||
-rwxr-xr-x | httpd/cgi-bin/referers | 48 | ||||
-rwxr-xr-x | httpd/cgi-bin/traceroute | 106 |
6 files changed, 0 insertions, 1920 deletions
diff --git a/htdocs/favlets.html b/htdocs/favlets.html deleted file mode 100755 index 309379e..0000000 --- a/htdocs/favlets.html +++ /dev/null @@ -1,128 +0,0 @@ -<!--#set var="revision" value="\$Id: favlets.html,v 1.6 2002-10-30 16:46:59 link Exp $" ---><!--#set var="date" value="\$Date: 2002-10-30 16:46:59 $" ---><!--#set var="title" value="Favlets For The W3C MarkUp Validation Service" ---><!--#set var="relroot" value="./" ---><!--#include virtual="header.html" --> - - <script type="text/javascript"> -<!-- - var xmlhttp - /*@cc_on @*/ - /*@if (@_jscript_version >= 5) - try { - xmlhttp=new ActiveXObject("Msxml2.XMLHTTP") - } catch (e) { - try { - xmlhttp=new ActiveXObject("Microsoft.XMLHTTP") - } catch (E) { - xmlhttp=false - } - } - @else - xmlhttp=false - @end @*/ - if (!xmlhttp) { - try { - xmlhttp = new XMLHttpRequest() - } catch (e) { - xmlhttp=false - } - } - if (xmlhttp && location.hash && location.hash.substr) { - var hsh=location.hash.substr(1) - if (hsh.indexOf('http://')==0) { - url="http://validator.w3.org:8001/check?uri="+hsh+";output=xml"; - xmlhttp.open("HEAD",url,true); - xmlhttp.onreadystatechange=function() { - if (xmlhttp.readyState==4) { - if (xmlhttp.getResponseHeader('X-W3C-Validator-Status')!='') { - alert(hsh+' is '+xmlhttp.getResponseHeader('X-W3C-Validator-Status')+'\nErrors: '+xmlhttp.getResponseHeader('X-W3C-Validator-Errors')); - } else { - alert("Validation result not found, this may be for a number of reasons, including url not being available, or a character coding not detected.") - } - } - } - xmlhttp.setRequestHeader('User-Agent','Validator+Favlet'); - xmlhttp.send("") - } - } -// --> - </script> - - <div id="skip"> - <h2><a href="http://Favlets.com/">Favlets</a> - For The Validator</h2> - <div class="intro"> - <p> - Favlets are small snippets of JavaScript embedded in a Bookmark - <acronym title="Universal Resource Identifier">URL</acronym> - that allows Bookmarks in browsers do various advanced things. - Popular Favlets include variants that prompts the user for a - phrase and searches the web for that phrase, or that finds older - versions of the currently viewed page in the - <a class="offsite" title="The Internet Archive WayBack Machine" - href="http://www.archive.org/">WayBack Machine</a>. - </p> - <p> - Favlets depend on support for <code>javascript:</code> - <acronym title="Universal Resource Identifiers">URLs</acronym> - in your browser's Bookmarks feature, and each Favlet may depend on - support for a specific part of the JavaScript specification to work - properly. <acronym title="Microsoft Internet Explorer">MSIE</acronym> - versions more recent then 5.0, and Mozilla 1.0 and later — this - includes browsers using the embedded version of Mozilla, such as - Netscape 7.0 — are known to support most Favlets. - <acronym title="Microsoft Internet Explorer for Mac OS">MSIE:mac</acronym> - supports basic Favlets, but it's general support for JavaScript - may render certain advanced Favlets inoperable. Netscape 4.x is - a lost cause in this regard, and with it's poor support for standards - in general it is probably better to avoid it altogether. - </p> - </div> - <div> - <dl> - <dt>"<a href="javascript:void(location='http://validator.w3.org/check?uri='+location)" - title="Validate This Page">Validate <em>This</em> Page</a>"</dt> - <dd> - This is the basic "Validate This Page" Favlet. - It simply submits the URL for the currently viewed page to the - Validator for processing. Results appear in the same window. - </dd> - <dt>"<a href="javascript:window.open('http://validator.w3.org/check?uri='+location);void%200" - title="Validate This Page In New Window">Validate <em>This</em> Page In New Window</a>"</dt> - <dd> - Like the last Favlet, this also submits the URL of the current - page to the Validator for processing, but this version will show - the results in a new window. - </dd> - <dt>"<a href="javascript:void(q=prompt('Validate Page:',''));if(q)void(location='http://validator.w3.org/check?uri='+escape(q))" - title="Validate Page...">Validate Page...</a>"</dt> - <dd> - Puts up a dialog with a text entry field where you can type in the - URL of a page you would like to Validate. The results appear in the - current window. - </dd> - <dt>"<a href="javascript:void(q=prompt('Validate Page:',''));if(q)window.open('http://validator.w3.org/check?uri='+escape(q));void%200" - title="Validate Page In New Window...">Validate Page In New Window...</a>"</dt> - <dd>Same as above but shows results in a new window.</dd> - <dt>"<a href="javascript:_is=document.createElement('iframe');_is.setAttribute('src','http://validator.w3.org:8001/favlets.html#'+location.href);_is.setAttribute('height','0');_is.setAttribute('width','0');_is.setAttribute('style','border:0;');document.body.appendChild(_is);void 0" - title="Is This Page Valid?">Is <em>This</em> Page Valid?</a>"</dt> - <dd> - From the Head Of The "JavaScript Juju" Department, - <a href="http://jibbering.com/">Jim Ley</a>, comes this gem. - When invoked, this Favlet will submit the - <acronym title="Universal Resource Identifier">URL</acronym> - for the current page to the Validator for processing and pop up - a dialog that shows whether the page is Valid and how many errors - where found in the page. As with all Jim does, this Black Magic is - so deep I'm <em>afraid</em> to ask him how it works. - <q>Here There Be Dragons!</q> - </dd> - </dl> - </div> - </div> - -<!--#include virtual="footer.html" --> - </body> -</html> - diff --git a/htdocs/p3p.html b/htdocs/p3p.html deleted file mode 100755 index d23621e..0000000 --- a/htdocs/p3p.html +++ /dev/null @@ -1,119 +0,0 @@ -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" - "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> - -<html xmlns="http://www.w3.org/1999/xhtml"> - -<head> -<title>About the W3C HTML Validation Service</title> -<meta name="revision" - content="$Id: p3p.html,v 1.3 2002-12-08 14:39:25 ville Exp $" /> -</head> - -<body bgcolor="#FFFFFF" text="#000000" link="#0000ee" vlink="#551a8b"> - -<p> - <a href="http://www.w3.org/"><img - src="http://www.w3.org/Icons/WWW/w3c_home" height="48" border="0" - alt="W3C" /></a> -</p> - - <h2>About the W3C HTML Validation Service</h2> - - <p> - <a href="./">This service</a> checks HTML documents for - conformance to W3C HTML and XHTML Recommendations and other - HTML standards. Related resources include: - </p> - - <ul> - <li><a href="source/">Information on the source code availability</a></li> - <li><a href="todo.html">The to-do list for the service</a></li> - <li><a href="sgml-lib/catalog">DTDs (document types) supported by this - service</a> (the SGML catalog)</li> - <li><a href="http://lists.w3.org/Archives/Public/www-validator/">The - www-validator mailing list</a></li> - <li><a href="feedback.html">How to provide feedback on this service</a></li> - <!-- - <li>@@ conneg'd icons - --> - </ul> - - <h2><a name="others">Other resources</a></h2> - - <ul> - <li><a href="http://www.w3.org/MarkUp/">W3C's HTML home page</a></li> - <li><a href="http://www.w3.org/TR/html401/">The W3C HTML 4.01 - Recommendation</a></li> - <li><a href="http://www.w3.org/TR/xhtml1/">The W3C XHTML 1.0 - Recommendation</a></li> - <li>The <a - href="http://www.htmlhelp.com/tools/validator/">WDG HTML - validator</a> is another excellent online validation - service</li> - <li><a href="http://arealvalidator.com/">A Real Validator</a> - is a shareware HTML syntax checker for Windows systems, - from the author of the WDG validator</li> - <!-- - <li>@@ bookmarklets</li> - --> - </ul> - -<h2><a name="credits">Credits</a></h2> - - <p> - This service uses: - </p> - - <ul> - - <li> - <a href="http://www.jclark.com/">James Clark</a>'s excellent <a - href="http://www.jclark.com/sp/">SGML parser</a>, - </li> - - <li> - The <a href="http://www.linpro.no/lwp/">libwww-perl library</a> - by Gisle Aas and Martijn Koster for retrieving documents. - </li> - - <!-- - <li> - <a href="http://www.weblint.org/~neilb/">Neil Bowers</a>' HTML - style checker, <a - href="http://www.weblint.org/">Weblint</a>. - </li> - - <li> - SGML error explanations written by Scott Bigham. - </li> - --> - - <li> - Patches and ideas from Terje Bless, <a - href="http://www.htmlhelp.com/%7Eliam/">Liam Quinn</a>, and - others. - </li> - - </ul> - -<p> - The <a - href="http://lists.w3.org/Archives/Public/www-html/1994Jul/0015">first - online HTML validation service</a> was created by <a - href="http://www.w3.org/People/Connolly/">Dan Connolly</a> and <a - href="http://www.gaither.com/mark/">Mark Gaither</a>. -</p> - -<hr /> - -<address> - <a href="http://validator.w3.org/check/referer"><img - src="http://validator.w3.org/images/vxhtml10" height="31" width="88" - align="right" border="0" alt="Valid XHTML 1.0!" /></a> - <a href="/feedback.html">Gerald Oskoboiny</a><br /> - $Date: 2002-12-08 14:39:25 $ -</address> - -</body> - -</html> diff --git a/httpd/cgi-bin/LinkChecker.pl b/httpd/cgi-bin/LinkChecker.pl deleted file mode 100755 index 90cc39e..0000000 --- a/httpd/cgi-bin/LinkChecker.pl +++ /dev/null @@ -1,1513 +0,0 @@ -#! /usr/bin/perl -w -# -# W3C Link Checker -# by Hugo Haas -# (c) 1999-2000 World Wide Web Consortium -# based on Renaud Bruyeron's checklink.pl -# -# $Id: LinkChecker.pl,v 1.33 2000-02-24 22:22:05 hugo Exp $ -# -# This program is licensed under the W3C(r) License: -# http://www.w3.org/Consortium/Legal/copyright-software -# -# See the CVSweb interface at: -# http://dev.w3.org/cvsweb/validator/httpd/cgi-bin/LinkChecker.pl -# -# An online version is available at: -# http://validator.w3.org/checklink - -use strict; - -package W3C::UserAgent; -require LWP::UserAgent; -@W3C::UserAgent::ISA = qw(LWP::UserAgent); - -package W3C::LinkChecker; -require HTML::Parser; -@W3C::LinkChecker::ISA = qw(HTML::Parser); - -# Autoflush -$| = 1; - -# Version info -my $PROGRAM = 'W3C LinkChecker'; -my $VERSION = q$Revision: 1.33 $ . '(c) 1999-2000 W3C'; -my $REVISION; ($REVISION = $VERSION) =~ s/Revision: (\d+\.\d+) .*/$1/; - -# Different options specified by the user -my $_cl; -my $_quiet = 0; -my $_summary = 0; -my $_verbose = 0; -my $_progress = 0; -my $_html = 0; -my $_timeout = 60; -my $_chunksize = 1024; -my $_redirects = 1; -my $_dir_redirects = 1; -my $_user; -my $_password; -my $_trusted = '\.w3\.org'; -my $_http_proxy; -my $_recursive = 0; -my $_base_location = '.'; - -# Global variables -# Used for the output -my $first = 2; -# What is our query? -my $query; -# What URI's did we process? (used for $_recursive == 1) -my %processed; -# Result of the HTTP query -my %results; -# List of redirects -my %redirects; - -if ($#ARGV >= 0) { - $_cl = 1; -# Parse command line - my @uris = &parse_arguments(); - if ($_user && (! $_password)) { - &ask_password(); - } - my $uri; - foreach $uri (@uris) { - if (! $_summary) { - printf("%s %s\n", $PROGRAM ,$VERSION); - } else { - $_verbose = 0; - $_progress = 0; - } - $uri = urize($uri); - &check_uri($uri); - } -} else { - use CGI; - use CGI::Carp qw(fatalsToBrowser); - $query = new CGI; - $_cl = 0; - $_verbose = 0; - $_progress = 0; - if ($query->param('summary')) { - $_summary = 1; - } else { - } - if ($query->param('hide_redirects')) { - $_redirects = 0; - } - if ($query->param('hide_dir_redirects')) { - $_dir_redirects = 0; - } - if ($query->param('recursive')) { - $_recursive = 1; - } - $_html = 1; - my $uri; - if ($query->param('uri')) { - $uri = $query->param('uri'); - } elsif ($query->param('url')) { - $uri = $query->param('url'); - } else { - &print_form($query); - } - $uri =~ s/^\s+//g; - if ($uri =~ m/^file:/) { - &file_uri($uri); - } elsif (!($uri =~ m/:/)) { - $uri = 'http://'.$uri; - } - &check_uri($uri, 1); -} - -############################################################################### - -################################ -# Command line and usage stuff # -################################ - -sub parse_arguments() { - my @uris; - my $uris = 0; - while (@ARGV) { - $_ = shift(@ARGV); - if ($uris) { - push(@uris, $_); - } elsif (m/^--$/) { - $uris = 1; - } elsif (m/^-[^-upytdcl]/) { - if (m/q/) { - $_quiet = 1; - $_summary = 1; - } - if (m/s/) { - $_summary = 1; - } - if (m/b/) { - $_redirects = 0; - } - if (m/e/) { - $_dir_redirects = 0; - } - if (m/v/) { - $_verbose = 1; - } - if (m/i/) { - $_progress = 1; - } - if (m/h/) { - $_html = 1; - } - if (m/r/) { - $_recursive = 1; - } - } elsif (m/^--help$/) { - &usage(); - } elsif (m/^--quiet$/) { - $_quiet = 1; - } elsif (m/^--summary$/) { - $_summary = 1; - } elsif (m/^--broken$/) { - $_redirects = 0; - } elsif (m/^--dir-redirects$/) { - $_dir_redirects = 0; - } elsif (m/^--verbose$/) { - $_verbose = 1; - } elsif (m/^--indicator$/) { - $_progress = 1; - } elsif (m/^--html$/) { - $_html = 1; - } elsif (m/^--recursive$/) { - $_recursive = 1; - } elsif (m/^-l|--location$/) { - $_base_location = shift(@ARGV); - } elsif (m/^-u|--user$/) { - $_user = shift(@ARGV); - } elsif (m/^-p|--password$/) { - $_password = shift(@ARGV); - } elsif (m/^-t|--timeout$/) { - $_timeout = shift(@ARGV); - } elsif (m/^-d|--domain$/) { - $_trusted = shift(@ARGV); - } elsif (m/^-y|--proxy$/) { - $_http_proxy = shift(@ARGV); - } elsif (m/^-c|--chunksize$/) { - $_chunksize = shift(@ARGV); - } else { - push(@uris, $_); - } - } - return(@uris); -} - -sub usage() { - print STDERR "$PROGRAM $VERSION -Usage: LinkCheck.pl <options> <uris> -Options: - -s/--summary Result summary only. - -b/--broken Show only the broken links, not the redirects. - -e/--directory Hide directory redirects - e.g. - http://www.w3.org/TR -> http://www.w3.org/TR/ - -r/--recursive Check the documents linked from the first one. - -l/--location uri Scope of the documents checked. - By default, for - http://www.w3.org/TR/html4/Overview.html - for example, it would be: - http://www.w3.org/TR/html4/ - -q/--quiet No output if no errors are found. - -v/--verbose Verbose mode. - -i/--indicator Show progress while parsing. - -u/--user username Specify a username for authentication. - -p/--password password Specify a password. - -t/--timeout value Timeout for the HTTP requests. - -d/--domain domain Regular expression describing the domain to - which the authetication information will be - sent (default: $_trusted). - -y/--proxy proxy Specify an HTTP proxy server. - -c/--chunk-size size Size of the blocks parsed (default: $_chunksize). - -h/--html HTML output. - --help Show this message. -"; - exit(0); -} - -sub ask_password() { - print(STDERR 'Enter your password: '); - system('stty -echo'); - chomp($_password = <STDIN>); - system('stty echo'); - print(STDERR "ok.\n"); -} - -############################################################################### - -########################################### -# Transform foo into file://localhost/foo # -########################################### - -sub urize() { - use URI; - $_ = URI::Escape::uri_unescape($_[0]); - my $base; - my $res = $_; - if (m/:/) { - $base = URI->new($_); - } elsif (m/^\//) { - $base = URI->new('file://localhost'); - } else { - my $pwd; - chop($pwd = `pwd`); - $base = URI->new('file://localhost'.$pwd.'/'); - } - my $u = URI->new($res); - my $result = $u->abs($base); - return($result->as_string()); -} - -####################### -# Do the job on a URI # -####################### - -sub check_uri() { - my ($uri, $html_stuff) = @_; - - if ($_html && $html_stuff) { - $first = 1; - } else { - $first = 0; - } - - my $start; - if (! $_quiet) { - $start = &get_timestamp(); - } - - # Get the document - my $response = &get_document('GET', $uri, 1, \%redirects); - - if (defined($response->{Stop})) { - if ($html_stuff) { - &html_header($uri); - } - &hprintf("\nError: %d %s\n", - $response->code(), $response->message()); - if ($html_stuff) { - &html_footer(); - } - return(-1); - } - - if ($_html) { - if ($html_stuff) { - &html_header($uri); - } - print('<h2>'); - } - - my $absolute_uri = $response->{absolute_uri}->as_string(); - - printf("\nProcessing\t%s\n\n", $absolute_uri); - - if ($_html) { - print("</h2>\n"); - if (! $_summary) { - print "<pre>\n"; - } - } - - $processed{$absolute_uri} = 1; - my $p = &parse_document($uri, $absolute_uri, - $response->content(), 1); - my $base = URI->new($p->{base}); - - # Check anchors - ############### - - if (! $_summary) { - print("Checking anchors:\n"); - } - my %errors; - my $anchor; - foreach $anchor (keys %{$p->{Anchors}}) { - my @lines = keys %{$p->{Anchors}{$anchor}}; - my $times = $#lines + 1; - if ($times > 1) { - $errors{$anchor} = 1; - } - if ($anchor eq '') { - $errors{$anchor} = 1; - } - } - if (! $_summary) { - print(" done.\n"); - } - - # Check links - ############# - - my %links; - # Record all the links - my $link; - foreach $link (keys %{$p->{Links}}) { - my $link_uri = URI->new($link); - my $abs_link_uri = URI->new_abs($link_uri, $base); - my $lines; - foreach $lines (keys %{$p->{Links}{$link}}) { - my $canonical = URI->new($abs_link_uri->canonical()); - my $url = $canonical->scheme().':'.$canonical->opaque(); - my $fragment = $canonical->fragment(); - if (! $fragment) { - $links{$url}{location}{$lines} = 1; - } else { - $links{$url}{fragments}{$fragment}{$lines} = 1; - } - } - } - - # Build the list of broken URI's - my %broken; - my $u; - foreach $u (keys %links) { - # Don't check mailto: URI's - next if ($u =~ m/^mailto:/); - if (! $_summary) { - &hprintf("Checking link %s\n", $u); - } - &check_validity($uri, $u, \%links, \%redirects); - if ($_verbose) { - &hprintf("\tReturn code: %s\n", $results{$u}{location}{code}); - } - if ($results{$u}{location}{success}) { - my $fragment; - if ($results{$u}{location}{display} >= 400) { - $broken{$u}{location} = 1; - } - foreach $fragment (keys %{$links{$u}{fragments}}) { - if ($_verbose) { - &hprintf("\t\t%s %s - Lines: %s\n", - $fragment, - ($results{$u}{fragments}{$fragment} - ? 'OK' : 'Not found'), - join(',', - keys %{$links{$u}{fragments}{$fragment}}) - ); - } - # A broken fragment? - if ($results{$u}{fragments}{$fragment} == 0) { - $broken{$u}{fragments}{$fragment} += 2; - } - } - } else { - # Couldn't find the document - $broken{$u}{location} = 1; - my $fragment; - foreach $fragment (keys %{$links{$u}{fragments}}) { - $broken{$u}{fragments}{$fragment}++; - } - } - } - if (! $_summary) { - my $stop = &get_timestamp(); - &hprintf("Processed in %ss.\n", &time_diff($start, $stop)); - } - - # Display results - if ($_html) { - if (! $_summary) { - print "</pre>\n"; - } - } - if (! $_quiet) { - print "\n"; - } - &anchors_summary($p->{Anchors}, \%errors); - &links_summary(\%links, \%results, \%broken, \%redirects); - - # Do we want to process other documents? - if ($_recursive) { - if ($_base_location eq '.') { - # Get the name of the original directory - # e.g. http://www.w3.org/TR/html4/Overview.html - # should return http://www.w3.org/TR/html4/ - $results{$uri}{parsing}{base} =~ m/^(.*\/)[^\/]*/; - $_base_location = $1; - } - foreach $u (keys %links) { - next if (! (# Check if it's in our scope for recursion - ($u =~ m/^$_base_location/) && - # and the link is not broken - $results{$u}{location}{success}) - ); - # Check if we have already processed the URI - next if (&already_processed($u) != 0); - # Do the job - print "\n"; - if (! $_html) { - my $i = 40; - while ($i--) { - print('-'); - } - } else { - print('<hr>'); - # For the online version, wait for a while to avoid abuses - sleep(3); - } - print "\n"; - &check_uri($u, 0); - } - } - - if ($_html && $html_stuff) { - &html_footer(); - } -} - -############################# -# Get a document to process # -############################# - -sub get_document() { - my ($method, $uri, $in_recursion, $redirects) = @_; - - # Get the document - my $response = &get_uri($method, $uri); - &record_results($uri, $method, $response); - $first = 0; - if (! $response->is_success()) { - if (! $in_recursion) { - if ($response->code() == 401) { - &authentication($response); - } else { - if ($_html) { - &html_header($uri); - } - &hprintf("Error: %d %s\n", - $response->code(), $response->message()); - if ($_html) { - &html_footer(); - } - } - } - $response->{Stop} = 1; - return($response); - } - - &record_redirects($redirects, $response->{Redirects}); - - # What URI are we processing by the way? - my $base_uri = URI->new($response->base()); - my $request_uri = URI->new($response->request->url); - $response->{absolute_uri} = $base_uri->abs($request_uri); - - # Parse the document - if (! ($response->header('Content-type') =~ m/text\/html/)) { - if (! $in_recursion) { - &hprintf("Can't check link: Content-type is '%s'.\n", - $response->header('Content-type')); - } - $response->{Stop} = 1; - } - - # Ok, return the information - return($response); -} - -################################################## -# Check whether a URI has already been processed # -################################################## - -sub already_processed($) { - my ($uri, %redirects) = @_; - # Don't be verbose for that part - my $summary_value = $_summary; - $_summary = 1; - # Do a HEAD - my $response = &get_document('HEAD', $uri, 1); - $_summary = $summary_value; - # Have we already processed it? - return(-1) if (defined($response->{Stop})); - return(1) if (defined($processed{$response->{absolute_uri}->as_string()})); - return(0); -} - -############################ -# Get the content of a URI # -############################ - -sub W3C::UserAgent::simple_request() { - my $self = shift; - my $response = $self->W3C::UserAgent::SUPER::simple_request(@_); - if (! defined($self->{FirstResponse})) { - $self->{FirstResponse} = $response->code(); - $self->{FirstMessage} = $response->message(); - } - return $response; -} - -sub W3C::UserAgent::redirect_ok { - my ($self, $request) = @_; - - if (! ($_summary || $first)) { - &hprintf("\n%s %s ", $request->method(), $request->uri()); - } - - $self->{Redirects}{$self->{fetching}} = $request->uri(); - $self->{fetching} = $request->uri(); - - return 0 if $request->method() eq "POST"; - return 1; -} - -sub get_uri() { - # Here we have a lot of extra parameters in order not to lose information - # if the function is called several times (401's) - my ($method, $uri, $start, $redirects, $code, $realm, $message, $tested) = @_; - if (! defined($start)) { - $start = &get_timestamp(); - } - # Prepare the query - my $ua = new W3C::UserAgent; - $ua->timeout($_timeout); - $ua->agent('W3CLinkChecker/'.$REVISION.' '.$ua->agent()); - if ($_http_proxy) { - $ua->proxy('http', 'http://'.$_http_proxy); - } - $ua->{uri} = $uri; - $ua->{fetching} = $uri; - if (defined($redirects)) { - $ua->{Redirects} = $redirects; - } - my $count = 0; - my $response; - if (! ($_summary || $first)) { - &hprintf("%s %s ", $method, $uri); - } - my $request = new HTTP::Request($method, $uri); - # Are we providing authentication info? - if (defined($tested) - && ($request->url->netloc =~ /$_trusted$/)) { - if (defined($ENV{HTTP_AUTHORIZATION})) { - $request->headers->header(Authorization => $ENV{HTTP_AUTHORIZATION}); - } elsif (defined($_user) && defined($_password)) { - use MIME::Base64; - my $authorization = encode_base64($_user.':'.$_password); - $request->headers->header(Authorization => 'Basic '.$authorization); - } - } - # Do the query - $response = $ua->request($request); - # Get the results - if (! defined($code)) { - $code = $ua->{FirstResponse}; - } - if (! defined($message)) { - $message = $ua->{FirstMessage}; - } - # Authentication requested? - if (($response->code() == 401) - && (defined($ENV{HTTP_AUTHORIZATION}) - || (defined($_user) && defined($_password))) - && !defined ($tested)) { - # Deal with authentication and avoid loops - if (! defined ($realm)) { - $response->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/; - $realm = $1; - } - if (! ($_summary || $first)) { - print "\n"; - } - return &get_uri($method, $response->request->url, - $start, $ua->{Redirects}, - $code, $realm, $message, 1); - } - # Record the redirects - $response->{Redirects} = $ua->{Redirects}; - my $stop = &get_timestamp(); - if (! ($_summary || $first)) { - &hprintf(" fetched in %ss\n", &time_diff($start,$stop)); - } - $response->{OriginalCode} = $code; - $response->{OriginalMessage} = $message; - if (defined($realm)) { - $response->{Realm} = $realm; - } - return $response; -} - -######################################### -# Record the results of an HTTP request # -######################################### - -sub record_results() { - my ($uri, $method, $response) = @_; - $results{$uri}{method} = $method; - $results{$uri}{location}{code} = $response->code(); - $results{$uri}{location}{type} = $response->header('Content-type'); - $results{$uri}{location}{display} = $results{$uri}{location}{code}; - $results{$uri}{location}{orig} = $response->{OriginalCode}; - # Did we get a redirect? - if ($response->{OriginalCode} != $response->code()) { - $results{$uri}{location}{orig_message} = $response->{OriginalMessage}; - $results{$uri}{location}{redirected} = 1; - } - $results{$uri}{location}{success} = $response->is_success(); - # Stores the authentication information - if (defined($response->{Realm})) { - $results{$uri}{location}{realm} = $response->{Realm}; - $results{$uri}{location}{display} = 401; - } - if (($results{$uri}{location}{display} == 401) - && ($results{$uri}{location}{code} == 404)) { - $results{$uri}{location}{record} = 404; - } else { - $results{$uri}{location}{record} = $results{$uri}{location}{display}; - } - # Did it fail? - if (! $results{$uri}{location}{success}) { - $results{$uri}{location}{message} = $response->message(); - if ($_verbose) { - &hprintf("Error: %d %s\n", - $results{$uri}{location}{code}, - $results{$uri}{location}{message}); - } - return; - } -} - -#################### -# Parse a document # -#################### - -sub parse_document() { - my ($uri, $location, $document, $links) = @_; - - my $p; - - if (defined($results{$uri}{parsing})) { - # We have already done the job. Woohoo! - $p->{base} = $results{$uri}{parsing}{base}; - $p->{Anchors} = $results{$uri}{parsing}{Anchors}; - $p->{Links} = $results{$uri}{parsing}{Links}; - return($p); - } - - my $start; - $p = W3C::LinkChecker->new(); - # Loose interpretation of the HTML comments since browsers will do the same - $p->strict_comment(0); - $p->{base} = $location; - if (! $_summary) { - $start = &get_timestamp(); - print("Parsing...\n"); - } - if (!$_summary || $_progress) { - $p->{Total} = ($document =~ tr/\n//); - } - # We only look for anchors if we are not interested in the links - # obviously, or if we are running a recursive checking because we - # might need this information later - $p->{only_anchors} = !($links || $_recursive); - - # Parse small chunks: much faster - my @chunks = unpack("a$_chunksize"x(length($document)/$_chunksize).'a*', - $document); - for (@chunks) { - $p->parse($_); - } - - if (! $_summary) { - my $stop = &get_timestamp(); - if ($_progress) { - print "\r"; - } - &hprintf(" done (%d lines in %ss).\n", - $p->{Total}, &time_diff($start, $stop)); - } - - # Save the results before exiting - $results{$uri}{parsing}{base} = $p->{base}; - $results{$uri}{parsing}{Anchors} = $p->{Anchors}; - $results{$uri}{parsing}{Links} = $p->{Links}; - - return($p); -} - -#################################### -# Constructor for W3C::Linkchecker # -#################################### - -sub W3C::LinkChecker::new() { - my $p = HTML::Parser::new(@_); - - # Line count - $p->{Line} = 1; - # Attribute for ids in element a - # Up to XHTML 1.0, it is 'name'. After that it is 'id'. - $p->{check_name} = 1; - - return $p; -} - -################################################# -# Record or return the doctype of the document # -################################################# - -sub W3C::LinkChecker::doctype() { - my ($self, $dc) = @_; - if (! $dc) { - return $self->{doctype}; - } - $self->{doctype} = $dc; - # Check if we should check <a name="..."> or not - # The good way to do that is to get the DTD and parse it but it is - # much more complex - if ($dc eq '-//W3C//DTD XHTML Basic 1.0//EN') { - $self->{check_name} = 0; - } -} - -####################################### -# Count the number of lines in a file # -####################################### - -sub W3C::LinkChecker::new_line() { - my ($self, $string) = @_; - my $count = ($string =~ tr/\n//); - $self->{Line} = $self->{Line} + $count; - if ($_progress) { - printf("\r%4d%%", int($self->{Line}/$self->{Total}*100)); - } -} - -####################################### -# start function used by HTML::Parser # -####################################### - -sub W3C::LinkChecker::start() { - my ($self, $tag, $attr, $attrseq, $text) = @_; - my $anchor; - # Links - if (!$self->{only_anchors}) { - my $link; - $anchor = $attr->{id}; - if (($tag eq 'a')) { - $link = $attr->{href}; - if ($self->{check_name}) { - $anchor = $attr->{name}; - } - } elsif ($tag eq 'img') { - $link = $attr->{src}; - } elsif (($tag eq 'frame') || ($tag eq 'link')) { - $link = $attr->{href}; - } - if (defined($link)) { - $self->{Links}{$link}{$self->{Line}}++; - } - # Just anchors - } else { - $anchor = $attr->{id}; - if (($tag eq 'a') && $self->{check_name}) { - $anchor = $attr->{name}; - } - } - if (defined($anchor)) { - $self->{Anchors}{$anchor}{$self->{Line}}++; - } - # Line counting - if ($text =~ m/\n/) { - $self->new_line($text); - } -} - -#################################################### -# Overloading functions for line counting purposes # -#################################################### - -# W3C::LinkChecker::text() is called by end(), declaration() and comment() -sub W3C::LinkChecker::text() { - my ($self, $text) = @_; - if (!$_progress) { - # If we are just extracting information about anchors, - # parsing this part is only cosmetic (progress indicator) - return unless !$self->{only_anchors}; - } - if ($text =~ /\n/) { - $self->new_line($text); - } -} - -sub W3C::LinkChecker::end() { - my $self = shift; - return unless !$self->{only_anchors}; - shift; - my $text = shift; - $self->text($text); -} - -sub W3C::LinkChecker::declaration() { - my ($self, $text) = @_; - # Extract the doctype - my @declaration = split(/\s+/, $text, 4); - if (($#declaration >= 3) && - ($declaration[0] eq 'DOCTYPE') && - (lc($declaration[1]) eq 'html')) { - # Parse the doctype declaration - $text =~ m/^DOCTYPE\s+html\s+PUBLIC\s+\"([^\"]*)\"(\s+\"([^\"]*)\")?\s*$/i; - # Store the doctype - if ($1) { - $self->doctype($1); - } - # If there is a link to the DTD, record it - if (!$self->{only_anchors} && $3) { - $self->{Links}{$3}{$self->{Line}}++; - } - } - return unless !$self->{only_anchors}; - $self->text($text); -} - -sub W3C::LinkChecker::comment() { - my $self = shift; - return unless !$self->{only_anchors}; - my $text = shift; - $self->text($text); -} - -################################ -# Check the validity of a link # -################################ - -sub check_validity() { - use HTTP::Status; - my ($testing, $uri, $links, $redirects) = @_; - - # Checking file: URI's is not allowed with a CGI - if ($testing ne $uri) { - if ((! $_cl) && (!($testing =~ m/^file:/)) && ($uri =~ m/^file:/)) { - # Can't test? Return 400 Bad request. - $results{$uri}{location}{code} = 400; - $results{$uri}{location}{success} = 0; - $results{$uri}{location}{message} = 'Error: \'file:\' URI not allowed'; - if ($_verbose) { - &hprintf("Error: %d %s\n", - $results{$uri}{location}{code}, - $results{$uri}{location}{message}); - } - return; - } - } - - # Get the document with the appropriate method - my $method; - my @fragments = keys %{$links->{$uri}{fragments}}; - if ($#fragments == -1) { - $method = 'HEAD'; - } else { - $method = 'GET'; - } - - my $response; - my $being_processed = 0; - if ((! defined($results{$uri})) - || (($method eq 'GET') && ($results{$uri}{method} eq 'HEAD'))) { - $being_processed = 1; - $response = &get_uri($method, $uri); - # Record the redirects - &record_redirects($redirects, $response->{Redirects}); - # Get the information back from get_uri() - &record_results($uri, $method, $response); - } - - if ($#fragments == -1) { - return; - } - # There are fragments. Parse the document. - my $p; - if ($being_processed) { - # Can we really parse the document? - if (! ($results{$uri}{location}{type} =~ m/text\/html/i)) { - if ($_verbose) { - &hprintf("Can't check content: Content-type is '%s'.\n", - $response->header('Content-type')); - } - return; - } - # Do it then - $p = &parse_document($uri, $response->base(), - $response->as_string(), 0); - } else { - # We already had the information - $p->{Anchors} = $results{$uri}{parsing}{Anchors}; - } - # Check that the fragments exist - my $fragment; - foreach $fragment (keys %{$links->{$uri}{fragments}}) { - if (defined($p->{Anchors}{$fragment}) - || &escape_match($fragment, $p->{Anchors})) { - $results{$uri}{fragments}{$fragment} = 1; - } else { - $results{$uri}{fragments}{$fragment} = 0; - } - } -} - -sub escape_match($, \%) { - use URI::Escape; - my ($a, $hash) = (uri_unescape($_[0]), $_[1]); - foreach $b (keys %$hash) { - if ($a eq uri_unescape($b)) { - return(1); - } - } - return(0); -} - -########################## -# Ask for authentication # -########################## - -sub authentication() { - my $r = $_[0]; - $r->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/; - my $realm = $1; - my $authHeader = $r->headers->www_authenticate; - if ($_cl) { - printf(STDERR "\nAuthentication is required for %s.\n", $r->request->url); - printf(STDERR "The realm is %s.\n", $realm); - print(STDERR "Use the -u and -p options to specify a username and password.\n"); - } else { - printf("Status: 401 Authorization Required\nWWW-Authenticate: %s\nConnection: close\nContent-Type: text/html\n\n", $r->headers->www_authenticate); - printf("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\"> -<html> -<head> -<title>401 Authorization Required</title> -</head> -<body> -<h1>Authorization Required</h1> -<p>You need %s access to %s to perform Link Checking.</p> -</body> -</html> -", $realm, $r->request->url); - } -} - -################## -# Get statistics # -################## - -sub get_timestamp() { - require 'sys/syscall.ph'; - my $timestamp = pack('LL', ()); - syscall(&SYS_gettimeofday, $timestamp, 0) != -1 or $timestamp = 0; - return($timestamp); -} - -sub time_diff() { - my @start = unpack('LL', $_[0]); - my @stop = unpack('LL', $_[1]); - for ($start[1], $stop[1]) { - $_ /= 1_000_000; - } - return(sprintf("%.2f", ($stop[0]+$stop[1])-($start[0]+$start[1]))); -} - -######################## -# Handle the redirects # -######################## - -# Record the redirects in a hash -sub record_redirects(\%, \%) { - my ($redirects, $sub) = @_; - my $r; - foreach $r (keys %$sub) { - $redirects->{$r} = $sub->{$r}; - } -} - -# Determine if a request is redirected -sub is_redirected($, %) { - my ($uri, %redirects) = @_; - return(defined($redirects{$uri})); -} - -# Get a list of redirects for a URI -sub get_redirects($, %) { - my ($uri, %redirects) = @_; - my @history = ($uri); - my $origin = $uri; - my %seen; - while ($redirects{$uri}) { - $uri = $redirects{$uri}; - push(@history, $uri); - last if ($uri eq $origin); - } - return(@history); -} - -#################################################### -# Tool for sorting the unique elements of an array # -#################################################### - -sub sort_unique() { - my %saw; - @saw{@_} = (); - return (sort { $a <=> $b } keys %saw); -} - -##################### -# Print the results # -##################### - -sub anchors_summary(\%, \%) { - my ($anchors, $errors) = @_; - # Number of anchors found. - if (! $_quiet) { - if ($_html) { - print('<p>'); - } - my @anchors = keys %{$anchors}; - &hprintf("Found %d anchors.", $#anchors+1); - if ($_html) { - print('</p>'); - } - print("\n"); - } - # List of the duplicates, if any. - my @errors = keys %{$errors}; - if ($#errors < 0) { - if (! $_quiet && $_html) { - print "<p>Valid anchors!</p>\n"; - } - return; - } - if ($_html) { - print('<p>'); - } - print('List of duplicate and empty anchors:'); - if ($_html) { - print("</p>\n<table border=\"1\">\n<tr><td><b>Anchors</b></td><td><b>Lines</b></td></tr>"); - } - print("\n"); - my $anchor; - foreach $anchor (@errors) { - my $format; - if ($_html) { - $format = "<tr><td>%s</td><td>%s</td></tr>\n"; - } else { - $format = "\t%s\tLines: %s\n"; - } - printf($format, - &encode($anchor eq '' ? 'Empty anchor' : $ anchor), - join(', ', &sort_unique(keys %{$anchors->{$anchor}}))); - } - if ($_html) { - print("</table>\n"); - } -} - -sub show_link_report { - my ($links, $results, $broken, $redirects, $urls) = @_; - - # Head of the table - if ($_html) { - print("\n<table border=\"1\">\n<tr><td><b>Location</b></td><td><b>Code</b></td><td><b>Fragment</b></td><td><b>Lines</b></td></tr>"); - } - print("\n"); - - # Process each URL - my $u; - foreach $u (@{$urls}) { - my @fragments = keys %{$broken->{$u}{fragments}}; - # Count the rows displayed - my $n_fragments = $#fragments+1; - if (!defined($broken->{$u}{location})) { - $n_fragments++; - } - # Did we get a redirect? - my $redirected = &is_redirected($u, %$redirects); - # List of lines - my @total_lines; - my ($f, $l); - foreach $l (keys %{$links->{$u}{location}}) { - push (@total_lines, $l); - } - foreach $f (keys %{$links->{$u}{fragments}}) { - if ($f eq $u) { - next if (defined($links->{$u}{$u}{-1})); - } - my $l; - foreach $l (keys %{$links->{$u}{fragments}{$f}}) { - push (@total_lines, $l); - } - } - my $lines_list = join(', ', - &sort_unique(@total_lines)); - if ($_html) { - # Main info - my @redirects_urls = &get_redirects($u, %$redirects); - for (@redirects_urls) { - $_ = &show_url($_); - } - printf("<tr><th rowspan=\"%d\"%s>%s</th><th rowspan=\"%d\"%s>%d%s%s%s%s</th><td>%s</td><td%s>%s</td></tr>\n", - # Number of fragments (for rowspan) - $n_fragments, - # Color - &bgcolor($results->{$u}{location}{record}), - # List of redirects - $redirected ? join('<br>-> ', - @redirects_urls) : &show_url($u), - # Number of fragments (for rowspan) - $n_fragments, - # Color - &bgcolor($results->{$u}{location}{orig}), - # Original HTTP reply - $results->{$u}{location}{orig}, - # Final HTTP reply - ($results->{$u}{location}{code} != $results->{$u}{location}{orig}) - ? '-> '.&encode($results->{$u}{location}{code}) - : '', - # Realm - defined($results->{$u}{location}{realm}) - ? '<br>Realm: '.&encode($results->{$u}{location}{realm}) - : '', - # HTTP original message - defined($results->{$u}{location}{orig_message}) - ? '<br>'.&encode($results->{$u}{location}{orig_message}).' ->' - : '', - # HTTP final message - $results->{$u}{location}{message} - ? '<br>'.&encode($results->{$u}{location}{message}) - : '', - '', - # Color again - &bgcolor($results->{$u}{location}{code}), - # List of lines - $lines_list); - } else { - printf("\n%s\t%s\n Code: %d%s %s\n", - # List of redirects - $redirected ? join("\n-> ", - &get_redirects($u, %$redirects)) : $u, - # List of lines - $lines_list ? 'Lines: '.$lines_list : '' , - # Original HTTP reply - $results->{$u}{location}{orig}, - # Final HTTP reply - ($results->{$u}{location}{code} != $results->{$u}{location}{orig}) - ? ' -> '.$results->{$u}{location}{code} - : '', - # HTTP message - $results->{$u}{location}{message} ? $results->{$u}{location}{message} : ''); - } - # Fragments - foreach $f (@fragments) { - next if ($f eq $u); - if ($_html) { - my $color = ($broken->{$u}{fragments}{$f} > 1) ? &bgcolor(404) : &bgcolor($results->{$u}{location}{code}); - printf("<tr><td%s>%s</td><td%s>%s</td></tr>\n", - # Color - $color, - # Broken fragment - &show_url($u, $f), - # Color - $color, - # List of lines - join(', ', - &sort_unique(keys %{$links->{$u}{fragments}{$f}}))); - } else { - printf("\t%-30s\tLines: %s\n", - # Fragment - $f, - # List of lines - join(', ', - &sort_unique(keys %{$links->{$u}{fragments}{$f}}))); - } - } - } - # End of the table - if ($_html) { - print("</table>\n"); - } -} - -sub links_summary { - # Advices to fix the problems - - my %todo = ( 200 => 'nothing !', - 300 => 'it usually means that there is a typo in a link that triggers <strong>mod_speling</strong> action - this should be fixed', - 301 => 'usually nothing, unless the end point of the redirect is broken (in which case, the <B>Code</B> column is RED)', - 302 => 'usually nothing, unless the end point of the redirect is broken (in which case, the <B>Code</B> column is RED)', - 400 => 'Usually the sign of a malformed URL that cannot be parsed by the server', - 401 => 'The link is not public. You had better specify it.', - 403 => 'The link is forbidden ! This needs fixing. Usual suspect: a missing Overview.html or index.html, or bad access control', - 404 => 'The link is broken. Fix it <B>NOW</B>', - 405 => 'The server does not allow HEAD requests. How liberal. Go ask the guys who run this server why.', - 407 => 'The link is a proxy, but requires Authentication.', - 408 => 'The request timed out', - 415 => 'The media type is not supported.', - 500 => 'The server failed. It is a server side problem.', - 501 => 'HEAD or GET is not implemented on this server... What kind of server is that?', - 503 => 'The server cannot service the request, for some unknown reason'); - - my ($links, $results, $broken, $redirects) = @_; - - # Count the links. Useless but interesting. - if (! $_quiet) { - if ($_html) { - print("\n\n<p>"); - } - my @links = keys %$links; - my $n_fragments = 0; - my $n_total = 0; - my $u; - # Give a few stats - foreach $u (@links) { - my @fragments = keys %{$links->{$u}{fragments}}; - $n_fragments += $#fragments + 1; - if (defined($links->{$u}{location})) { - $n_fragments++; - } - my ($f, $l); - foreach $l (keys %{$links->{$u}{location}}) { - $n_total += $links->{$u}{location}{$l}; - } - foreach $f (@fragments) { - my @lines = keys %{$links->{$u}{fragments}{$f}}; - foreach $l (@lines) { - $n_total += $links->{$u}{fragments}{$f}{$l}; - } - } - } - &hprintf("Found %d locations for %d unique URI's (%d total).", - $#links+1, $n_fragments, $n_total); - if ($_html) { - print('</p>'); - } - print("\n"); - } - - # Print a summary - if ($_html) { - my %code_summary; - my $u; - foreach $u (keys %$links) { - if (defined($results->{$u}{location}{record})) { - my $c; - if ($results->{$u}{location}{record} == 200) { - $c = $results->{$u}{location}{orig}; - } else { - $c = $results->{$u}{location}{record}; - } - $code_summary{$c}++; - } - } - print "<table border=\"1\">\n<tr><td><b>Code</b></td><td><b>Occurences</b></td><td><b>What to do</b></td></tr>\n"; - my $code; - foreach $code (sort(keys(%code_summary))) { - printf("<tr%s>", &bgcolor($code)); - printf("<td>%s</td>", $code); - printf("<td>%s</td>", $code_summary{$code}); - printf("<td>%s</td>", $todo{$code}); - print "</tr>\n"; - } - print "</table>\n"; - } - - # List of the broken links - my @urls = keys %{$broken}; - my @dir_redirect_urls = (); - if ($_redirects) { - # Add the redirected URI's to the report - my $l; - for $l (keys %$redirects) { - next unless (defined($results->{$l}) - && defined($links->{$l}) - && !defined($broken->{$l})); - # Check whether we have a "directory redirect" - # e.g. http://www.w3.org/TR -> http://www.w3.org/TR/ - my @redirects = &get_redirects($l, %$redirects); - if (($#redirects == 1) - && (($redirects[0].'/') eq $redirects[1])) { - push(@dir_redirect_urls, $l); - next; - } - push(@urls, $l); - } - } - - # Broken links and redirects - if ($#urls < 0) { - if (! $_quiet && $_html) { - print "<p>Valid links!</p>\n"; - } - } else { - if ($_html) { - print('<p>'); - } - print("\nList of broken links"); - if ($_redirects) { - print(' and redirects'); - } - print(':'); - if ($_html) { - print('<br>Broken fragments appear in red</p>'); - } - &show_link_report($links, $results, $broken, $redirects, - \@urls); - } - - # Show directory redirects - if ($_redirects && $_dir_redirects && ($#dir_redirect_urls > -1)) { - if ($_html) { - print('<p>'); - } - print("\nList of directory redirects:"); - &show_link_report($links, $results, $broken, $redirects, - \@dir_redirect_urls); - } -} - -############################################################################### - -################## -# HTML interface # -################## - -sub html_header() { - my $uri = &encode($_[0]); - # Cache control? - if (defined($_[1])) { - print "Cache-Control: no-cache\nPragma: no-cache\n"; - } - print "Content-type: text/html - -<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"> -<html> -<head> -<title>W3C Link Ckecker: $uri</title> -<style type=\"text/css\"> - -BODY { - font-family: sans-serif; - color: black; - background: white; -} - -A:link, A:active { - color: #00E; - background: transparent; -} - -A:visited { - color: #529; - background: transparent; -} - -PRE { - font-family: monospace -} - -IMG { - color: white; - border: none; -} - -</style> -</head> -<body> -<a href=\"http://www.w3.org/\"><img alt=\"W3C\" src=\"http://www.w3.org/Icons/w3c_home\" height=\"48\" width=\"72\"></a> -<h1>W3C<sup>®</sup> Link Checker: $uri</h1> -\n"; -} - -sub bgcolor() { - my ($code) = @_; - my $color; - my $r = HTTP::Response->new($code); - if ($r->is_success()) { - return ''; - } - if ($code == 300) { - return ' bgcolor="magenta"'; - } - if ($code == 401) { - return ' bgcolor="aqua"'; - } - if ($r->is_redirect()) { - return ' bgcolor="yellow"'; - } - if ($r->is_error()) { - return ' bgcolor="red"'; - } - return ' bgcolor="grey"'; -} - -sub show_url() { - my ($url, $fragment) = @_; - if (defined($fragment)) { - $url .= '#'.$fragment; - } - return('<a href="'.$url.'">'.&encode(defined($fragment) ? $fragment : $url).'</a>'); -} - -sub html_footer() { - print " -<hr> -<address> -$PROGRAM $VERSION<br> -Report bugs to <a href=\"mailto:hugo\@w3.org\">Hugo Haas</a>. -Check out the <a href=\"http://dev.w3.org/cvsweb/validator/httpd/cgi-bin/LinkChecker.pl\">source code</a>. -</address> -</body> -</html> -"; -} - -sub file_uri() { - my $uri = $_[0]; - &html_header($uri); - print "<h2>Forbidden</h2> -<p>You cannot check such a URI (<code>$uri</code>).</p> -"; - &html_footer(); - exit; -} - -sub print_form() { - my ($q) = @_; - &html_header($VERSION, 1); - print "<form action=\"".$q->self_url()."\" method=\"get\"> -<p>Enter the URI that you want to check:</p> -<p><input type=\"text\" size=\"50\" name=\"uri\"></p> -<p>Options:</p> -<p> - <input type=\"checkbox\" name=\"summary\"> Summary only - <br> - <input type=\"checkbox\" name=\"hide_redirects\"> Hide redirects - <br> - <input type=\"checkbox\" name=\"hide_dir_redirects\"> Hide directory redirects - <br> - <input type=\"checkbox\" name=\"recursive\"> Check linked documents recursively -</p> -<p><input type=\"submit\" name=\"submit\" value=\"Check\"></p> -</form> -"; - &html_footer(); - exit; -} - -sub encode() { - if (! $_html) { - return @_; - } else { - return HTML::Entities::encode(@_); - } -} - -sub hprintf() { - if (! $_html) { - printf(@_); - } else { - print HTML::Entities::encode(sprintf($_[0], @_[1..@_-1])); - } -} diff --git a/httpd/cgi-bin/p3p b/httpd/cgi-bin/p3p deleted file mode 100755 index 5e272fd..0000000 --- a/httpd/cgi-bin/p3p +++ /dev/null @@ -1,6 +0,0 @@ -#! perl -print "Content-type: text/html - -<html><body>koike</body></html> -"; - diff --git a/httpd/cgi-bin/referers b/httpd/cgi-bin/referers deleted file mode 100755 index e8abf54..0000000 --- a/httpd/cgi-bin/referers +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/sh -# -# referers: CGI script to show the top referers of the 50,000 most recent -# hits to this server. -# -# This source code is available under the license at: -# http://www.w3.org/Consortium/Legal/copyright-software -# -# $Id: referers,v 1.8 2000-05-08 02:12:31 gerald Exp $ - -LOG=/usr/local/apache/logs/validator_log; export LOG - -cat <<EOHD -Content-Type: text/html - -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> - -<title>top referers to validator.w3.org</title> - -<body bgcolor="#FFFFFF" text="#000000" link="#0000ee" vlink="#551a8b"> - -<p> - This page lists the top 100 referers to <a href="/">the W3C Validation - Service</a> (taken from the most recent 50,000 log entries.) -</p> - -<table> -EOHD - -tail -50000 $LOG | egrep -v image/png | egrep -v image/gif | \ - cut -d\" -f4 | sort | uniq -c | \ - grep -v ' \-$' | sort -rn | head -100 | sed 's/\&/\&/g; s/</\</g' | \ - awk '{printf("<tr><td>%s</td><td><a href=\"%s\">%s</a></td></tr>\n",\ - $1, $2, $2)}' - -cat <<EOHD -</table> - -<hr> -<address> -<a href="http://www.w3.org/People/Gerald/">Gerald Oskoboiny</a><br> -\$Date: 2000-05-08 02:12:31 $ \ -</address> - -EOHD - -exit - diff --git a/httpd/cgi-bin/traceroute b/httpd/cgi-bin/traceroute deleted file mode 100755 index 9816c0b..0000000 --- a/httpd/cgi-bin/traceroute +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/local/bin/perl -# -# traceroute: a CGI script that provides a Web interface to traceroute -# -# Copyright 1998 Gerald Oskoboiny <gerald@w3.org> -# -# This source code is available under the license at: -# http://www.w3.org/Consortium/Legal/copyright-software -# -# $Id: traceroute,v 1.3 2001-12-21 23:39:27 gerald Exp $ -# - -print <<"EOF"; -Content-Type: text/html - -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<html> - -<head> - <title>service discontinued</title> -</head> - -<body> - -<p> - Sorry, the traceroute service that used to be here has been turned off due to - abuse. There are a number of other <a - href="http://www.google.com/search?q=traceroute">traceroute gateways</a> - online, please use one of those instead. (if you are specifically interested - in the route from MIT, see <a - href="http://jis.mit.edu:8001/cgi-bin/traceroute">TraceRoute from MIT</a>.) -</p> - -<hr> -<address> -<a href="http://www.w3.org/People/Gerald/">Gerald Oskoboiny</a><br> -\$Date: 2001-12-21 23:39:27 $ \ -</address> - -</body> -EOF - -exit; # turned off due to abuse; the real script follows: - -$| = 1; - -# accept either traceroute/foo or traceroute?foo; default to REMOTE_ADDR -# if nothing else is specified -$addr = $ENV{PATH_INFO} || $ENV{QUERY_STRING} || $ENV{REMOTE_ADDR}; - -$addr =~ tr/+/ /; -$addr =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; -$addr =~ s,^addr=,,; -$addr =~ s/[^A-Za-z0-9\.-]//g; # for security - -print <<"EOF"; -Content-Type: text/html - -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<html> - -<head> - <title>Traceroute from validator.w3.org to $addr</title> - <link rev="made" href="mailto:gerald\@w3.org"> -</head> - -<body bgcolor="#FFFFFF" text="#000000" link="#0000ee" vlink="#551a8b"> - -<p> - Here is the result of a traceroute from <code>validator.w3.org</code> - to <code>$addr</code>: -</p> - -<form action="/traceroute"> - <input type=text name=addr size=40 value="$addr"> - <input type=submit value="Traceroute"> - <input type=reset> -</form> - -<pre> -EOF - -open( TRACEROUTE, "/usr/sbin/traceroute $addr | " ) || - die "couldn't open pipe to traceroute! $!"; - -while (<TRACEROUTE>) { - chomp; - s/\&/\&/g; - s/\</\</g; - print "$_\n"; -} -close( TRACEROUTE ) || die "couldn't close pipe to traceroute! $!"; - -print <<"EOF"; -</pre> - -<hr> -<address> -<a href="http://www.w3.org/People/Gerald/">Gerald Oskoboiny</a><br> -\$Date: 2001-12-21 23:39:27 $ \ -</address> - -EOF - -exit; - |