diff options
author | renaudb <renaudb@localhost> | 1998-09-11 21:34:04 +0000 |
---|---|---|
committer | renaudb <renaudb@localhost> | 1998-09-11 21:34:04 +0000 |
commit | 198eab5db5c34e7813f78e2e3c151ea26877e885 (patch) | |
tree | 313d2ee598aa59563678cf9408686ebb809835f9 | |
parent | ac7c600f8be40829b2bb4254eeeef61358294833 (diff) | |
download | markup-validator-198eab5db5c34e7813f78e2e3c151ea26877e885.zip markup-validator-198eab5db5c34e7813f78e2e3c151ea26877e885.tar.gz markup-validator-198eab5db5c34e7813f78e2e3c151ea26877e885.tar.bz2 |
finished handling of 401s. fixed double responses too
-rwxr-xr-x | httpd/cgi-bin/checklink.pl | 102 |
1 files changed, 74 insertions, 28 deletions
diff --git a/httpd/cgi-bin/checklink.pl b/httpd/cgi-bin/checklink.pl index 21ec0e7..782d01d 100755 --- a/httpd/cgi-bin/checklink.pl +++ b/httpd/cgi-bin/checklink.pl @@ -16,15 +16,17 @@ sub new_line { } # called by HTML::Parser::parse # overriden to count lines +# I looked at getting text from links for the output +# but I don't see why it would be of interest +# plus, need to remove markup within <a></a> +# probably not worth it sub start { my $self=shift; my ($tag,$attr)=@_; my $link; - $self->{STARTLINK} = 0; $link=$attr->{href} if $tag eq "a"; $link=$attr->{src} if $tag eq "img"; if (defined $link){ - $self->{STARTLINK} = 1; $self->{Links}{$link}{$self->{Line}+1}++; } } @@ -35,16 +37,32 @@ use vars qw(@ISA); @ISA = qw(LWP::Parallel::UserAgent); # function overload to handle 301s and build # redirect chain -# next is to handle 401s to make sure a 404 -# is not hidden behind it. +# it now handles 401s too, but not like I wanted it to +# it uses a team password to HEAD non-public documents +# I make sure I don't air this password out of w3.org +# proper way would be: fork a HTTP server, and then proxy 401 +# between server and client +# caching credentials in HTTP Daemon may be difficult +# I'll look at this later +# it is the way to go if we ever want to integrate this into validator sub on_return { my $self = shift; my ($request,$response,$content) = @_; - if($response->code == 301 || $response->code == 302){ - $self->{URL}{Redirect}{$request->url} = $response->headers->header('Location'); - unless(defined $self->{URL}{Registered}{$response->headers->header('Location')}){ - $self->register(HTTP::Request->new(HEAD => $response->headers->header('Location')),\&callback_check,undef,0); - $self->{URL}{Registered}{$response->headers->header('Location')}="True"; + if($self->{DoAuth}){ + if($response->code == 301 || $response->code == 302){ + $self->{URL}{Redirect}{$request->url} = $response->headers->header('Location'); + unless(defined $self->{URL}{Registered}{$response->headers->header('Location')}){ + $self->register(HTTP::Request->new(HEAD => $response->headers->header('Location')),\&callback_check,undef,0); + $self->{URL}{Registered}{$response->headers->header('Location')}="True"; + } + } + if($response->code == 401 && !defined($self->{Auth}{$response->request->url})){ + if($response->request->url->netloc =~ /\.w3\.org$/){ + my $newreq = HTTP::Request->new(HEAD => $response->request->url); + $newreq->headers->authorization_basic("XXXXX","XXXXX"); + $self->{Auth}{$response->request->url} = 1; + $self->register($newreq,\&callback_check,undef,0); + } } } } @@ -55,11 +73,23 @@ use CGI qw(:standard); ############### # Global Variables -my $VERSION= '$Id: checklink.pl,v 1.14 1998-09-10 00:33:57 renaudb Exp $ '; +my $VERSION= '$Id: checklink.pl,v 1.15 1998-09-11 21:34:04 renaudb Exp $ '; my %ALLOWED_SCHEMES = ( "http" => 1 ); my %SCHEMES = (); # for report my %URL = (); -my %COLORS = ( 301 => ' BGCOLOR="yellow"', 302 => ' BGCOLOR="yellow"', 404 => ' BGCOLOR="red"' , 401 => ' BGCOLOR="aqua"' ); +my %COLORS = ( 200 => '', 301 => ' BGCOLOR="yellow"', 302 => ' BGCOLOR="yellow"', 404 => ' BGCOLOR="red"' , 401 => ' BGCOLOR="aqua"' ); +my %HTTP_CODES = ( 200 => 'ok' , + 201 => '201', + 301 => 'redirect' , + 302 => 'redirect' , + 401 => 'unauthorized' , + 404 => 'not found' , + 405 => '405', + 408 => '408', + 415 => '415', + 500 => '500', + 501 => '501', + 503 => '503'); my $VERBOSE = 0; my $CGI = 0; my $p = ParseLink->new(); # we want the parser to be global, so we can call it from callback_parse @@ -134,7 +164,6 @@ sub callback_check { sub checklinks { my $url=URI::URL->new(shift); my $q = shift if($CGI); - #my $ua = new LWP::Parallel::UserAgent; my $ua = new UserAgent; my $request = HTTP::Request->new(GET => $url); @@ -155,6 +184,7 @@ sub checklinks { my $res = $ua->wait(10); $ua->initialize; $ua->{URL}= \%URL; + $ua->{DoAuth} = 1; foreach my $r (map {$res->{$_}->response} keys %$res){ if ($r->is_success && $r->content_type =~ /text\/html/i){ @@ -177,21 +207,24 @@ sub checklinks { &print_result($url,$response,$q); } else { # error handling if error on fetching document to be checklink-ed + # I need to add a command-line mode here if($r->code == 401){ - print $q->h3($r->request->url); - $r->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/; - my $realm = $1; if($CGI){ - print $q->h2('Authentication Required To Fetch '.$q->a({href=>$url},$url)); - print $q->startform('POST',$q->url); - print $q->textfield(-name=>'url',-size=>'50',-value=>$url),$q->br; - print $q->textfield(-name=>'username',-size=>'10'),"Username for Realm ",$realm,$q->br; - print $q->password_field(-name=>'password',-size=>'10'),"Password",$q->br; - print $q->submit('Proceed'); - print $q->endform; + print $q->h3($r->request->url); + $r->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/; + my $realm = $1; + if($CGI){ + print $q->h2('Authentication Required To Fetch '.$q->a({href=>$url},$url)); + print $q->startform('POST',$q->url); + print $q->textfield(-name=>'url',-size=>'50',-value=>$url),$q->br; + print $q->textfield(-name=>'username',-size=>'10'),"Username for Realm ",$realm,$q->br; + print $q->password_field(-name=>'password',-size=>'10'),"Password",$q->br; + print $q->submit('Proceed'); + print $q->endform; + } } } else { - print $q->h2('Error '.$r->code) if($CGI); + print $q->h2('Error '.$r->code." ".$r->content_type) if($CGI); } } } @@ -209,7 +242,17 @@ sub print_result{ print $q->h2('Result for '.$q->a({href=>$url},$url)) if($CGI); print "<table border=\"1\"><TR ALIGN=\"center\"><TD>Lines</TD><TD>URI</TD><TD>Code</TD><TD>Extra</TD></TR>\n" if($CGI); + # this is to handle 401s and see what's behind them + # I need to watch this carefully, I might be wrong about these tests foreach my $resp (map {$response->{$_}->response} keys %$response){ + $URL{Responses}{$resp->request->url} = $resp unless($URL{Responses}{$resp->request->url} && $resp->code == 200); + $URL{Codes}{$resp->request->url} = $resp->code unless($URL{Codes}{$resp->request->url} && $resp->code == 401); + } + # loop that selects through all the responses to the HEADs to the links + # sorting can happen here + foreach my $resp (map {$URL{Responses}{$_}} sort keys %{$URL{Responses}}){ + # we pass links for which we don't have a line number + # (which means they are responses from 301/302 or 401 handling) if(keys %{$URL{$resp->request->url}}){ unless($resp->code eq "200"){ print "<TR><TD ALIGN=\"center\">" if ($CGI); @@ -217,10 +260,13 @@ sub print_result{ if ($CGI){ print "</TD><TD".$COLORS{$resp->code}.">"; print "<B>".$q->a({href=>$resp->request->url},$resp->request->url)."</B>"; - print $q->br.&recurse_redirect($resp->request->url,$q) if($resp->code == 301 || $resp->code == 302); - print "</TD><TD ALIGN=\"center\">".$resp->code."</TD><TD>"; + if($URL{Codes}{$resp->request->url} == 301 || $URL{Codes}{$resp->request->url} == 302){ + print $q->br.&recurse_redirect($resp->request->url,$q); + } else { + print "</TD><TD ALIGN=\"center\"".$COLORS{$URL{Codes}{$resp->request->url}}.">".$HTTP_CODES{$URL{Codes}{$resp->request->url}}."</TD><TD>"; + } } - print " ",$resp->request->url, ": ",$resp->code," " if($VERBOSE); + print " ",$resp->request->url, ": ",$URL{Codes}{$resp->request->url}," " if($VERBOSE); print $resp->headers->www_authenticate if($resp->code == 401); print "</TD></TR>\n" if($CGI); print "\n" if($VERBOSE); @@ -233,7 +279,7 @@ sub print_result{ } } } - print "</table>\n";# if($CGI); + print "</table>\n" if($CGI); } # I'll add links to source later @@ -250,6 +296,6 @@ sub html_header { sub recurse_redirect { my $url = shift; my $q = shift; - my $rec = $URL{Redirect}{$URL{Redirect}{$url}}? "<BR>".&recurse_redirect($URL{Redirect}{$url},$q):""; + my $rec = $URL{Redirect}{$URL{Redirect}{$url}}? "<BR>".&recurse_redirect($URL{Redirect}{$url},$q):"</TD><TD ALIGN=\"center\">".$HTTP_CODES{$URL{Codes}{$URL{Redirect}{$url}}}."</TD><TD>"; return "-> ".$q->a({href=>$URL{Redirect}{$url}},$URL{Redirect}{$url}).$rec; } |