diff options
author | gerald <gerald@localhost> | 2001-02-20 10:47:53 +0000 |
---|---|---|
committer | gerald <gerald@localhost> | 2001-02-20 10:47:53 +0000 |
commit | 977f056bf54bce8cd4db3da88ce5d3122dbf62bf (patch) | |
tree | d004a29b96091ed50813263c8e446e7d88ebb3e7 | |
parent | 169f0637f981d6dcfc01ce3df1fd500c007a42d3 (diff) | |
download | markup-validator-977f056bf54bce8cd4db3da88ce5d3122dbf62bf.zip markup-validator-977f056bf54bce8cd4db3da88ce5d3122dbf62bf.tar.gz markup-validator-977f056bf54bce8cd4db3da88ce5d3122dbf62bf.tar.bz2 |
handle different encodings using iconv instead of nkf,
using a patch from Takuya Asada
-rwxr-xr-x | httpd/cgi-bin/check | 21 |
1 files changed, 11 insertions, 10 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 20c77ab..eca50c5 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -8,7 +8,7 @@
 # This source code is available under the license at:
 # http://www.w3.org/Consortium/Legal/copyright-software
 #
-# $Id: check,v 1.76 2001-02-20 09:41:29 gerald Exp $
+# $Id: check,v 1.77 2001-02-20 10:47:53 gerald Exp $
 
 #
 # We need Perl 5.004.
@@ -61,7 +61,7 @@ my $temp = "/tmp/validate.$$"; # @@ Use POSIX/IO::File tmpfiles instead!
 #
 # Executables and binaries
 my $sp = '/usr/local/bin/lq-nsgmls';
-my $nkf = '/usr/local/bin/nkf';
+my $iconv = '/usr/bin/iconv';
 my $weblint = '/usr/bin/weblint';
 
 #
@@ -74,9 +74,9 @@ my $element_ref = 'http://www.htmlhelp.com/reference/html40/';
 
 #
 # Strings
-$VERSION = q$Revision: 1.76 $;
+$VERSION = q$Revision: 1.77 $;
 $VERSION =~ s/Revision: ([\d\.]+) /$1/;
-$DATE = q$Date: 2001-02-20 09:41:29 $;
+$DATE = q$Date: 2001-02-20 10:47:53 $;
 $MAINTAINER = 'gerald@w3.org';
 my $notice = ''; # "<p><strong>Note: This service will be ...</strong>";
 
@@ -182,7 +182,7 @@ $q->param('uri', 'http://' . $q->param('uri')) if $q->param('uri') =~ m(^www)i;
 # We save it in a string instead of printing it in case we need to abort before
 # we have any meaningfull results to report. @@ May not be necessary!
 my $header = <<"EOF";
-Content-Type: text/html
+Content-Type: text/html; charset=utf-8
 
 $html40t_doctype
 <html>
@@ -286,6 +286,8 @@ foreach my $line (@{$File->{Content}}) {
     if ($line =~ /charset\s*=[\s\"]*([^\s;\">]*)/i) {
       $File->{META_Charset} = lc $1;
       last;
+    } elsif ($line =~ /<body/i || $line =~ /<body/i) {
+      last;
     }
   }
 }
@@ -304,13 +306,12 @@ if ($File->{HTTP_Charset}) {
 #
 # Setup conversion filter and SP environment for the effective charset.
 my $codeconv = '';
-if ($File->{Charset} eq 'iso-2022-jp') {
-  $codeconv = "$nkf -Jex | ";
-} elsif ($File->{Charset} eq 'utf-8') {
+if ($File->{Charset} ne 'unknown' and $File->{Charset} ne 'us-ascii') {
   $ENV{SP_CHARSET_FIXED} = 'YES';
   $ENV{SP_ENCODING} = 'utf-8';
-} elsif ($File->{Charset} eq 'shift_jis') {
-  $codeconv = "$nkf -Sex | ";
+  if ($File->{Charset} ne 'utf-8') {
+    $codeconv = "$iconv -f $File->{Charset} -t utf-8 |";
+  }
 }
 
 #
|