summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: gerald <gerald@localhost> 2001-02-20 10:47:53 +0000
committer: gerald <gerald@localhost> 2001-02-20 10:47:53 +0000
commit: 977f056bf54bce8cd4db3da88ce5d3122dbf62bf (patch)
tree: d004a29b96091ed50813263c8e446e7d88ebb3e7
parent: 169f0637f981d6dcfc01ce3df1fd500c007a42d3 (diff)
download: markup-validator-977f056bf54bce8cd4db3da88ce5d3122dbf62bf.zip
markup-validator-977f056bf54bce8cd4db3da88ce5d3122dbf62bf.tar.gz
markup-validator-977f056bf54bce8cd4db3da88ce5d3122dbf62bf.tar.bz2
handle different encodings using iconv instead of nkf,
using a patch from Takuya Asada
-rwxr-xr-x  httpd/cgi-bin/check  21
1 files changed, 11 insertions, 10 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 20c77ab..eca50c5 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -8,7 +8,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.76 2001-02-20 09:41:29 gerald Exp $
+# $Id: check,v 1.77 2001-02-20 10:47:53 gerald Exp $
#
# We need Perl 5.004.
@@ -61,7 +61,7 @@ my $temp = "/tmp/validate.$$"; # @@ Use POSIX/IO::File tmpfiles instead!
#
# Executables and binaries
my $sp = '/usr/local/bin/lq-nsgmls';
-my $nkf = '/usr/local/bin/nkf';
+my $iconv = '/usr/bin/iconv';
my $weblint = '/usr/bin/weblint';
#
@@ -74,9 +74,9 @@ my $element_ref = 'http://www.htmlhelp.com/reference/html40/';
#
# Strings
-$VERSION = q$Revision: 1.76 $;
+$VERSION = q$Revision: 1.77 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
-$DATE = q$Date: 2001-02-20 09:41:29 $;
+$DATE = q$Date: 2001-02-20 10:47:53 $;
$MAINTAINER = 'gerald@w3.org';
my $notice = ''; # "<p><strong>Note: This service will be ...</strong>";
@@ -182,7 +182,7 @@ $q->param('uri', 'http://' . $q->param('uri')) if $q->param('uri') =~ m(^www)i;
# We save it in a string instead of printing it in case we need to abort before
# we have any meaningfull results to report. @@ May not be necessary!
my $header = <<"EOF";
-Content-Type: text/html
+Content-Type: text/html; charset=utf-8
$html40t_doctype
<html>
@@ -286,6 +286,8 @@ foreach my $line (@{$File->{Content}}) {
if ($line =~ /charset\s*=[\s\"]*([^\s;\">]*)/i) {
$File->{META_Charset} = lc $1;
last;
+ } elsif ($line =~ /<body/i || $line =~ /<body/i) {
+ last;
}
}
}
@@ -304,13 +306,12 @@ if ($File->{HTTP_Charset}) {
#
# Setup conversion filter and SP environment for the effective charset.
my $codeconv = '';
-if ($File->{Charset} eq 'iso-2022-jp') {
- $codeconv = "$nkf -Jex | ";
-} elsif ($File->{Charset} eq 'utf-8') {
+if ($File->{Charset} ne 'unknown' and $File->{Charset} ne 'us-ascii') {
$ENV{SP_CHARSET_FIXED} = 'YES';
$ENV{SP_ENCODING} = 'utf-8';
-} elsif ($File->{Charset} eq 'shift_jis') {
- $codeconv = "$nkf -Sex | ";
+ if ($File->{Charset} ne 'utf-8') {
+ $codeconv = "$iconv -f $File->{Charset} -t utf-8 |";
+ }
}
#