summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author ot <ot@localhost> 2007-05-18 00:54:52 +0000
committer ot <ot@localhost> 2007-05-18 00:54:52 +0000
commit 037cc3c818c360a3bce703ef680329d971fd3457 (patch)
tree f829a28d19796869e70addae9e70daacdb587dd0
parent 4367be154c886351240960830597fc536a1f859b (diff)
download markup-validator-037cc3c818c360a3bce703ef680329d971fd3457.zip
markup-validator-037cc3c818c360a3bce703ef680329d971fd3457.tar.gz
markup-validator-037cc3c818c360a3bce703ef680329d971fd3457.tar.bz2
The fix for bug 4474 actually broke the transcoding routine for some versions of the Encode library.
See http://lists.w3.org/Archives/Public/www-validator-cvs/2007Apr/0159.html
This patch:
* fixes the transcoding routine back to normal (hence removing the mistaken "non-sgml character" errors for Unicode content)
* makes sure that the content we output is properly encoded from Perl's internal representation to UTF-8
* adds proper decoding/encoding for Tidy processing
-rwxr-xr-x httpd/cgi-bin/check 17
1 files changed, 11 insertions, 6 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index a71cffa..91d1f97 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.520 2007-05-17 04:30:00 ot Exp $
+# $Id: check,v 1.521 2007-05-18 00:54:52 ot Exp $
#
# Disable buffering on STDOUT!
@@ -32,6 +32,7 @@ use 5.008;
# Pragmas.
use strict;
use warnings;
+use utf8;
#
# Modules. See also the BEGIN block further down below.
@@ -180,7 +181,7 @@ Directory not readable (permission denied): @_r
#
# Strings
- $VERSION = q$Revision: 1.520 $;
+ $VERSION = q$Revision: 1.521 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
#
@@ -846,7 +847,7 @@ if (! $File->{'Is Valid'}) {
require HTML::Tidy;
my $tidy = HTML::Tidy->new({config_file => $CFG->{Paths}->{TidyConf}});
- $File->{'Tidy'} = $tidy->clean(join"\n",@{$File->{Content}});
+ $File->{'Tidy'} = Encode::decode('utf-8', $tidy->clean(join"\n",@{$File->{Content}}));
$File->{'Tidy_OK'} = TRUE;
};
if ($@) {
@@ -908,7 +909,8 @@ $template->param(file_source => &source($File))
# if $template->param('opt_show_raw_errors');
# $T->param(file_outline => &outline($File)) if $T->param('opt_show_outline');
-print $template->output;
+# transcode output from perl's internal to utf-8 and output
+print Encode::encode('UTF-8', $template->output);
#
# Get rid of $File object and exit.
@@ -2045,7 +2047,7 @@ sub transcode {
# Try to transcode
eval {
- $output = Encode::encode("utf8", Encode::decode($cs, $input, Encode::FB_CROAK));
+ $output = Encode::decode($cs, $input, Encode::FB_CROAK);
};
# Transcoding failed
@@ -2071,6 +2073,8 @@ sub transcode {
# tentative fix for http://www.w3.org/Bugs/Public/show_bug.cgi?id=3992
$output =~ s/(\r\n|\n|\r)/\n/g;
+ #debug: we could check if the content has utf8 bit on with
+ #$output= utf8::is_utf8($output) ? 1 : 0;
$File->{Content} = [split/\n/, $output];
return $File;
@@ -2123,7 +2127,8 @@ sub abort_if_error_flagged {
if ($File->{Opt}->{Output} eq 'html') {
&prep_template($File, $File->{Templates}->{Error});
- print $File->{Templates}->{Error}->output;
+ # transcode output from perl's internal to utf-8 and output
+ print Encode::encode('UTF-8',$File->{Templates}->{Error}->output);
exit;
} else {