diff options
author | ot <ot@localhost> | 2007-05-18 00:54:52 +0000 |
---|---|---|
committer | ot <ot@localhost> | 2007-05-18 00:54:52 +0000 |
commit | 037cc3c818c360a3bce703ef680329d971fd3457 (patch) | |
tree | f829a28d19796869e70addae9e70daacdb587dd0 | |
parent | 4367be154c886351240960830597fc536a1f859b (diff) | |
download | markup-validator-037cc3c818c360a3bce703ef680329d971fd3457.zip markup-validator-037cc3c818c360a3bce703ef680329d971fd3457.tar.gz markup-validator-037cc3c818c360a3bce703ef680329d971fd3457.tar.bz2 |
The fix for bug 4474 actually broke the transcoding routine for some versions of the Encode library.
http://lists.w3.org/Archives/Public/www-validator-cvs/2007Apr/0159.html
This patch:
* restores the transcoding routine to normal
(hence removing the mistaken "non-sgml character" reports for Unicode content)
* makes sure that the content we output is properly encoded from Perl's internal representation to UTF-8
* adds proper decoding/encoding for tidy processing
-rwxr-xr-x | httpd/cgi-bin/check | 17 |
1 files changed, 11 insertions, 6 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index a71cffa..91d1f97 100755 --- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -14,7 +14,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 1.520 2007-05-17 04:30:00 ot Exp $ +# $Id: check,v 1.521 2007-05-18 00:54:52 ot Exp $ # # Disable buffering on STDOUT! @@ -32,6 +32,7 @@ use 5.008; # Pragmas. use strict; use warnings; +use utf8; # # Modules. See also the BEGIN block further down below. @@ -180,7 +181,7 @@ Directory not readable (permission denied): @_r # # Strings - $VERSION = q$Revision: 1.520 $; + $VERSION = q$Revision: 1.521 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; # @@ -846,7 +847,7 @@ if (! $File->{'Is Valid'}) { require HTML::Tidy; my $tidy = HTML::Tidy->new({config_file => $CFG->{Paths}->{TidyConf}}); - $File->{'Tidy'} = $tidy->clean(join"\n",@{$File->{Content}}); + $File->{'Tidy'} = Encode::decode('utf-8', $tidy->clean(join"\n",@{$File->{Content}})); $File->{'Tidy_OK'} = TRUE; }; if ($@) { @@ -908,7 +909,8 @@ $template->param(file_source => &source($File)) # if $template->param('opt_show_raw_errors'); # $T->param(file_outline => &outline($File)) if $T->param('opt_show_outline'); -print $template->output; +# transcode output from perl's internal to utf-8 and output +print Encode::encode('UTF-8', $template->output); # # Get rid of $File object and exit. @@ -2045,7 +2047,7 @@ sub transcode { # Try to transcode eval { - $output = Encode::encode("utf8", Encode::decode($cs, $input, Encode::FB_CROAK)); + $output = Encode::decode($cs, $input, Encode::FB_CROAK); }; # Transcoding failed @@ -2071,6 +2073,8 @@ sub transcode { # tentative fix for http://www.w3.org/Bugs/Public/show_bug.cgi?id=3992 $output =~ s/(\r\n|\n|\r)/\n/g; + #debug: we could check if the content has utf8 bit on with + #$output= utf8::is_utf8($output) ? 
1 : 0; $File->{Content} = [split/\n/, $output]; return $File; @@ -2123,7 +2127,8 @@ sub abort_if_error_flagged { if ($File->{Opt}->{Output} eq 'html') { &prep_template($File, $File->{Templates}->{Error}); - print $File->{Templates}->{Error}->output; + # transcode output from perl's internal to utf-8 and output + print Encode::encode('UTF-8',$File->{Templates}->{Error}->output); exit; } else { |