#!/usr/bin/perl -T
#
# W3C MarkUp Validation Service
# A CGI script to retrieve and validate a MarkUp file
#
# Copyright 1995-2002 Gerald Oskoboiny <gerald@w3.org>
# for additional contributors, see http://dev.w3.org/cvsweb/validator/
#
# This source code is available under the license at:
#     http://www.w3.org/Consortium/Legal/copyright-software
#
# $Id: check,v 1.320 2002-12-08 17:51:46 ville Exp $

#
# Disable buffering on STDOUT!
$| = 1;

#
# We need Perl 5.6.0+.
use 5.006;

###############################################################################
#### Load modules. ############################################################
###############################################################################

#
# Pragmas.
use strict;
use warnings;

#
# Modules.
#
# Version numbers given where we absolutely need a minimum version of a given
# module (gives nicer error messages). By default, add an empty import list
# when loading modules to prevent non-OO or poorly written modules from
# polluting our namespace.
#
use CGI             2.81 qw(
			    -newstyle_urls
			    -private_tempfiles
			    redirect
			   ); # 2.81 for XHTML, and import redirect() function.

use CGI::Carp            qw(carp croak fatalsToBrowser);
use Config::General 2.06 qw(); # Need 2.06 for -SplitPolicy
use File::Spec           qw();
use HTML::Parser    3.25 qw(); # Need 3.25 for $p->ignore_elements.
use HTML::Template  2.6  qw();
use HTTP::Request        qw();
use IO::File             qw();
use IPC::Open3           qw(open3);
use LWP::UserAgent  1.90 qw(); # Need 1.90 for protocols_(allowed|forbidden)
use Set::IntSpan         qw();
use Text::Iconv          qw();
use Text::Wrap           qw(wrap);
use URI                  qw();
use URI::Escape          qw(uri_escape);


###############################################################################
#### Constant definitions. ####################################################
###############################################################################

#
# Define global constants
use constant TRUE  => 1;
use constant FALSE => 0;

#
# Tentative Validation Severities.
use constant T_DEBUG =>  1; # 0000 0001
use constant T_INFO  =>  2; # 0000 0010
use constant T_WARN  =>  4; # 0000 0100
use constant T_ERROR =>  8; # 0000 1000
use constant T_FATAL => 16; # 0001 0000

#
# Output flags for error processing
use constant O_SOURCE  =>  1; # 0000 0001
use constant O_CHARSET =>  2; # 0000 0010
use constant O_DOCTYPE =>  4; # 0000 0100


#
# Define global variables.
use vars qw($DEBUG $CFG $VERSION);


#
# Things inside BEGIN don't happen on every request in persistent
# environments, such as mod_perl.  So let's do globals, eg. read config here.
BEGIN {

  #
  # Read Config Files.
  eval {
    my %config_opts =
      (-ConfigFile     => $ENV{W3C_VALIDATOR_CFG} || '/etc/w3c/validator.conf',
       -MergeDuplicateOptions => 'yes',
       -SplitPolicy      => 'equalsign',
       -UseApacheInclude => TRUE,
       -IncludeRelative  => TRUE,
       -InterPolateVars  => TRUE,
       -DefaultConfig    => { Allowed_Protocols => 'http,https',
                              SGML_Parser       => '/usr/bin/onsgmls',
                            },
      );
    my %cfg = Config::General->new(%config_opts)->getall();
    $CFG = \%cfg;
  };
  if ($@) {
    die <<".EOF.";
Couldn't read configuration.  Set the W3C_VALIDATOR_CFG environment variable
or copy conf/* to /etc/w3c/, and make sure that the configuration file, as
well as all included files are readable by the web server user.
The error reported was: '$@'
.EOF.
  }
  # Split allowed protocols into a list.
  # We could use the array / identical values feature of Config::General,
  # but that has unwanted effects when the only array type option we need is
  # Allowed_Protocols.
  if (my $allowed = delete($CFG->{Allowed_Protocols})) {
    $CFG->{Allowed_Protocols} = [ split(/\s*,\s*/, $allowed) ];
  }

  #
  # Make sure onsgmls exists and is executable.
  unless (-x $CFG->{SGML_Parser}) {
    die qq(Configured SGML Parser "$CFG->{SGML_Parser}" not executable!\n);
  }

  { # Make types config indexed by FPI.
    my $_types = {};
    map {$_types->{$CFG->{Types}->{$_}->{PubID}} = $CFG->{Types}->{$_}}
      keys %{$CFG->{Types}};
    $CFG->{Types} = $_types;
  }

  #
  # Set debug flag.
  $DEBUG = TRUE if $ENV{W3C_VALIDATOR_DEBUG} || $CFG->{DEBUG};

  #
  # Strings
  $VERSION    =  q$Revision: 1.320 $;
  $VERSION    =~ s/Revision: ([\d\.]+) /$1/;

} # end of BEGIN block.

#
# Get rid of (possibly insecure) $PATH.
delete $ENV{PATH};


#use Data::Dumper qw(Dumper);
#print Dumper($CFG);
#exit;
###############################################################################
#### Process CGI variables and initialize. ####################################
###############################################################################

#
# Create a new CGI object.
my $q = new CGI;

#
# The data structure that will hold all session data.
my $File;


##############################################
# Populate $File->{Env} -- Session Metadata. #
##############################################

#
# The URL to this CGI Script.
$File->{Env}->{'Self URI'} = $q->url(-query => 0);


#################################
# Initialize the datastructure. #
#################################

#
# Charset data (casing policy: lowercase early).
$File->{Charset}->{Use}      = ''; # The charset used for validation.
$File->{Charset}->{Auto}     = ''; # Autodetection using XML rules (Appendix F)
$File->{Charset}->{HTTP}     = ''; # From HTTP's "charset" parameter.
$File->{Charset}->{META}     = ''; # From HTML's <meta http-equiv>.
$File->{Charset}->{XML}      = ''; # From the XML Declaration.
$File->{Charset}->{Override} = ''; # From CGI/user override.

#
# Misc simple types.
$File->{Type} = '';

#
# Array (ref) used to store character offsets for the XML report.
$File->{Offsets}->[0] = [0, 0]; # The first item isn't used...

#
# Listrefs.
$File->{Lines}              = []; # Line numbers for encoding errors.
$File->{Warnings}           = []; # Warnings...
$File->{'Other Namespaces'} = []; # Other (non-root) Namespaces.


###############################################################################
#### Generate Template for Result. ############################################
###############################################################################

my $T = HTML::Template->new(
			    filename => '/usr/local/validator/share/templates/en_US/result.tmpl',
			    die_on_bad_params => FALSE,
			   );
my $E = HTML::Template->new(
			    filename => '/usr/local/validator/share/templates/en_US/fatal-error.tmpl',
			    die_on_bad_params => FALSE,
			   );

$T->param(cfg_home_page => $CFG->{Home_Page});


#########################################
# Populate $File->{Opt} -- CGI Options. #
#########################################

#
# Preprocess the CGI parameters.
$q = &prepCGI($File, $q);

#
# Set session switches.
$File->{Opt}->{'Outline'}        = $q->param('outline') ? TRUE                   :  FALSE;
$File->{Opt}->{'Show Source'}    = $q->param('ss')      ? TRUE                   :  FALSE;
$File->{Opt}->{'Show Parsetree'} = $q->param('sp')      ? TRUE                   :  FALSE;
$File->{Opt}->{'No Attributes'}  = $q->param('noatt')   ? TRUE                   :  FALSE;
$File->{Opt}->{'Show ESIS'}      = $q->param('esis')    ? TRUE                   :  FALSE;
$File->{Opt}->{'Show Errors'}    = $q->param('errors')  ? TRUE                   :  FALSE;
$File->{Opt}->{'Verbose'}        = $q->param('verbose') ? TRUE                   :  FALSE;
$File->{Opt}->{'Debug'}          = $q->param('debug')   ? TRUE                   :  FALSE;
$File->{Opt}->{'No200'}          = $q->param('No200')   ? TRUE                   :  FALSE;
$File->{Opt}->{'Charset'}        = $q->param('charset') ? lc $q->param('charset'):     '';
$File->{Opt}->{'DOCTYPE'}        = $q->param('doctype') ? $q->param('doctype')   :     '';
$File->{Opt}->{'URI'}            = $q->param('uri')     ? $q->param('uri')       :     '';
$File->{Opt}->{'Output'}         = $q->param('output')  ? $q->param('output')    : 'html';

#
# If ";debug" was given, let it overrule the value from the config file,
# regardless of whether it's "0" or "1" (on or off).
$DEBUG = $q->param('debug') if defined $q->param('debug');

&abort_if_error_flagged($File, 0);

#
# Get the file and metadata.
if ($q->param('uploaded_file')) {
  $File = &handle_file($q, $File);
} elsif ($q->param('fragment')) {
  $File = &handle_frag($q, $File);
} elsif ($q->param('uri')) {
  $File = &handle_uri($q, $File);
}

#
# Abort if an error was flagged during initialization.
&abort_if_error_flagged($File, 0);

#
# Get rid of the CGI object.
undef $q;

#
# We don't need STDIN any more, so get rid of it to avoid getting clobbered
# by Apache::Registry's idiotic interference under mod_perl.
untie *STDIN;


###############################################################################
#### Output validation results. ###############################################
###############################################################################

#
# Add warning if we were redirected.
unless (URI::eq("$File->{Opt}->{URI}", $File->{URI}) and not $File->{'Is Upload'}) {
  &add_warning(
	       $File, 'Note:',
	       sprintf(
		       'The URI you gave me, &lt;%s&gt;, returned a redirect to &lt;%s&gt;.',
		       &ent($File->{Opt}->{URI}),
		       &ent($File->{URI}),
		      )
	      );
}

#
# Find the XML Encoding.
$File = &find_xml_encoding($File);

#
# Decide on a charset to use (first part)
#
if ($File->{Charset}->{HTTP}) { # HTTP, if given, is authoritative.
  $File->{Charset}->{Use} = $File->{Charset}->{HTTP};
} elsif ($File->{ContentType} =~ m(^text/([-.a-zA-Z0-9]\+)?xml$)) {
  # Act as if $http_charset was 'us-ascii'. (MIME rules)
  $File->{Charset}->{Use} = 'us-ascii';
  my @_source;
  if ($File->{'Is Upload'}) {
    @_source = ('sent by your web browser', ($File->{Server}||'unknown'), 'browser send');
  } else {
    @_source = ('returned by your web server', ($File->{Server}||'unknown'), 'server return');
  }
  &add_warning($File, 'Note:', <<".EOF.");
      The HTTP Content-Type header $_source[0] ($_source[1]) did not contain
      a "charset" parameter, but the Content-Type was one of the XML text/*
      sub-types (<code>$File->{ContentType}</code>). The relevant
      specification (RFC 3023) specifies a strong default of "us-ascii" for
      such documents so we will use this value regardless of any encoding you
      may have indicated elsewhere. If you would like to use a different
      encoding, you should arrange to have your $_source[2] this new encoding
      information.
.EOF.
} elsif ($File->{Charset}->{XML}) {
  $File->{Charset}->{Use} = $File->{Charset}->{XML};
} elsif ($File->{Charset}->{Auto} =~ /^utf-16[bl]e$/ && $File->{BOM} == 2) {
  $File->{Charset}->{Use} = 'utf-16';
} elsif ($File->{ContentType} =~ m(^application/([-.a-zA-Z0-9]+\+)?xml$)) {
  $File->{Charset}->{Use} = "utf-8";
} elsif (&is_xml($File) and not $File->{ContentType} =~ m(^text/)) {
  $File->{Charset}->{Use} = 'utf-8'; # UTF-8 (image/svg+xml etc.)
}


$File->{Content} = &normalize_newlines($File->{Bytes},
                       exact_charset($File, $File->{Charset}->{Use}));
$File->{Content}->[0] = substr $File->{Content}->[0], $File->{BOM}; # remove BOM
#### add warning about BOM in UTF-8

#
# Try to extract META charset
# (works only if ascii-based and reasonably clean before <meta>)
$File = &preparse($File);
unless ($File->{Charset}->{Use}) {
  $File->{Charset}->{Use} = $File->{Charset}->{META};
}

if (&conflict($File->{Opt}->{Charset}, '(detect automatically)')) {
  my ($override, undef) = split(/\s/, $File->{Opt}->{Charset}, 2);
  $File->{Charset}->{Use} = $File->{Charset}->{Override} = lc($override);
  # message about 'charset override' in effect comes later
}

unless ($File->{Charset}->{Use}) {
  $File->{'Error Flagged'} = TRUE;
  $File->{'Error Message'} = <<".EOF.";
    <p>
      I was not able to extract a character encoding labeling from any of
      the valid sources for such information. Without encoding information it
      is impossible to validate the document. The sources I tried are:
    </p>
    <ul>
      <li>The HTTP Content-Type field.</li>
      <li>The XML Declaration.</li>
      <li>The HTML "META" element.</li>
    </ul>
    <p>
      And I even tried to autodetect it using the algorithm defined in
      <a href="http://www.w3.org/TR/REC-xml#sec-guessing">Appendix F of
        the XML 1.0 Recommendation</a>.
    </p>
    <p>
      Since none of these sources yielded any usable information, I will not be
      able to validate this document. Sorry. Please make sure you specify the
      character encoding in use.
    </p>
    <p>
      <a href="http://www.iana.org/"><acronym
      title="Internet Assigned Numbers Authority">IANA</acronym></a>
      maintains the list of <a
      href="http://www.iana.org/assignments/character-sets">official
      names for character sets</a>.
    </p>
.EOF.
}

#
# Abort if an error was flagged while finding the encoding.
&abort_if_error_flagged($File, O_CHARSET|O_DOCTYPE);


#
# Check the detected Encoding and transcode.
if (&conflict($File->{Charset}->{Use}, 'utf-8')) {
  $File = &transcode($File);
  &abort_if_error_flagged($File, 0);
}


$File = &check_utf8($File); # always check
$File = &byte_error($File);

#
# Abort if an error was flagged during transcoding
&abort_if_error_flagged($File, O_SOURCE);


#
# Overall parsing algorithm for documents returned as text/html:
#
# For documents that come to us as text/html,
#
#  1. check if there's a doctype
#  2. if there is a doctype, parse/validate against that DTD
#  3. if no doctype, check for an xmlns= attribute on the first element
#  4. if there is an xmlns= attribute, check for XML well-formedness
#  5. if there is no xmlns= attribute, and no DOCTYPE, punt.
#

#
# Override DOCTYPE if user asked for it.
if ($File->{Opt}->{DOCTYPE}
    and not $File->{Opt}->{DOCTYPE} =~ /(Inline|detect)/i) {
  $File = &override_doctype($File);
  my $dtd = ent($File->{Opt}->{DOCTYPE});
  &add_warning($File, 'DOCTYPE Override in effect!', <<".EOF.");
  Any DOCTYPE Declaration in the document has been suppressed and the DOCTYPE
  for &#171;<code>$dtd</code>&#187; inserted instead. The document will not be
  Valid until you alter the source file to reflect this new DOCTYPE.
.EOF.
  $File->{Tentative} |= T_ERROR; # Tag it as Invalid.
}

#
# Try to extract a DOCTYPE or xmlns.
$File = &preparse($File);


#
# Set document type to XHTML if the DOCTYPE was for XHTML.
# Set document type to MathML if the DOCTYPE was for MathML.
# This happens when the file is served as text/html
$File->{Type} = 'xhtml+xml'  if $File->{DOCTYPE} =~ /xhtml/i;
$File->{Type} = 'mathml+xml' if $File->{DOCTYPE} =~ /mathml/i;


#
# Sanity check Charset information and add any warnings necessary.
$File = &charset_conflicts($File);

#
# By default, use SGML catalog file and SGML Declaration.
my $catalog  = File::Spec->catfile($CFG->{SGML_Library}, 'sgml.soc');
my @xmlflags = qw(
                  -R
                  -wvalid
                  -wnon-sgml-char-ref
                  -wno-duplicate
                 );

#
# Switch to XML semantics if file is XML.
if (&is_xml($File)) {
  $catalog  = File::Spec->catfile($CFG->{SGML_Library}, 'xml.soc');
  push(@xmlflags, '-wxml');
  &add_warning($File, 'Note:', <<".EOF.");
  The Validator XML support has
  <a href="http://openjade.sourceforge.net/doc/xml.htm"
    title="Limitations in Validator XML support">some limitations</a>.
.EOF.
}


#
# Defaults for SP; turn off fixed charset mode and set encoding to UTF-8.
$ENV{SP_CHARSET_FIXED} = 'NO';
$ENV{SP_ENCODING}      = 'UTF-8';
$ENV{SP_BCTF}          = 'UTF-8';

#
# Tell onsgmls about the SGML Library.
$ENV{SGML_SEARCH_PATH} = $CFG->{SGML_Library};

#
# Set final command to use.
my @cmd = ($CFG->{SGML_Parser}, '-c', $catalog, '-E0', @xmlflags);

#
# Set debug info for HTML report.
$T->param(is_debug => $DEBUG);
$T->param(
	  debug => [
		    {name => 'Command',          value => &ent("@cmd")},
		    {name => 'SP_CHARSET_FIXED', value => &ent($ENV{SP_CHARSET_FIXED})},
		    {name => 'SP_ENCODING',      value => &ent($ENV{SP_ENCODING})},
		    {name => 'SP_BCTF',          value => &ent($ENV{SP_BCTF})},
		   ],
	 );


#
# Temporary filehandles.
my $spin  = IO::File->new_tmpfile;
my $spout = IO::File->new_tmpfile;
my $sperr = IO::File->new_tmpfile;

#
# Dump file to a temp file for parsing.
for (@{$File->{Content}}) {
  print $spin $_, "\n";
}

#
# seek() to beginning of the file.
seek $spin, 0, 0;

#
# Run it through SP, redirecting output to temporary files.
my $pid = do {
  no warnings 'once';
  local(*SPIN, *SPOUT, *SPERR)  = ($spin, $spout, $sperr);
  open3("<&SPIN", ">&SPOUT", ">&SPERR", @cmd);
};


#
# Close input file, reap the kid, and rewind temporary filehandles.
undef $spin;
waitpid $pid, 0;
seek $_, 0, 0 for $spout, $sperr;

$File = &parse_errors($File, $sperr); # Parse error output.
undef $sperr; # Get rid of no longer needed filehandle.

$File->{ESIS} = [];
my $elements_found = 0;
while (<$spout>) {
  push @{$File->{'DEBUG'}->{ESIS}}, $_;
  $elements_found++ if /^\(/;

  if (/^Axmlns() \w+ (.*)/ or /^Axmlns:([^ ]+) \w+ (.*)/) {
    if (not $File->{Namespace} and $elements_found == 0 and $1 eq "") {
      $File->{Namespace} = $2;
    }
    $File->{Namespaces}->{$2}++ unless $2 eq $File->{Namespace};
  }

  next if / IMPLIED$/;
  next if /^ASDAFORM CDATA /;
  next if /^ASDAPREF CDATA /;
  chomp; # Removes trailing newlines
  push @{$File->{ESIS}}, $_;
}
undef $spout;

#
# Check whether the parser thought it was Valid.
if ($File->{ESIS}->[-1] =~ /^C$/) {
  delete $File->{ESIS}->[-1];
  $File->{'Is Valid'} = TRUE;
} else {
  $File->{'Is Valid'} = FALSE;
}

#
# Extract the Namespaces.
$File->{Namespaces} = [map {name => '', uri => $_}, keys %{$File->{Namespaces}}];

#
# Set Version to be the FPI initially.
$File->{Version} = $File->{DOCTYPE};

#
# Extract any version attribute from the ESIS.
for (@{$File->{ESIS}}) {
  no warnings 'uninitialized';
  next unless /^AVERSION CDATA (.*)/;
  $File->{Version} = $1;
  last;
}

#
# Force "XML" if type is an XML type and an FPI was not found.
# Otherwise set the type to be the FPI.
if (&is_xml($File) and not $File->{DOCTYPE}) {
  $File->{Version} = 'XML';
} else {
  $File->{Version} = $File->{DOCTYPE} unless $File->{Version};
}

#
# Get the pretty text version of the FPI if a mapping exists.
if (my $prettyver = $CFG->{Types}->{$File->{Version}}->{Display}) {
  $File->{Version} = $prettyver;
} else {
  $File->{Version} = &ent($File->{Version});
}

#
# Warn about unknown Namespaces.
if (&is_xml($File) and $File->{Namespace}) {
  my $ns = &ent($File->{Namespace});
  if (&is_xhtml($File) and $File->{Namespace} ne 'http://www.w3.org/1999/xhtml') {
    &add_warning(
		 $File, 'Warning:',
		 "Unknown namespace (&#171;<code>$ns</code>&#187;) for text/html document!"
		);
  } elsif (&is_svg($File) and $File->{Namespace} ne 'http://www.w3.org/2000/svg') {
    &add_warning(
		 $File, 'Warning:',
		 "Unknown namespace (&#171;<code>$ns</code>&#187;) for SVG document!"
		);
  }
}

if (defined $File->{Tentative}) {
  my $class = '';
     $class .= ($File->{Tentative} & T_INFO  ? ' info'    :'');
     $class .= ($File->{Tentative} & T_WARN  ? ' warning' :'');
     $class .= ($File->{Tentative} & T_ERROR ? ' error'   :'');
     $class .= ($File->{Tentative} & T_FATAL ? ' fatal'   :'');

  unless ($File->{Tentative} == T_DEBUG) {
    $File->{Notice} = <<".EOF.";
    <p id="Notice" class="$class">
      Please note that you have chosen one or more options that alter the
      content of the document before validation, or have not provided enough
      information to accurately validate the document. Even if no errors are
      reported below, the document will not be valid until you manually make
      the changes we have performed automatically. Specifically, if you used
      some of the options that override a property of the document (e.g. the
      DOCTYPE or Character Encoding), you must make the same change to the
      source document or the server setup before it can be valid. You will
      also need to insert an appropriate DOCTYPE Declaration or Character
      Encoding (the "charset" parameter for the Content-Type HTTP header) if
      any of those are missing.
    </p>
.EOF.
  }
}

unless ($File->{Opt}->{Verbose}) {
  unless ($File->{'Is Upload'}) {
    my $thispage = $File->{Env}->{'Self URI'};
    my $escaped_uri = uri_escape($File->{URI});
    $thispage .= qq(?uri=$escaped_uri);
    $thispage .= ';ss=1'      if $File->{Opt}->{'Show Source'};
    $thispage .= ';sp=1'      if $File->{Opt}->{'Show Parsetree'};
    $thispage .= ';noatt=1'   if $File->{Opt}->{'No Attributes'};
    $thispage .= ';outline=1' if $File->{Opt}->{'Outline'};

    &add_warning($File, 'Note:', <<".EOF.");
        You can enable <a href="$thispage;verbose=1">verbose results</a> from
        the <a href="detailed.html">Extended Interface</a>.
.EOF.
  }
}


if ($File->{Opt}->{Output} eq 'xml') {
  &report_xml($File);
} elsif ($File->{Opt}->{Output} eq 'earl') {
  &report_earl($File);
} elsif ($File->{Opt}->{Output} eq 'n3') {
  &report_n3($File);
} else {
  &prep_template($File, $T);

  if ($File->{'Is Valid'}) {
    $T->param(VALID => TRUE);
    &report_valid($File, $T);
  } else {
    $T->param(VALID => FALSE);
    $File->{Opt}->{'Show Source'} = TRUE;
    $T->param(file_errors => &report_errors($File));
  }

  $T->param(file_warnings  => $File->{Warnings});
  $T->param(file_outline   => &outline($File));
  $T->param(file_source    => &source($File));
  $T->param(file_parsetree => &parsetree($File));
  #    &show_esis($File)   if $File->{Opt}->{'Show ESIS'};
  #    &show_errors($File) if $File->{Opt}->{'Show Errors'};

  print $T->output;
}

#
# Get rid of $File object and exit.
undef $File;
exit;


#############################################################################
# Subroutine definitions
#############################################################################

#
# Generate HTML report.
sub prep_template ($$) {
  my $File = shift;
  my $T    = shift;

  #
  # XML mode...
  $T->param(is_xml => &is_xml($File));

  #
  # Upload?
  $T->param(is_upload => $File->{'Is Upload'});

  #
  # The URI...
  $T->param(file_uri => &ent($File->{URI}));

  #
  # Metadata...
  $T->param(file_modified    => $File->{Modified});
  $T->param(file_server      => $File->{Server});
  $T->param(file_size        => $File->{Size});
  $T->param(file_contenttype => $File->{ContentType});
  $T->param(file_charset     => $File->{Charset}->{Use});

  #
  # Output options...
  $T->param(opt_show_source    => $File->{Opt}->{'Show Source'});
  $T->param(opt_show_outline   => $File->{Opt}->{'Outline'});
  $T->param(opt_show_parsetree => $File->{Opt}->{'Show Parsetree'});
  $T->param(opt_show_noatt     => $File->{Opt}->{'No Attributes'});
  $T->param(opt_verbose        => $File->{Opt}->{'Verbose'});

  #
  # Tip of the Day...
  my $tip = &get_tip;
  $T->param(tip_uri  => $tip->[0]);
  $T->param(tip_slug => $tip->[1]);

  #
  # Namespaces...
  $T->param(file_namespace  => &ent($File->{Namespace}));
  $T->param(file_namespaces => $File->{Namespaces})
    if $File->{Namespaces};
}

#
# Output "This page is Valid" report.
sub report_valid {
  my $File = shift;
  my $T    = shift;

  $T->param(file_uri => &ent($File->{URI}));

  my $gifborder   = ' border="0"';
  my $xhtmlendtag = '';
  my($image_uri, $alttext, $gifhw);

  unless ($File->{Version} eq 'unknown' or defined $File->{Tentative}) {
    $T->param(file_version => $File->{Version});

    # @@@
    # print &daily_tip($File, $CFG->{Tips_DB});
    # &print_warnings($File);

    if (defined $image_uri) {
      $T->param(have_badge  => TRUE);
      $T->param(badge_uri   => $image_uri);
      $T->param(badge_alt   => $alttext);
      $T->param(badge_gifhw => $gifhw);
      $T->param(badge_xhtml => $xhtmlendtag);
    }
  } elsif (defined $File->{Tentative}) {
    $T->param(is_tentative => TRUE);
  }

  my $thispage = $File->{Env}->{'Self URI'};
  my $escaped_uri = uri_escape($File->{URI});
  $thispage .= qq(?uri=$escaped_uri);
  $thispage .= ';ss=1'      if $File->{Opt}->{'Show Source'};
  $thispage .= ';sp=1'      if $File->{Opt}->{'Show Parsetree'};
  $thispage .= ';noatt=1'   if $File->{Opt}->{'No Attributes'};
  $thispage .= ';outline=1' if $File->{Opt}->{'Outline'};
  $T->param(file_thispage => $thispage);
}


#
# Add a waring message to the output.
sub add_warning ($$$) {push @{shift->{Warnings}}, {title => shift, text => shift}};


#
# Print HTML explaining why/how to use a DOCTYPE Declaration.
sub doctype_spiel {
  return <<".EOF.";
    <p>
      You should place a DOCTYPE declaration as the very first thing in your
      HTML document. For example, for a typical <a
      href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a> document:
    </p>
    <pre>
      &lt;!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"&gt;
      &lt;html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"&gt;
        &lt;head&gt;
          &lt;title&gt;Title&lt;/title&gt;
        &lt;/head&gt;

        &lt;body&gt;
          &lt;!-- ... body of document ... --&gt;
        &lt;/body&gt;
      &lt;/html&gt;
    </pre>
    <p>
      For XML documents, you may also wish to include an "XML Declaration"
      even before the DOCTYPE Declaration, but this is not well supported
      in older browsers. More information about this can be found in the
      <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a> Recommendation.
    </p>
.EOF.
}


#
# Leave a message and then die (use for internal errors only)
sub internal_error {
  my $File = shift;
  my ($dieMessage) = shift;
  print <<"EOF";
    <hr />
    <strong class="error">Internal server error ($dieMessage).</strong>
    Please contact <a href="mailto:$CFG->{Maintainer}">maintainer</a>.
EOF
  print $File->{'Footer'};
  croak $dieMessage, "\n";
}


#
# Proxy authentication requests.
sub authenticate {
  my $File       = shift;
  my $resource   = shift;
  my $authHeader = shift;
  my $realm = $resource;
  $realm =~ s([^\w\d.-]*){}g;
  $authHeader =~ s( realm=([\'\"])?([^\1]+)\1){ realm="$realm-$2"};

    print <<"EOF";
Status: 401 Authorization Required
WWW-Authenticate: $authHeader
Connection: close
Content-Type: text/html; charset=utf-8

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
  "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
<html lang="en" xml:lang="en">
  <head><title>401 Authorization Required</title></head>
  <body>
    <h1>Authorization Required</h1>
    <p>Sorry, I am not authorized to access the specified URI.</p>
    <p>
      The URI you specified, &lt;<a href="$resource">$resource</a>&gt;,
      returned a 401 "authorization required" response when I tried
      to download it.
    </p>
    <p>
      You should have been prompted by your browser for a
      username/password pair; if you had supplied this information, I
      would have forwarded it to your server for authorization to
      access the resource. You can use your browser's "reload" function
      to try again, if you wish.
    </p>
    <p>
      Of course, you may not want to trust me with this information,
      which is fine. I can tell you that I don't log it or do
      anything else nasty with it, and you can <a
        href="http://validator.w3.org/source/">download the source for
      this service</a> to see what it does, but you have no guarantee
      that this is actually the code I'm using; you basically have to
      decide whether to trust me or not. :-)
    </p>
    <p>
      You should also be aware that the way we proxy this authentication
      information defeats the normal working of HTTP Authentication.
      If you authenticate to server A, your browser may keep sending
      the authentication information to us every time you validate
      a page, regardless of what server it's on, and we'll happily pass
      that on to the server thereby making it possible for a malicious
      server operator to capture your credentials.
    </p>
    <p>
      Due to the way HTTP Authentication works there is no way we can
      avoid this. We are using some "tricks" to fool your client into
      not sending this information in the first place, but there is no
      guarantee this will work. If security is a concern to you, you
      may wish to avoid validating protected resources or take extra
      precautions to prevent your browser from sending authentication
      information when validating other servers.
    </p>
    <p>
      Also note that you shouldn't use HTTP Basic Authentication for
      anything which really needs to be private, since the password
      goes across the network unencrypted.
    </p>
EOF
}


#
# Complain about unknown HTTP responses.
sub http_error {
  my $uri = &ent(shift);
  my $code = &ent(shift);
  my $message = &ent(shift);

  return <<"EOF";
  <p>
    I got the following unexpected response when trying to
    retrieve &lt;<a href="$uri">$uri</a>&gt;:
  </p>

  <blockquote>
    <p><code>$code $message</code></p>
  </blockquote>

  <p>
    Please make sure you have entered the URI correctly.
  </p>

EOF
}

#
# Fetch an URI and return the content and selected meta-info.
sub handle_uri {
  my $q    = shift; # The CGI object.
  my $File = shift; # The master datastructure.

  my $uri = new URI $q->param('uri'); # The URI to fetch.

  my $ua = new LWP::UserAgent;
  $ua->agent("W3C_Validator/$VERSION " . $ua->agent);
  $ua->parse_head(0);  # Parse the http-equiv stuff ourselves. @@ Why?

  # @@@FIXME@@@:
  #   Disable checking if the URI is local (or private) for security reasons,
  #   or at least make it configurable to do so.
  #   eg. /^(localhost(\.localdomain)?|127\..+)$/ (+ private networks)
  #   Net::IP from CPAN could be useful here.
  #
  $ua->protocols_allowed($CFG->{Allowed_Protocols});

  unless ($ua->is_protocol_supported($uri)) {
    $File->{'Error Flagged'} = TRUE;
    $File->{'Error Message'} = &uri_rejected($uri->scheme());
    return $File;
  }

  my $req = new HTTP::Request(GET => $uri);

  # If we got a Authorization header, the client is back at it after being
  # prompted for a password so we insert the header as is in the request.
  if($ENV{HTTP_AUTHORIZATION}){
    $req->headers->header(Authorization => $ENV{HTTP_AUTHORIZATION});
  }

  my $res = $ua->request($req);

  unless ($res->code == 200 || $File->{Opt}->{'No200'}) {
    if ($res->code == 401) {
      &authenticate($File, $res->request->url, $res->www_authenticate);
    } else {
      $File->{'Error Flagged'} = TRUE;
      $File->{'Error Message'} = &http_error($uri->as_string, $res->code, $res->message);
    }
    return $File;
  }

  my($type, $ct, $charset) = &parse_content_type(
						 $File,
						 $res->header('Content-Type'),
						 scalar($res->request->url),
						);

  my $lastmod = undef;
  if ( $res->last_modified ) {
    $lastmod = scalar(gmtime($res->last_modified));
  }

  $File->{Bytes}           = $res->content;
  $File->{Type}            = $type;
  $File->{ContentType}     = $ct;
  $File->{Charset}->{HTTP} = lc $charset;
  $File->{Modified}        = $lastmod;
  $File->{Server}          = &ent(scalar $res->server);
  $File->{Size}            = scalar $res->content_length;
  $File->{URI}             = scalar $res->request->url;
  $File->{'Is Upload'}     = FALSE;

  return $File;

}

#
# Handle uploaded file and return the content and selected meta-info.
sub handle_file {
  my $q    = shift; # The CGI object.
  my $File = shift; # The master datastructure.

  my $f = $q->param('uploaded_file');
  my $h = $q->uploadInfo($f);
  my $file;

  local $/ = undef; # set line delimiter so that <> reads rest of file
  $file = <$f>;

  my($type, $ct, $charset) = &parse_content_type($File, $h->{'Content-Type'});

  $File->{Bytes}           = $file;
  $File->{Type}            = $type;
  $File->{ContentType}     = $ct;
  $File->{Charset}->{HTTP} = lc $charset;
  $File->{Modified}        = $h->{'Last-Modified'};
  $File->{Server}          = &ent($h->{'User-Agent'}); # Fake a "server". :-)
  $File->{Size}            = $h->{'Content-Length'};
  $File->{URI}             = "$f"; # Need to stringify because we want ref
                                   # to return false later in add_table.  This
                                   # is also a file handle... see man CGI.
  $File->{'Is Upload'}     = TRUE;

  return $File;
}

#
# Handle uploaded file and return the content and selected meta-info.
sub handle_frag {
  my $q    = shift; # The CGI object.
  my $File = shift; # The master datastructure.

  $File->{Bytes}       = $q->param('fragment');
  $File->{Type}        = 'html';
  $File->{Modified}    = '';
  $File->{Server}      = '';
  $File->{Size}        = '';
  $File->{URI}         = 'upload://Form Submission';
  $File->{'Is Upload'} = TRUE;

  return $File;
}


#
# Parse a Content-Type and parameters. Return document type and charset.
sub parse_content_type {
  my $File         = shift;
  my $Content_Type = shift;
  my $url          = shift || '';
  my $charset      = '';
  my $type         = '';

  my($ct, @param) = split /\s*;\s*/, lc $Content_Type;

  $type = $CFG->{File_Type}->{$ct} || $ct;

  foreach my $param (@param) {
    my($p, $v) = split /\s*=\s*/, $param;
    next unless $p =~ m(charset)i;
    if ($v =~ m/([\'\"]?)(\S+)\1/i) {
      $charset = lc $2;
      last;
    }
  }

  if ($type =~ m(/)) {
    if ($type =~ m(text/css) and defined $url) {
      print redirect
	'http://jigsaw.w3.org/css-validator/validator?uri='
	  . uri_escape $url;
      exit;
    } else {
      $File->{'Error Flagged'} = TRUE;
      $File->{'Error Message'} = sprintf(<<"      EOF", &ent($type));
    <p class="error">
      Sorry, I am unable to validate this document because its content type is
      <code>%s</code>, which is not currently supported by this service.
    </p>
      EOF
    }
  }

  return $type, $ct, $charset;
}


#
# Normalize newline forms (CRLF/CR/LF) to native newline.
sub normalize_newlines {
  my $file = shift;
  local $_ = shift;  #charset
  my $pattern = '';

  # don't use backreference parentheses!
  $pattern = '\x00\x0D(?:\x00\x0A)?|\x00\x0A' if /^utf-16be$/;
  $pattern = '\x0D\x00(?:\x0A\x00)?|\x0A\x00' if /^utf-16le$/;
  # $pattern = '\x00\x00\x00\x0D(?:\x00\x00\x00\x0A)?|\x00\x00\x00\x0A' if /^UCS-4be$/;
  # $pattern = '\x0D\x00\x00\x00(?:\x0A\x00\x00\x00)?|\x0A\x00\x00\x00' if /^UCS-4le$/;
  # insert other special cases here, such as EBCDIC
  $pattern = '\x0D(?:\x0A)?|\x0A' if !$pattern;    # all other cases

  return [split /$pattern/, $file];
}

#
# find exact charset from general one (utf-16)
#
# needed for per-line conversion and line splitting
# (BE is default, but this will apply only to HTML)
sub exact_charset {
  my $File = shift;
  my $general_charset = shift;
  my $exact_charset = $general_charset;

  if ($general_charset eq 'utf-16') {
    if ($File->{Charset}->{Auto} =~ m/^utf-16[bl]e$/) {
      $exact_charset = $File->{Charset}->{Auto};
    } else { $exact_charset = 'utf-16be'; }
  }
  # add same code for ucs-4 here
  return $exact_charset;
}


#
# Return $_[0] encoded for HTML entities (cribbed from merlyn).
#
# Note that this is used both for HTML and XML escaping.
#
sub ent {
  local $_ = shift;
  return '' unless defined; # Eliminate warnings
  s(["<&>"]){'&#' . ord($&) . ';'}ge;  # should switch to hex sooner or later
  return $_;
}


#
# Truncate source lines for report.
#
# This *really* wants Perl 5.8.0 and it's improved UNICODE support.
# Byte semantics are in effect on all length(), substr(), etc. calls,
# so offsets will be wrong if there are multi-byte sequences prior to
# the column where the error is detected.
#
sub truncate_line {
  my $line  = shift;
  my $col   = shift;

  my $start = $col;
  my $end   = $col;

  for (1..40) {
    $start-- if ($start - 1 >= 0);            # in/de-crement until...
    $end++   if ($end   + 1 <= length $line); # ...we hit end of line.
  }

  unless ($end - $start == 80) {
    if ($start == 0) { # Hit start of line, maybe grab more at end.
      my $diff = 40 - $col;
      for (1..$diff) {
        $end++ if ($end + 1 <= length $line);
      }
    } elsif ($end == length $line) { # Hit end of line, maybe grab more at beginning.
      my $diff = 80 - $col;
      for (1..$diff) {
        $start-- if ($start - 1 >= 0);
      }
    }
  }

  #
  # Add elipsis at end if necessary.
  unless ($end   == length $line) {substr $line, -3, 3, '...'};

  $col = $col - $start; # New offset is diff from $col to $start.
  $line = substr $line, $start, $end - $start; # Truncate.

  #
  # Add elipsis at start if necessary.
  unless ($start == 0)            {substr $line,  0, 3, '...'};

  return $line, $col;
}


#
# Suppress any existing DOCTYPE by commenting it out.
sub override_doctype {
  no strict 'vars';
  my $File    = shift;
  my $pubid = $CFG->{Types}->{$File->{Opt}->{DOCTYPE}}->{PubID};
  my $sysid = $CFG->{Types}->{$File->{Opt}->{DOCTYPE}}->{SysID};
  my $name  = $CFG->{Types}->{$File->{Opt}->{DOCTYPE}}->{Name};
  local $dtd  = qq(<!DOCTYPE $name PUBLIC "$pubid"\n  "$sysid">);
  local $HTML = '';
  local $seen = FALSE;

  my $declaration = sub {
    $seen = TRUE;
    $HTML .= "$dtd\n" . '<!-- ' . $_[0] . ' -->';
  };

  HTML::Parser->new(default_h     => [sub {$HTML .= shift}, 'text'],
                    declaration_h => [$declaration, 'text']
                   )->parse(join "\n", @{$File->{Content}})->eof();

  $File->{Content} = [split /\n/, $HTML];
  unshift @{$File->{Content}}, $dtd unless $seen;
  return $File;
}


#
# Parse errors reported by SP.
sub parse_errors ($$) {
  my $File = shift;
  my $fh   = shift;

  $File->{Errors} = []; # Initialize to an (empty) anonymous array ref.
  for (<$fh>) {
    push @{$File->{'DEBUG'}->{Errors}}, $_;
    my($err, @errors);
    next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/;
    next if /numbers exceeding 65535 not supported/;
    next if /URL Redirected to/;

    my(@_err) = split /:/;
    next unless $_err[1] eq '<OSFD>0';
    if ($_err[1] =~ m(^<URL>)) {
      @errors = ($_err[0], join(':', $_err[1], $_err[2]), @_err[3..$#_err]);
    } else {
      @errors = @_err;
    }
    $err->{src}  = $errors[1];
    $err->{line} = $errors[2];
    $err->{char} = $errors[3];
    $err->{type} = $errors[4];
    if ($err->{type} eq 'E' or $err->{type} eq 'X' or $err->{type} eq 'Q') {
      $err->{msg}  = $errors[5];
    } elsif ($err->{type} eq 'W') {
      &add_warning(
		   $File, 'Warning:',
		   "Line $err->{line}, column $err->{char}: $errors[5]"
		  );
      $err->{msg}  = $errors[5];
    } else {
      $err->{type} = 'I';
      $err->{msg}  = $errors[4];
    }

    # Strip curlies from lq-nsgmls output.
    $err->{msg} =~ s/[{}]//g;

    # An unknown FPI and no SI.
    if ($err->{msg} =~ m(cannot generate system identifier for entity)
     or $err->{msg} =~ m(unrecognized DOCTYPE)i
     or $err->{msg} =~ m(no document type declaration)i) {
      $File->{'Error Flagged'} = TRUE;
      $File->{'Error Message'} = <<".EOF.";
     <div class="fatal">
       <h2>Fatal Error: $err->{msg}</h2>
       <p>
         I could not parse this document, because it uses a public identifier
         that is not in my catalog.
       </p>
.EOF.
      $File->{'Error Message'} .= &doctype_spiel();
      $File->{'Error Message'} .= "    </div>\n";
    }

    # No or unknown FPI and a relative SI.
    if ($err->{msg} =~ m(cannot (open|find))) {
      $File->{'Error Flagged'} = TRUE;
      $File->{'Error Message'} = <<".EOF.";
    <div class="fatal">
      <h2>Fatal Error: $err->{msg}</h2>
      <p>
        I could not parse this document, because it makes reference to a
        system-specific file instead of using a well-known public identifier
        to specify the type of markup being used.
      </p>
.EOF.
      $File->{'Error Message'} .= &doctype_spiel();
      $File->{'Error Message'} .= "    </div>\n";
    }

    # No DOCTYPE.
    if ($err->{msg} =~ m(prolog can\'t be omitted)) {
      $File->{'Error Flagged'} = TRUE;
      $File->{'Error Message'} = <<".EOF.";
    <div class="fatal">
      <h2>Fatal Error: No DOCTYPE specified!</h2>
      <p>
        I could not parse this document, because it does not include a
        DOCTYPE Declaration. A DOCTYPE Declaration is mandatory for most
        current markup languages and without such a declaration it is
        impossible to validate this document.
      </p>
.EOF.
      $File->{'Error Message'} .= &doctype_spiel();
      $File->{'Error Message'} .= <<".EOF.";
      <p>
        The W3C QA Activity maintains a <a
          href="http://www.w3.org/QA/2002/04/valid-dtd-list.html">List of
            Valid Doctypes</a> that you can choose from, and the <acronym
          title="Web Design Group">WDG</acronym> maintains a document on
        "<a href="http://htmlhelp.com/tools/validator/doctype.html">Choosing
           a DOCTYPE</a>".
      </p>
.EOF.
      $File->{'Error Message'} .= "    </div>\n";
    }

    &abort_if_error_flagged($File, O_DOCTYPE);
    push @{$File->{Errors}}, $err;
  }
  undef $fh;
  return $File;
}

#
# Generate a HTML report of detected errors.
sub report_errors ($) {
  my $File = shift;

  my $Errors = [];

  if (scalar @{$File->{Errors}}) {
    foreach my $err (@{$File->{Errors}}) {
      my($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $err->{char});

      # Strip curlies from lq-nsgmls output.
      $err->{msg} =~ s/[{}]//g;

      # Find index into the %frag hash for the "explanation..." links.
      $err->{idx} =  $err->{msg};
      $err->{idx} =~ s/"[^\"]*"/FOO/g;
      $err->{idx} =~ s/[^A-Za-z ]//g;
      $err->{idx} =~ s/\s+/ /g; # Collapse spaces
      $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces. )
      $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
      $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.

      $line = &ent($line); # Entity encode.
      $line =~ s/\t/ /g;   # Collapse TABs.

      if (defined $CFG->{Error_to_URI}->{$err->{idx}}) {
	$err->{uri} = $CFG->{Msg_FAQ_URI} . '#'
	  . $CFG->{Error_to_URI}->{$err->{idx}};
      }

      $err->{src} = $line;
      $err->{col} = ' ' x $col;

      push @{$Errors}, $err;
    }
  }
  return $Errors;
}


#
# Produce an outline of the document based on Hn elements from the ESIS.
sub outline {
  my $File = shift;

  my $outline = '';

  my $prevlevel = 0;
  my $indent    = 0;
  my $level     = 0;

  for (1 .. $#{$File->{ESIS}}) {
    my $line = $File->{ESIS}->[$_];
    next unless $line =~ /^\(H([1-6])$/i;
    $prevlevel = $level;
    $level     = $1;

    $outline .= "    </ul>\n" x ($prevlevel - $level); # perl is so cool.
    if ($level - $prevlevel == 1) {$outline .= "    <ul>\n"};
    foreach my $i (($prevlevel + 1) .. ($level - 1)) {
      $outline .= qq(  <ul>\n    <li class="warning">A level $i heading is missing!</li>\n);
    }
    if ($level - $prevlevel > 1) {$outline .= "    <ul>\n"};

    $line       = '';
    my $heading = '';
    until (substr($line, 0, 3) =~ /^\)H$level/i) {
      $line = $File->{ESIS}->[$_++];
      $line =~ s/\\011/ /g;
      $line =~ s/\\012/ /g;
      if ($line =~ /^-/) {
        my $headcont = $line;
        substr($headcont, 0, 1) = " ";
        $headcont =~ s/\\n/ /g;
        $heading .= $headcont;
      } elsif ($line =~ /^AALT CDATA( .+)/i) {
        my $headcont = $1;
        $headcont =~ s/\\n/ /g;
        $heading .= $headcont;
      }
    }

    $heading = substr($heading, 1); # chop the leading '-' or ' '.
    $heading = &ent($heading);
    $outline .= "    <li>$heading</li>\n";
  }
  $outline .= "    </ul>\n" x $level;

  return $outline;
}


#
# Create a HTML representation of the document.
sub source {
  my $File = shift;
  my $line = 1;
  my @source = ();

  for (@{$File->{Content}}) {
    push @source, {
		   file_source_i => $line,
		   file_source_line => ent $_,
		  };
    $line++;
  }
  return \@source;
}


#
# Create a HTML Parse Tree of the document for validation report.
sub parsetree {
  my $File = shift;
  my $tree = '';

  $T->param(file_parsetree_noatt => TRUE) if $File->{Opt}->{'No Attributes'};

  my $indent   = 0;
  my $prevdata = '';

  foreach my $line (@{$File->{ESIS}}) {
    if ($File->{Opt}->{'No Attributes'}) { # don't show attributes
      next if $line =~ /^A/;
      next if $line =~ /^\(A$/;
      next if $line =~ /^\)A$/;
    }

    $line =~ s/\\n/ /g;
    $line =~ s/\\011/ /g;
    $line =~ s/\\012/ /g;
    $line =~ s/\s+/ /g;
    next if $line =~ /^-\s*$/;

    if ($line =~ /^-/) {
      substr($line, 0, 1) = ' ';
      $prevdata .= $line;
      next;
    } elsif ($prevdata) {
      $prevdata = &ent($prevdata);
      $prevdata =~ s/\s+/ /go;
      $tree .= wrap(' ' x $indent, ' ' x $indent, $prevdata) . "\n";
      undef $prevdata;
    }

    $line = &ent($line);
    if ($line =~ /^\)/) {
      $indent -= 2;
    }

    my $printme;
    chomp($printme = $line);
    $printme =~ s{^([()])(.*)} # reformat and add links on HTML elements
                 { my $close = '';
                      $close = "/" if $1 eq ")"; # ")" -> close-tag
                   "&lt;" . $close . "<a href=\"" .
                   $CFG->{Element_Ref_URI} . $CFG->{Element_Map}->{lc($2)} .
                   "\">$2<\/a>&gt;"
                 }egx;
    $printme =~ s,^A,  A,; # indent attributes a bit
    $tree .= ' ' x $indent . $printme . "\n";
    if ($line =~ /^\(/) {
      $indent += 2;
    }
  }
  return $tree;
}


#
# Do an initial parse of the Document Entity to extract charset and FPI.
sub preparse {
  my $File = shift;

  #
  # Reset DOCTYPE, Root, and Charset (for second invocation).
  $File->{Charset}->{META} = '';
  $File->{DOCTYPE}         = '';
  $File->{Root}            = '';

  my $dtd = sub {
    return if $File->{Root};
    ($File->{Root}, $File->{DOCTYPE}) = shift =~  m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
  };

  my $start = sub {
    my $tag  = shift;
    my $attr = shift;
    my %attr = map {lc($_) => $attr->{$_}} keys %{$attr};

    if ($File->{Root}) {
      if (lc $tag eq 'meta') {
        if (lc $attr{'http-equiv'} eq 'content-type') {
          if ($attr{content} =~ m(charset\s*=[\s\"\']*([^\s;\"\'>]*))si) {
            $File->{Charset}->{META} = lc $1;
          }
        }
      }
      return unless $tag eq $File->{Root};
    } else {
      $File->{Root} = $tag;
    }
    if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}};
  };

  my $p = HTML::Parser->new(api_version => 3);
  $p->xml_mode(TRUE);
  $p->ignore_elements('BODY');
  $p->ignore_elements('body');
  $p->handler(declaration => $dtd, 'text');
  $p->handler(start => $start, 'tag,attr');
  $p->parse(join "\n", @{$File->{Content}});

  $File->{DOCTYPE} = '' unless defined $File->{DOCTYPE};
  $File->{DOCTYPE} =~ s(^\s+){ }g;
  $File->{DOCTYPE} =~ s(\s+$){ }g;
  $File->{DOCTYPE} =~ s(\s+) { }g;

  return $File;
}

#
# Print out the raw ESIS output for debugging.
sub show_esis ($) {
  print <<'EOF';
  <div id="raw_esis" class="mtb">
    <hr />
    <h2><a name="raw_esis">Raw ESIS Output</a></h2>
    <pre>
EOF
  for (@{shift->{'DEBUG'}->{ESIS}}) {
    s/\\012//g;
    s/\\n/\n/g;
    print ent $_;
  }
  print "    </pre>\n  </div>";
}

#
# Print out the raw error output for debugging.
sub show_errors ($) {
  print <<'EOF';
  <div id="raw_errors" class="mtb">
    <hr />
    <h2><a name="raw_errors">Raw Error Output</a></h2>
    <pre>
EOF
  for (@{shift->{'DEBUG'}->{Errors}}) {print ent $_};
  print "    </pre>\n  </div>";
}


#
# Preprocess CGI parameters.
sub prepCGI {
  my $File = shift;
  my    $q = shift;

  # Avoid CGI.pm's "exists but undef" behaviour.
  if (scalar $q->param) {
    foreach my $param ($q->param) {
      next if $param eq 'uploaded_file'; # 'uploaded_file' contains data.
      next if $q->param($param) eq '0';  # Keep false-but-set params.
      #
      # Parameters that are given to us without specifying a value get
      # set to "1" (the "TRUE" constant). This is so we can test for the
      # boolean value of a parameter instead of first checking whether
      # the param was given and then testing it's value. Needed because
      # CGI.pm sets ";param" and ";param=" to a boolean false value
      # (undef() or a null string, respectively).
      $q->param($param, TRUE) unless $q->param($param);
    }
  }

  # Futz the URI so "/referer" works.
  if ($q->path_info) {
    if ($q->path_info eq '/referer' or $q->path_info eq '/referrer') {
      $q->param('uri', 'referer');
    } else {
      my $thispage = &self_url($q);
      print redirect $thispage;
      exit;
    }
  }
  # Use "url" unless a "uri" was also given.
  if ($q->param('url') and not $q->param('uri')) {
    $q->param('uri', $q->param('url'));
  }

  # Munge the URI to include commonly omitted prefix.
  my $u = $q->param('uri');
  $q->param('uri', "http://$u") if $u && $u =~ m(^www)i;

  # Issue a redirect for uri=referer.
  if ($q->param('uri') and $q->param('uri') eq 'referer') {
    print redirect $q->url() . '?uri=' . uri_escape($q->referer);
    exit;
  }

  # Supersede URI with an uploaded file.
  if ($q->param('uploaded_file')) {
    $q->param('uri', 'upload://' . $q->param('uploaded_file'));
    $File->{'Is Upload'} = TRUE; # Tag it for later use.
  }

  # Supersede URI with an uploaded fragment.
  if ($q->param('fragment')) {
    $q->param('uri', 'upload://Form Submission');
    $File->{'Is Upload'} = TRUE; # Tag it for later use.
  }

  # Redirect to a GETable URL if method is POST without a file upload.
  if ($q->request_method eq 'POST' and not $File->{'Is Upload'}) {
    my $thispage = &self_url($q);
    print redirect $thispage;
    exit;
  }

  #
  # Flag an error if we didn't get a file to validate.
  unless ($q->param('uri')) {
    $File->{'Error Flagged'} = TRUE;
    $File->{'Error Message'} = &uri_rejected();
  }

  return $q;
}

#
# Preprocess SSI files.
sub prepSSI {
  my $opt = shift;

  my $fh = new IO::File "< $opt->{File}"
    or croak "open($opt->{File}) returned: $!\n";
  my $ssi = join '', <$fh>;
  close $fh or carp "close($opt->{File}) returned: $!\n";

  $ssi =~ s/<!--\#echo var="title" -->/$opt->{Title}/g
    if defined $opt->{Title};

  $ssi =~ s/<!--\#echo var="date" -->/$opt->{Date}/g
    if defined $opt->{Date};

  $ssi =~ s/<!--\#echo\s+var="revision"\s+-->/$opt->{Revision}/g
    if defined $opt->{Revision};

  # No need to parametrize this one, it's always "./" in this context.
  $ssi =~ s|<!--\#echo\s+var="relroot"\s+-->|./|g;

  return $ssi;
}


#
# Output errors for a rejected URI.
sub uri_rejected {
  my $scheme = shift || 'undefined';

  return sprintf(<<".EOF.", &ent($scheme));
    <div class="error">
      <p>
        Sorry, this type of
        <a href="http://www.w3.org/Addressing/#terms">URI</a>
        <a href="http://www.iana.org/assignments/uri-schemes">scheme</a>
        (<q>%s</q>) is not supported by this service. Please check
        that you entered the URI correctly.
      </p>
      <p>URIs should be in the form: <code>http://validator.w3.org/</code></p>
      <p>
        If you entered a valid URI using a scheme that we should support,
        please let us know as outlined on our
        <a href="feedback.html">Feedback page</a>. Make sure to include the
        specific URI you would like us to support, and if possible provide a
        reference to the relevant standards document describing the URI scheme
        in question.
      </p>
    </div>
.EOF.
}


#
# Utility subs to tell if type "is" something.
sub is_xml    {shift->{Type} =~ m(^[^+]+\+xml$)};
sub is_svg    {shift->{Type} =~ m(svg\+xml$)};
sub is_smil   {shift->{Type} =~ m(smil\+xml$)};
sub is_html   {shift->{Type} =~ m(html\+sgml$)};
sub is_xhtml  {shift->{Type} =~ m(xhtml\+xml$)};
sub is_mathml {shift->{Type} =~ m(mathml\+xml$)};


#
# Check charset conflicts and add any warnings necessary.
sub charset_conflicts {
  my $File = shift;
  #
  # Handle the case where there was no charset to be found.
  unless ($File->{Charset}->{Use}) {
    &add_warning($File, 'No Character Encoding detected!', <<".EOF.");
      To ensure correct validation, processing, and display, it is important
      that the character encoding is properly labeled.
      <a href="http://www.w3.org/International/O-charset.html">More
      information...</a>
.EOF.
    $File->{Tentative} |= T_WARN;
  }

  my $cs_use  = $File->{Charset}->{Use}  ? &ent($File->{Charset}->{Use})  : '';
  my $cs_opt  = $File->{Opt}->{Charset}  ? &ent($File->{Opt}->{Charset})  : '';
  my $cs_http = $File->{Charset}->{HTTP} ? &ent($File->{Charset}->{HTTP}) : '';
  my $cs_xml  = $File->{Charset}->{XML}  ? &ent($File->{Charset}->{XML})  : '';
  my $cs_meta = $File->{Charset}->{META} ? &ent($File->{Charset}->{META}) : '';

  #
  # warn about charset override
  if ($File->{Charset}->{Override} &&
      $File->{Charset}->{Override} ne $File->{Charset}->{Use}) {
    &add_warning($File, 'Character Encoding Override in effect!', <<".EOF.");
      The detected character encoding, &#171;<code>$cs_use</code>&#187;, has
      been suppressed and the character encoding
      &#171;<code>$cs_opt</code>&#187; used instead.
.EOF.
    $File->{Tentative} |= T_ERROR;
  }

  #
  # Add a warning if there was charset info conflict (HTTP header,
  # XML declaration, or <meta> element).
  if (&conflict($File->{Charset}->{HTTP}, $File->{Charset}->{XML})) {
    &add_warning($File, 'Character Encoding mismatch!', <<".EOF.");
      The character encoding from the HTTP header (<code>$cs_http</code>)
      is different from the value in the XML declaration
      (<code>$cs_xml</code>). I will use the value from the HTTP header
      (<code>$cs_use</code>) for this validation.
.EOF.
  } elsif (&conflict($File->{Charset}->{HTTP}, $File->{Charset}->{META})) {
    &add_warning($File, 'Character Encoding mismatch!', <<".EOF.");
      The character encoding from the HTTP header (<code>$cs_http</code>)
      is different from the value in the <code>&lt;meta&gt;</code> element
      (<code>$cs_meta</code>). I will use the value from the HTTP header
      (<code>$cs_use</code>) for this validation.
.EOF.
  }
  elsif (&conflict($File->{Charset}->{XML}, $File->{Charset}->{META})) {
    &add_warning($File, 'Character Encoding mismatch!', <<".EOF.");
      The character encoding from the XML declaration (<code>$cs_xml</code>)
      is different from the value in the <code>&lt;meta&gt;</code> element
      (<code>$cs_meta</code>). I will use the value from the XML declaration
      (<code>$cs_xml</code>) for this validation.
.EOF.
    $File->{Tentative} |= T_WARN;
  }

  return $File;
}


#
# Transcode to UTF-8
sub transcode {
  my $File = shift;

  my ($command, $result_charset) = split " ", $CFG->{Charsets}->{$File->{Charset}->{Use}}, 2;

  $result_charset = exact_charset($File, $result_charset);
  if ($command eq 'I') {
    # test if given charset is available
    eval {my $c = Text::Iconv->new($result_charset, 'utf-8')};
    $command = '' if $@;
  } elsif ($command eq 'X') {
    $@ = "$File->{Charset}->{Use} undefined; replace by $result_charset";
  }

  if ($command ne 'I') {
    my $cs = &ent($File->{Charset}->{Use});
    $File->{'Error Flagged'} = TRUE;
    $File->{'Error Message'} = sprintf(<<".EOF.", $cs, &ent($@));
      <p>Sorry!
        A fatal error occurred when attempting to transcode the character
        encoding of the document. Either we do not support this character
        encoding yet, or you have specified a non-existent character encoding
        (often a misspelling).
      </p>
      <p>The detected character encoding was "%s".</p>
      <p>The error was "%s".</p>
      <p>
        If you believe the character encoding to be valid you can submit a
        request for that character encoding (see the
        <a href="feedback.html">feedback page</a> for details) and we will
        look into supporting it in the future.
      </p>
.EOF.
    $File->{'Error Message'} .= <<'.EOF.';
    <p>
      <a href="http://www.iana.org/"><acronym
      title="Internet Assigned Numbers Authority">IANA</acronym></a>
      maintains the list of <a
      href="http://www.iana.org/assignments/character-sets">official
      names for character sets</a>.
    </p>
.EOF.
    return $File;
  }

  my $c = Text::Iconv->new($result_charset, 'utf-8');
  my $line = 0;
  for (@{$File->{Content}}) {
    my $in = $_;
    $line++;
    $_ = $c->convert($_); # $_ is local!!
    if ($in ne "" and $_ eq "") {
      push @{$File->{Lines}}, $line;
      $_ = "#### encoding problem on this line, not shown ####";
    }
  }
  return $File;
}

#
# Check correctness of UTF-8 both for UTF-8 input and for conversion results
sub check_utf8 {
  my $File = shift;

  for (my $i = 0; $i < $#{$File->{Content}}; $i++) {
    # substitution needed for very long lines (>32K), to avoid backtrack
    # stack overflow. Handily, this also happens to count characters.
    local $_ = $File->{Content}->[$i];
    my $count =
    s/  [\x00-\x7F]                           # ASCII
      | [\xC2-\xDF]        [\x80-\xBF]        # non-overlong 2-byte sequences
      |  \xE0[\xA0-\xBF]   [\x80-\xBF]        # excluding overlongs
      | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}     # straight 3-byte sequences
      |  \xED[\x80-\x9F]   [\x80-\xBF]        # excluding surrogates
      |  \xF0[\x90-\xBF]   [\x80-\xBF]{2}     # planes 1-3
      | [\xF1-\xF3]        [\x80-\xBF]{3}     # planes 4-15
      |  \xF4[\x80-\x8F][\x80-\xBF]{2}        # plane 16
     //xg;
    if (length) {
      push @{$File->{Lines}}, ($i+1);
      $File->{Content}->[$i] = "#### encoding problem on this line, not shown ####";
      $count = 50; # length of above text
    }
    $count += 0; # Force numeric.
    $File->{Offsets}->[$i + 1] = [$count, $File->{Offsets}->[$i]->[1] + $count];
  }
  return $File;
}

#
# byte error analysis
sub byte_error {
  my $File = shift;
  my @lines = @{$File->{Lines}};
  if (scalar @lines) {
    $File->{'Error Flagged'} = TRUE;
    my $s = $#lines ? 's' : '';
    my $lines = join ', ', split ',', Set::IntSpan->new(\@lines)->run_list;
    my $cs = &ent($File->{Charset}->{Use});
    $File->{'Error Message'} = <<".EOF.";
      <p class="error">
        Sorry, I am unable to validate this document because on line$s
        <strong>$lines</strong>
        it contained one or more bytes that I cannot interpret as
        <code>$cs</code> (in other words, the bytes
        found are not valid values in the specified Character Encoding).
        Please check both the content of the file and the character
        encoding indication.
      </p>
.EOF.
  }
  return $File;
}


#
# Return an XML report for the page.
sub report_xml {
  my $File = shift;

  my $valid = ($File->{'Is Valid'} ? 'Valid' : 'Invalid');
  my $errs  = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});

  print <<".EOF.";
Content-Type: application/xml; charset=UTF-8
X-W3C-Validator-Status: $valid
X-W3C-Validator-Errors: $errs

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="xml-results.xsl"?>
<!DOCTYPE result [
  <!ELEMENT result (meta, warnings?, messages?)>
  <!ATTLIST result
    version CDATA #FIXED '0.9'
  >

  <!ELEMENT meta (uri, modified, server, size, encoding, doctype)>
  <!ELEMENT uri      (#PCDATA)>
  <!ELEMENT modified (#PCDATA)>
  <!ELEMENT server   (#PCDATA)>
  <!ELEMENT size     (#PCDATA)>
  <!ELEMENT encoding (#PCDATA)>
  <!ELEMENT doctype  (#PCDATA)>

  <!ELEMENT warnings (warning)+>
  <!ELEMENT warning  (#PCDATA)>

  <!ELEMENT messages (msg)*>
  <!ELEMENT msg      (#PCDATA)>
  <!ATTLIST msg
    line   CDATA #IMPLIED
    col    CDATA #IMPLIED
    offset CDATA #IMPLIED
  >
]>
.EOF.

  print qq(
<result>
  <meta>
    <uri>), &ent($File->{URI}), qq(</uri>
    <modified>), &ent($File->{Modified}), qq(</modified>
    <server>), $File->{Server}, qq(</server>
    <size>), &ent($File->{Size}), qq(</size>
    <encoding>), &ent($File->{Charset}->{Use}), qq(</encoding>
    <doctype>), &ent($File->{DOCTYPE}), qq(</doctype>
  </meta>
);

  &add_warning($File, 'Warning:', <<".EOF.");
      This interface is highly experimental and the output *will* change
      -- probably even several times -- before finished. Do *not* rely on it!
      See http://validator.w3.org/docs/users.html#api-warning
.EOF.

  if (defined $File->{Warnings} and scalar @{$File->{Warnings}}) {
    print qq(  <warnings>\n);
    printf qq(    <warning>%s</warning>\n), &ent($_) for @{$File->{Warnings}};
    print qq(  </warnings>\n);
  }

  if (defined $File->{Errors} and scalar @{$File->{Errors}}) {
    print qq(  <messages>\n);

    foreach my $err (@{$File->{Errors}}) {
      # Strip curlies from lq-nsgmls output.
      $err->{msg} =~ s/[{}]//g;
      chomp $err->{msg};

      # Find index into the %frag hash for the "explanation..." links.
      $err->{idx} =  $err->{msg};
      $err->{idx} =~ s/"[^\"]*"/FOO/g;
      $err->{idx} =~ s/[^A-Za-z ]//g;
      $err->{idx} =~ s/\s+/ /g; # Collapse spaces
      $err->{idx} =~ s/(^\s|\s$)//g; # Remove leading and trailing spaces.
      $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
      $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.

      my $offset = $File->{Offsets}->[$err->{line} - 1]->[1] + $err->{char};
      printf <<".EOF.", &ent($err->{msg});
    <msg line="$err->{line}" col="$err->{char}" offset="$offset">%s</msg>
.EOF.
    }
    print qq(  </messages>\n);
  }
  print qq(</result>\n);
}


#
# Return an EARL report for the page.
sub report_earl {
  my $File = shift;

  my $valid = ($File->{'Is Valid'} ? 'Valid' : 'Invalid');
  my $errs  = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});

  print <<".EOF.";
Content-Type: application/rdf+xml; charset=UTF-8
X-W3C-Validator-Status: $valid
X-W3C-Validator-Errors: $errs

<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns="http://www.w3.org/2001/03/earl/1.0-test#"
  xmlns:val="http://validator.w3.org/this_will_change/do_not_rely_on_it!">

  <Assertor rdf:about="http://validator.w3.org/">
    <name>W3 Validator</name>
    <contactInfo rdf:resource="http://validator.w3.org/about.html"/>
    <testMode rdf:resource="http://www.w3.org/2001/03/earl/1.00#Auto" />

.EOF.

  unless ($File->{'Is Valid'}) {
    printf <<".EOF.", &ent($File->{URI});
  <asserts>
   <Assertion rdf:ID="result">
    <subject rdf:resource="%s" />
    <result rdf:resource="http://www.w3.org/2001/03/earl/1.00#fails" />
    <testCase rdf:resource="http://www.w3.org/MarkUp/" />
    <note>Invalid!</note>
   </Assertion>
  </asserts>
.EOF.

    my $errnum = 0 ;
    foreach my $err (@{$File->{Errors}}) {
      # Strip curlies from lq-nsgmls output.
      ++$errnum ;
      $err->{msg} =~ s/[{}]//g;
      chomp $err->{msg};

      # Find index into the %frag hash for the "explanation..." links.
      $err->{idx} =  $err->{msg};
      $err->{idx} =~ s/"[^\"]*"/FOO/g;
      $err->{idx} =~ s/[^A-Za-z ]//g;
      $err->{idx} =~ s/\s+/ /g; # Collapse spaces
      $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces.
      $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
      $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.

      my @offsets = (
                     $File->{Offsets}->[$err->{line}    ]->[0],
                     $File->{Offsets}->[$err->{line} - 1]->[1],
                     $File->{Offsets}->[$err->{line} - 1]->[1] + $err->{char}
                    );
      printf <<".EOF.", &ent($File->{URI}), &ent($err->{msg});
    <asserts>
      <Assertion rdf:ID="err$errnum">
        <subject rdf:parseType="Resource">
          <reprOf rdf:resource="%s"/>
          <val:line>$err->{line}</val:line>
          <val:column>$err->{char}</val:column>
          <val:offset>@offsets</val:offset>
        </subject>
        <result rdf:resource="http://www.w3.org/2003/03/earl/1.00#fails" />
        <testCase rdf:resource="http://www.w3.org/Markup/" />
        <note>%s</note>
      </Assertion>
    </asserts>
.EOF.
    }
  } else {
    printf <<".EOF.", &ent($File->{URI});
  <asserts>
   <Assertion>
    <subject rdf:resource="%s" />
    <result rdf:resource="http://www.w3.org/2001/03/earl/1.00#passes" />
    <testCase rdf:resource="http://www.w3.org/MarkUp/" />
    <note>Valid!</note>
   </Assertion>
  </asserts>
.EOF.
  }

  print <<".EOF.";
  </Assertor>
</rdf:RDF>
.EOF.
}


#
# Return a Notation3 EARL report for the page.
#
# @@ TODO: escape output
sub report_n3 {
  my $File = shift;

  my $valid = ($File->{'Is Valid'} ? 'Valid' : 'Invalid');
  my $errs  = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});

  print <<".EOF.";
Content-Type: text/plain; charset=UTF-8
X-W3C-Validator-Status: $valid
X-W3C-Validator-Errors: $errs

\@prefix earl: <http://www.w3.org/2001/03/earl/1.0-test#> .
\@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
\@prefix val: <http://validator.w3.org/this_will_change/do_not_rely_on_it!> .

<http://validator.w3.org/> a earl:Assertor;
  earl:name "W3 Validator";
  earl:asserts
.EOF.

  unless ($File->{'Is Valid'}) {
    for (my $i = 0; $i <= scalar @{$File->{Errors}}; $i++) {
      my $err = $File->{Errors}->[$i];
      # Strip curlies from lq-nsgmls output.
      $err->{msg} =~ s/[{}]//g;
      chomp $err->{msg};

      # Find index into the %frag hash for the "explanation..." links.
      $err->{idx} =  $err->{msg};
      $err->{idx} =~ s/"[^\"]*"/FOO/g;
      $err->{idx} =~ s/[^A-Za-z ]//g;
      $err->{idx} =~ s/\s+/ /g; # Collapse spaces
      $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces.
      $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
      $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.

      my @offsets = (
                     $File->{Offsets}->[$err->{line}    ]->[0],
                     $File->{Offsets}->[$err->{line} - 1]->[1],
                     $File->{Offsets}->[$err->{line} - 1]->[1] + $err->{char}
                    );
      print <<".EOF.";
    [
      earl:testMode earl:Auto;
      rdf:predicate earl:fails;
      rdf:subject [
                    val:column "$err->{char}";
                    val:line   "$err->{line}";
                    val:offset "@offsets";
                    earl:testSubject <$File->{URI}>
                  ];
      rdf:object [
                   earl:id <http://www.w3.org/HTML/>;
                   earl:note """$err->{msg} """
                 ]
.EOF.

     if ($i == scalar @{$File->{Errors}}) {
       print "    ]\n";
     } else {
       print "    ],\n";
     }
    }
  } else {
    print <<".EOF.";
    [
      earl:testMode earl:Auto;
      rdf:predicate earl:passes;
      rdf:subject   [earl:testSubject <$File->{URI}>];
      rdf:object    [
                      earl:id <http://www.w3.org/HTML/>;
                      earl:note "Valid"
                    ]
    ]
.EOF.
  }
  print " .\n";
}


#
# Autodetection as in Appendix F of the XML 1.0 Recommendation.
# <http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing>
#
# return values are: (base_encoding, BOMSize, Size, Pattern)
sub find_base_encoding {
  local $_ = shift;

  # With a Byte Order Mark:
  return ('ucs-4be',  4, 4, '\0\0\0(.)')
    if /^\x00\x00\xFE\xFF/; # UCS-4, big-endian machine (1234)
  return ('ucs-4le',  4, 4, '(.)\0\0\0')
    if /^\xFF\xFE\x00\x00/; # UCS-4, little-endian machine (4321)
  return ('utf-16be', 2, 2, '\0(.)')
    if /^\xFE\xFF/;         # UTF-16, big-endian.
  return ('utf-16le', 2, 2, '(.)\0')
    if /^\xFF\xFE/;         # UTF-16, little-endian.
  return ('utf-8',    3, 1, '')
    if /^\xEF\xBB\xBF/; # UTF-8.

  # Without a Byte Order Mark:
  return ('ucs-4be',  0, 4, '\0\0\0(.)')
    if /^\x00\x00\x00\x3C/; # UCS-4 or 32bit; big-endian machine (1234 order).
  return ('ucs-4le',  0, 4, '(.)\0\0\0')
    if /^\x3C\x00\x00\x00/; # UCS-4 or 32bit; little-endian machine (4321 order).
  return ('utf-16be', 0, 2, '\0(.)')
    if /^\x00\x3C\x00\x3F/; # UCS-2, UTF-16, or 16bit; big-endian.
  return ('utf-16le', 0, 2, '(.)\0')
    if /^\x3C\x00\x3F\x00/; # UCS-2, UTF-16, or 16bit; little-endian.
  return ('utf-8',    0, 1, '')
    if /^\x3C\x3F\x78\x6D/; # UTF-8, ISO-646, ASCII, ISO-8859-*, Shift-JIS, EUC, etc.
  return ('ebcdic',   0, 1, '')
    if /^\x4C\x6F\xA7\x94/; # EBCDIC
  return ('',         0, 1, '');
                            # nothing in particular
}


#
# Find encoding in document according to XML rules
# Only meaningful if file contains a BOM, or for well-formed XML!
sub find_xml_encoding {
  my $File = shift;
  my ($CodeUnitSize, $Pattern);

  ($File->{Charset}->{Auto}, $File->{BOM}, $CodeUnitSize, $Pattern)
    = &find_base_encoding($File->{Bytes});
  warn qq("$File->{Charset}->{Auto}" "$File->{BOM}" "$CodeUnitSize" "$Pattern");
  my $someBytes = substr $File->{Bytes}, $File->{BOM}, ($CodeUnitSize * 100);
  my $someText = '';                  # 100 arbitrary, but enough in any case

  # translate from guessed encoding to ascii-compatible
  if ($File->{Charset}->{Auto} eq 'ebcdic') {
    # special treatment for EBCDIC, maybe use tr///
    # work on this later
  }
  elsif (!$Pattern) {
    $someText = $someBytes; # efficiency shortcut
  }
  else { # generic code for UTF-16/UCS-4
    $someBytes =~ /^(($Pattern)*)/s;
    $someText = $1;       # get initial piece without chars >255
    $someText =~ s/$Pattern/$1/sg;    # select the relevant bytes
  }

  # try to find encoding pseudo-attribute
  my $s = '[\ \t\n\r]';
  $someText =~ m(^<\?xml $s+ version $s* = $s* ([\'\"]) [-._:a-zA-Z0-9]+ \1 $s+
                  encoding $s* = $s* ([\'\"]) ([A-Za-z][-._A-Za-z0-9]*) \2
                )xso;

  $File->{Charset}->{XML} = lc $3;
  return $File;
}

#
# Abort with a message if an error was flagged at point.
sub abort_if_error_flagged {
  my $File  = shift;
  my $Flags = shift;

  return unless $File->{'Error Flagged'};

  &prep_template($File, $E);

  $E->param(error_message => $File->{'Error Message'});
  print $E->output;
  exit;
}

#
# conflicting encodings
sub conflict ($$) {return $_[0] && $_[1] && ($_[0] ne $_[1])};


#
# Construct a self-referential URL.
sub self_url {
  my $q = shift;
  my $thispage = $File->{Env}->{'Self URI'};
     $thispage .= '?uri='       . uri_escape($q->param('uri'));
     $thispage .= ';ss=1'      if $q->param('ss');
     $thispage .= ';sp=1'      if $q->param('sp');
     $thispage .= ';noatt=1'   if $q->param('noatt');
     $thispage .= ';outline=1' if $q->param('outline');
     $thispage .= ';No200=1'   if $q->param('No200');
     $thispage .= ';verbose=1' if $q->param('verbose');
  if ($q->param('doctype')
      and not $q->param('doctype') =~ /(Inline|detect)/i) {
    $thispage .= ';doctype=' . uri_escape($q->param('doctype'));
  }
  if ($q->param('charset') and not $q->param('charset') =~ /detect/i) {
    $thispage .= ';charset=' . uri_escape($q->param('charset'));
  }
  return $thispage;
}


#
# Return random Tip with it's URL.
sub get_tip {
  my @tipAddrs = keys %{$CFG->{Tips_DB}};
  my $tipAddr = $tipAddrs[rand scalar @tipAddrs];
  my $tipSlug = $CFG->{Tips_DB}->{$tipAddr};

  return [$tipAddr, $tipSlug];
}