summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorot <ot@localhost>2008-01-17 08:09:24 +0000
committerot <ot@localhost>2008-01-17 08:09:24 +0000
commit5c9fad8c2442f188e571aa05a4d04758cb125121 (patch)
treedc369ffe17aea24b704df8872bb449fc6b30d322
parent68a39d805cb15ead3a3f7691c00c1287aa2f2528 (diff)
downloadmarkup-validator-5c9fad8c2442f188e571aa05a4d04758cb125121.zip
markup-validator-5c9fad8c2442f188e571aa05a4d04758cb125121.tar.gz
markup-validator-5c9fad8c2442f188e571aa05a4d04758cb125121.tar.bz2
check that in a DOCTYPE declaration for a known document type, FPI and SI match
-rw-r--r--htdocs/config/types.conf18
-rwxr-xr-xhttpd/cgi-bin/check24
-rw-r--r--share/templates/en_US/warnings.tmpl9
3 files changed, 38 insertions, 13 deletions
diff --git a/htdocs/config/types.conf b/htdocs/config/types.conf
index f82b578..95d04ab 100644
--- a/htdocs/config/types.conf
+++ b/htdocs/config/types.conf
@@ -1,7 +1,7 @@
#
# Main Document Type Database for the W3C Markup Validation Service.
#
-# $Id: types.conf,v 1.36 2007-12-06 02:52:50 ot Exp $
+# $Id: types.conf,v 1.37 2008-01-17 08:09:23 ot Exp $
#
# Maintains all information for each of the document types we support.
# See 'perldoc Config::General' for the syntax, and be aware that the
@@ -206,7 +206,7 @@
Display = HTML 4.01 Strict
Info URL = http://www.w3.org/TR/1999/REC-html401-19991224/
PubID = -//W3C//DTD HTML 4.01//EN
- SysID = http://www.w3.org/TR/1999/REC-html401-19991224/strict.dtd
+ SysID = http://www.w3.org/TR/html4/strict.dtd
Parse Mode = SGML
<Types>
Allowed = text/html
@@ -228,7 +228,7 @@
Display = HTML 4.01 Transitional
Info URL = http://www.w3.org/TR/1999/REC-html401-19991224/
PubID = -//W3C//DTD HTML 4.01 Transitional//EN
- SysID = http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd
+ SysID = http://www.w3.org/TR/html4/loose.dtd
Parse Mode = SGML
<Types>
Allowed = text/html
@@ -250,7 +250,7 @@
Display = HTML 4.01 Frameset
Info URL = http://www.w3.org/TR/1999/REC-html401-19991224/
PubID = -//W3C//DTD HTML 4.01 Frameset//EN
- SysID = http://www.w3.org/TR/1999/REC-html401-19991224/frameset.dtd
+ SysID = http://www.w3.org/TR/html4/frameset.dtd
Parse Mode = SGML
<Types>
Allowed = text/html
@@ -272,7 +272,7 @@
Display = XHTML 1.0 Strict
Info URL = http://www.w3.org/TR/xhtml1/
PubID = -//W3C//DTD XHTML 1.0 Strict//EN
- SysID = http://www.w3.org/TR/2002/REC-xhtml1-20020801/DTD/xhtml1-strict.dtd
+ SysID = http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd
Namespace = http://www.w3.org/1999/xhtml
Namespace Required = 1 # per http://www.w3.org/TR/xhtml1/#normative
Parse Mode = XML
@@ -296,7 +296,7 @@
Display = XHTML 1.0 Transitional
Info URL = http://www.w3.org/TR/xhtml1/
PubID = -//W3C//DTD XHTML 1.0 Transitional//EN
- SysID = http://www.w3.org/TR/2002/REC-xhtml1-20020801/DTD/xhtml1-transitional.dtd
+ SysID = http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd
Namespace = http://www.w3.org/1999/xhtml
Namespace Required = 1 # per http://www.w3.org/TR/xhtml1/#normative
Parse Mode = XML
@@ -320,7 +320,7 @@
Display = XHTML 1.0 Frameset
Info URL = http://www.w3.org/TR/xhtml1/
PubID = -//W3C//DTD XHTML 1.0 Frameset//EN
- SysID = http://www.w3.org/TR/2002/REC-xhtml1-20020801/DTD/xhtml1-frameset.dtd
+ SysID = http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd
Namespace = http://www.w3.org/1999/xhtml
Namespace Required = 1 # per http://www.w3.org/TR/xhtml1/#normative
Parse Mode = XML
@@ -344,7 +344,7 @@
Display = XHTML Basic 1.0
Info URL = http://www.w3.org/TR/xhtml-basic/
PubID = -//W3C//DTD XHTML Basic 1.0//EN
- SysID = http://www.w3.org/TR/2000/REC-xhtml-basic-20001219/xhtml-basic10.dtd
+ SysID = http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd
Namespace = http://www.w3.org/1999/xhtml
Namespace Required = 0 # per http://www.w3.org/TR/2000/REC-xhtml-basic-20001219/#s2.1 it is unclear whether the namespace is fixed, or required
Parse Mode = XML
@@ -451,7 +451,7 @@
Display = XHTML 1.1
Info URL = http://www.w3.org/TR/xhtml11/
PubID = -//W3C//DTD XHTML 1.1//EN
- SysID = http://www.w3.org/TR/2001/REC-xhtml11-20010531/DTD/xhtml11-flat.dtd
+ SysID = http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd
Namespace = http://www.w3.org/1999/xhtml
Namespace Required = 1 # per http://www.w3.org/TR/xhtml11/conformance.html#s_conform
Parse Mode = XML
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index dafaffa..16a4f54 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.578 2007-11-21 06:55:48 ot Exp $
+# $Id: check,v 1.579 2008-01-17 08:09:23 ot Exp $
#
# Disable buffering on STDOUT!
@@ -186,7 +186,7 @@ Directory not readable (permission denied): @_r
#
# Strings
- $VERSION = q$Revision: 1.578 $;
+ $VERSION = q$Revision: 1.579 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
#
@@ -1879,6 +1879,17 @@ sub source {
return \@source;
}
+
+# Raise warning W26 when $FPI has an entry in $CFG->{Types} that defines a SysID
+# and that SysID is not the system identifier $SI. $File is accepted but unused.
+sub match_DTD_FPI_SI {
+ my ($File, $FPI, $SI) = @_;
+ my $known_type = $CFG->{Types}->{$FPI};
+ if ($known_type and $known_type->{SysID} and $SI ne $known_type->{SysID}) {
+ my %details = (W26_dtd_pub => $known_type->{Display}, W26_dtd_sys => $SI, W26_dtd_sys_recommend => $known_type->{SysID});
+ &add_warning('W26', \%details);
+ }
+}
#
# Do an initial parse of the Document Entity to extract FPI.
sub preparse_doctype {
@@ -1892,7 +1903,13 @@ sub preparse_doctype {
my $dtd = sub {
return if $File->{Root};
# TODO: The \s and \w are probably wrong now that the strings are utf8_on
- ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w[\w\.-]+)\s+(?:PUBLIC|SYSTEM)\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
+ my $declaration = shift;
+ my $doctype_type;
+ my $doctype_secondpart;
+ ($File->{Root}, $doctype_type, $File->{DOCTYPE}, $doctype_secondpart) = $declaration =~ m(<!DOCTYPE\s+(\w[\w\.-]+)\s+(PUBLIC|SYSTEM)\s+(?:[\'\"])([^\"\']+)(?:[\"\'])(.*)>)si;
+ if (($doctype_type eq "PUBLIC") and (($doctype_secondpart) = $doctype_secondpart =~ m(\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*)si)){
+ &match_DTD_FPI_SI($File, $File->{DOCTYPE}, $doctype_secondpart);
+ }
};
my $start = sub {
@@ -2519,7 +2536,6 @@ sub W3C::Validator::SAXHandler::data
}
}
-
sub W3C::Validator::SAXHandler::start_element
{
my ($self, $element) = @_;
diff --git a/share/templates/en_US/warnings.tmpl b/share/templates/en_US/warnings.tmpl
index 30d9940..f15745e 100644
--- a/share/templates/en_US/warnings.tmpl
+++ b/share/templates/en_US/warnings.tmpl
@@ -465,6 +465,15 @@ to check these potential issues, and, if necessary, fix them and re-validate the
<p>Validation was performed as if the DOCTYPE for <TMPL_VAR NAME="W25_dtd" ESCAPE="HTML"> was present. If this automatic detection is not correct, <a href="http://www.w3.org/QA/2002/04/valid-dtd-list.html" title="W3C QA - Recommended list of DTDs you can use in your Web document">adding a DOCTYPE declaration</a> will help validate without ambiguity.</p>
</li>
</TMPL_IF>
+<TMPL_IF NAME="W26">
+<li class="msg_warn" id="W26"><p><span class="err_type"><img src="images/info_icons/warning.png" alt="Warning" title="Warning" /></span> <span class="msg">Broken DOCTYPE detected</span></p>
+ <p>The document uses a DOCTYPE declaration with the <code><TMPL_VAR NAME="W26_dtd_sys" ESCAPE="HTML"></code> System Identifier, but the recommended System Identifier
+ for <code><TMPL_VAR NAME="W26_dtd_pub" ESCAPE="HTML"></code> is <code><TMPL_VAR NAME="W26_dtd_sys_recommend" ESCAPE="HTML"></code>.</p>
+ <p>The safest way to use a correct DOCTYPE declaration is to copy and paste one from the
+ <a href="http://www.w3.org/QA/2002/04/valid-dtd-list.html" title="W3C QA - Recommended list of DTDs you can use in your Web document">recommended list</a>
+ and avoid editing that part of your markup by hand.</p>
+ </li>
+</TMPL_IF>
<TMPL_IF NAME="W@@">
<li class="msg_warn" id="W@@">