[project @ Updated parser docs]

author: tailor <cygnus@janrain.com> 2006-01-14 00:11:03 +0000
committer: tailor <cygnus@janrain.com> 2006-01-14 00:11:03 +0000
commit: 8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562 (patch)
tree: 54373cfc3dcd6bb5266f43f779a86a8353515b64
parent: c3eb2e8863d40dfcc227145879c8b97ccfaaf11a (diff)
download: php-openid-8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562.zip
php-openid-8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562.tar.gz
php-openid-8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562.tar.bz2
1 files changed, 72 insertions, 1 deletions
diff --git a/Net/OpenID/Consumer/Parse.php b/Net/OpenID/Consumer/Parse.php
index 336a65d..5d63234 100644
--- a/Net/OpenID/Consumer/Parse.php
+++ b/Net/OpenID/Consumer/Parse.php
@@ -3,7 +3,70 @@
 /**
  * This module implements a VERY limited parser that finds <link> tags
  * in the head of HTML or XHTML documents and parses out their
- * attributes according to the OpenID spec.
+ * attributes according to the OpenID spec. It is a liberal parser,
+ * but it requires these things from the data in order to work:
+ *
+ * - There must be an open <html> tag
+ *
+ * - There must be an open <head> tag inside of the <html> tag
+ *
+ * - Only <link>s that are found inside of the <head> tag are parsed
+ *   (this is by design)
+ *
+ * - The parser follows the OpenID specification in resolving the
+ *   attributes of the link tags. This means that the attributes DO
+ *   NOT get resolved as they would by an XML or HTML parser. In
+ *   particular, only certain entities get replaced, and href
+ *   attributes do not get resolved relative to a base URL.
+ *
+ * From http://openid.net/specs.bml:
+ *
+ * - The openid.server URL MUST be an absolute URL. OpenID consumers
+ *   MUST NOT attempt to resolve relative URLs.
+ *
+ * - The openid.server URL MUST NOT include entities other than &amp;,
+ *   &lt;, &gt;, and &quot;.
+ *
+ * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds
+ * of quoting are allowed for attributes.
+ *
+ * The parser deals with invalid markup in these ways:
+ *
+ * - Tag names are not case-sensitive
+ *
+ * - The <html> tag is accepted even when it is not at the top level
+ *
+ * - The <head> tag is accepted even when it is not a direct child of
+ *   the <html> tag, but a <html> tag must be an ancestor of the
+ *   <head> tag
+ *
+ * - <link> tags are accepted even when they are not direct children
+ *   of the <head> tag, but a <head> tag must be an ancestor of the
+ *   <link> tag
+ *
+ * - If there is no closing tag for an open <html> or <head> tag, the
+ *   remainder of the document is viewed as being inside of the
+ *   tag. If there is no closing tag for a <link> tag, the link tag is
+ *   treated as a short tag. Exceptions: a second <html> tag closes
+ *   <html>, and a <body> or second <head> tag closes <head>.
+ *
+ * - Attributes of the <link> tag are not required to be quoted.
+ *
+ * - In the case of duplicated attribute names, the attribute coming
+ *   last in the tag will be the value returned.
+ *
+ * - Any text that does not parse as an attribute within a link tag
+ *   will be ignored. (e.g. <link pumpkin rel='openid.server' /> will
+ *   ignore pumpkin)
+ *
+ * - If there is more than one <html> or <head> tag, the parser only
+ *   looks inside of the first one.
+ *
+ * - The contents of closed <script> tags are ignored entirely. An
+ *   unclosed <script> tag is itself ignored.
+ *
+ * - Any other invalid markup is ignored, including unclosed SGML
+ *   comments and unclosed <![CDATA[blocks.
  *
  * PHP versions 4 and 5
  *
@@ -112,6 +175,14 @@ function Net_OpenID_remove_quotes($str)
     }
 }
 
+/**
+ * Find all link tags in a string representing an HTML document and
+ * return a list of their attributes.
+ *
+ * @param string $html The text to parse
+ * @return array $list An array of arrays of attributes, one for each
+ * link tag
+ */
 function Net_OpenID_parseLinkAttrs($html)
 {
author	tailor <cygnus@janrain.com>	2006-01-14 00:11:03 +0000
committer	tailor <cygnus@janrain.com>	2006-01-14 00:11:03 +0000
commit	8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562 (patch)
tree	54373cfc3dcd6bb5266f43f779a86a8353515b64
parent	c3eb2e8863d40dfcc227145879c8b97ccfaaf11a (diff)
download	php-openid-8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562.zip php-openid-8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562.tar.gz php-openid-8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562.tar.bz2