diff options
author | tailor <cygnus@janrain.com> | 2006-01-14 00:11:03 +0000 |
---|---|---|
committer | tailor <cygnus@janrain.com> | 2006-01-14 00:11:03 +0000 |
commit | 8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562 (patch) | |
tree | 54373cfc3dcd6bb5266f43f779a86a8353515b64 | |
parent | c3eb2e8863d40dfcc227145879c8b97ccfaaf11a (diff) | |
download | php-openid-8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562.zip php-openid-8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562.tar.gz php-openid-8ee5a5830f2cd3d9f9a15df133d8aebbbc74a562.tar.bz2 |
[project @ Updated parser docs]
-rw-r--r-- | Net/OpenID/Consumer/Parse.php | 73 |
1 files changed, 72 insertions, 1 deletions
diff --git a/Net/OpenID/Consumer/Parse.php b/Net/OpenID/Consumer/Parse.php index 336a65d..5d63234 100644 --- a/Net/OpenID/Consumer/Parse.php +++ b/Net/OpenID/Consumer/Parse.php @@ -3,7 +3,70 @@ /** * This module implements a VERY limited parser that finds <link> tags * in the head of HTML or XHTML documents and parses out their - * attributes according to the OpenID spec. + * attributes according to the OpenID spec. It is a liberal parser, + * but it requires these things from the data in order to work: + * + * - There must be an open <html> tag + * + * - There must be an open <head> tag inside of the <html> tag + * + * - Only <link>s that are found inside of the <head> tag are parsed + * (this is by design) + * + * - The parser follows the OpenID specification in resolving the + * attributes of the link tags. This means that the attributes DO + * NOT get resolved as they would by an XML or HTML parser. In + * particular, only certain entities get replaced, and href + * attributes do not get resolved relative to a base URL. + * + * From http://openid.net/specs.bml: + * + * - The openid.server URL MUST be an absolute URL. OpenID consumers + * MUST NOT attempt to resolve relative URLs. + * + * - The openid.server URL MUST NOT include entities other than &amp;, + * &lt;, &gt;, and &quot;. + * + * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds + * of quoting are allowed for attributes. 
+ * + * The parser deals with invalid markup in these ways: + * + * - Tag names are not case-sensitive + * + * - The <html> tag is accepted even when it is not at the top level + * + * - The <head> tag is accepted even when it is not a direct child of + * the <html> tag, but an <html> tag must be an ancestor of the + * <head> tag + * + * - <link> tags are accepted even when they are not direct children + * of the <head> tag, but a <head> tag must be an ancestor of the + * <link> tag + * + * - If there is no closing tag for an open <html> or <head> tag, the + * remainder of the document is viewed as being inside of the + * tag. If there is no closing tag for a <link> tag, the link tag is + * treated as a short tag. Exceptions to this rule are that <html> + * closes <html> and <body> or <head> closes <head> + * + * - Attributes of the <link> tag are not required to be quoted. + * + * - In the case of duplicated attribute names, the attribute coming + * last in the tag will be the value returned. + * + * - Any text that does not parse as an attribute within a link tag + * will be ignored. (e.g. <link pumpkin rel='openid.server' /> will + * ignore pumpkin) + * + * - If there is more than one <html> or <head> tag, the parser only + * looks inside of the first one. + * + * - The contents of <script> tags are ignored entirely, except + * unclosed <script> tags. Unclosed <script> tags are ignored. + * + * - Any other invalid markup is ignored, including unclosed SGML + * comments and unclosed <![CDATA[blocks. * * PHP versions 4 and 5 * @@ -112,6 +175,14 @@ function Net_OpenID_remove_quotes($str) } } +/** + * Find all link tags in a string representing an HTML document and + * return a list of their attributes. + * + * @param string $html The text to parse + * @return array $list An array of arrays of attributes, one for each + * link tag + */ function Net_OpenID_parseLinkAttrs($html) { |