diff options
-rw-r--r-- | Auth/OpenID/Consumer.php | 1 | ||||
-rw-r--r-- | Auth/OpenID/Consumer/Parse.php | 289 | ||||
-rw-r--r-- | Auth/OpenID/HTTPFetcher.php | 13 | ||||
-rw-r--r-- | Auth/OpenID/Parse.php | 290 | ||||
-rw-r--r-- | Tests/Auth/OpenID/Parse.php | 5 |
5 files changed, 303 insertions, 295 deletions
diff --git a/Auth/OpenID/Consumer.php b/Auth/OpenID/Consumer.php index 3dbbc60..f27972c 100644 --- a/Auth/OpenID/Consumer.php +++ b/Auth/OpenID/Consumer.php @@ -188,7 +188,6 @@ require_once "Auth/OpenID.php"; require_once "Auth/OpenID/HMACSHA1.php"; require_once "Auth/OpenID/Association.php"; require_once "Auth/OpenID/AuthenticationRequest.php"; -require_once "Auth/OpenID/Consumer/Parse.php"; require_once "Auth/OpenID/CryptUtil.php"; require_once "Auth/OpenID/DiffieHellman.php"; require_once "Auth/OpenID/KVForm.php"; diff --git a/Auth/OpenID/Consumer/Parse.php b/Auth/OpenID/Consumer/Parse.php deleted file mode 100644 index e63e03e..0000000 --- a/Auth/OpenID/Consumer/Parse.php +++ /dev/null @@ -1,289 +0,0 @@ -<?php - -/** - * This module implements a VERY limited parser that finds <link> tags - * in the head of HTML or XHTML documents and parses out their - * attributes according to the OpenID spec. It is a liberal parser, - * but it requires these things from the data in order to work: - * - * - There must be an open <html> tag - * - * - There must be an open <head> tag inside of the <html> tag - * - * - Only <link>s that are found inside of the <head> tag are parsed - * (this is by design) - * - * - The parser follows the OpenID specification in resolving the - * attributes of the link tags. This means that the attributes DO - * NOT get resolved as they would by an XML or HTML parser. In - * particular, only certain entities get replaced, and href - * attributes do not get resolved relative to a base URL. - * - * From http://openid.net/specs.bml: - * - * - The openid.server URL MUST be an absolute URL. OpenID consumers - * MUST NOT attempt to resolve relative URLs. - * - * - The openid.server URL MUST NOT include entities other than &, - * <, >, and ". - * - * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds - * of quoting are allowed for attributes. 
- * - * The parser deals with invalid markup in these ways: - * - * - Tag names are not case-sensitive - * - * - The <html> tag is accepted even when it is not at the top level - * - * - The <head> tag is accepted even when it is not a direct child of - * the <html> tag, but a <html> tag must be an ancestor of the - * <head> tag - * - * - <link> tags are accepted even when they are not direct children - * of the <head> tag, but a <head> tag must be an ancestor of the - * <link> tag - * - * - If there is no closing tag for an open <html> or <head> tag, the - * remainder of the document is viewed as being inside of the - * tag. If there is no closing tag for a <link> tag, the link tag is - * treated as a short tag. Exceptions to this rule are that <html> - * closes <html> and <body> or <head> closes <head> - * - * - Attributes of the <link> tag are not required to be quoted. - * - * - In the case of duplicated attribute names, the attribute coming - * last in the tag will be the value returned. - * - * - Any text that does not parse as an attribute within a link tag - * will be ignored. (e.g. <link pumpkin rel='openid.server' /> will - * ignore pumpkin) - * - * - If there are more than one <html> or <head> tag, the parser only - * looks inside of the first one. - * - * - The contents of <script> tags are ignored entirely, except - * unclosed <script> tags. Unclosed <script> tags are ignored. - * - * - Any other invalid markup is ignored, including unclosed SGML - * comments and unclosed <![CDATA[blocks. - * - * PHP versions 4 and 5 - * - * LICENSE: See the COPYING file included in this distribution. - * - * @access private - * @package OpenID - * @author JanRain, Inc. <openid@janrain.com> - * @copyright 2005 Janrain, Inc. - * @license http://www.gnu.org/copyleft/lesser.html LGPL - */ - -/** - * Specify some flags for use with regex matching. 
- */ -$_Auth_OpenID_re_flags = "si"; - -/** - * Stuff to remove before we start looking for tags - */ -$_Auth_OpenID_removed_re = "<!--.*?-->|" . - "<!\[CDATA\[.*?\]\]>|" . - "<script\b(?!:)[^>]*>.*?<\/script>"; - -/** - * Starts with the tag name at a word boundary, where the tag name is - * not a namespace - */ -$_Auth_OpenID_tag_expr = "<%s\b(?!:)([^>]*?)" . - "(?:\/>|>(.*?)" . - "(?:<\/?%s\s*>|\Z))"; - -/** - * Returns a regular expression that will match a given tag in an SGML - * string. - */ -function Auth_OpenID_tagMatcher($tag_name, $close_tags = null) -{ - global $_Auth_OpenID_tag_expr, $_Auth_OpenID_re_flags; - - if ($close_tags) { - $options = implode("|", array_merge(array($tag_name), $close_tags)); - $closer = sprintf("(?:%s)", $options); - } else { - $closer = $tag_name; - } - - $expr = sprintf($_Auth_OpenID_tag_expr, $tag_name, $closer); - return sprintf("/%s/%s", $expr, $_Auth_OpenID_re_flags); -} - -function Auth_OpenID_htmlFind() -{ - return Auth_OpenID_tagMatcher('html'); -} - -function Auth_OpenID_headFind() -{ - return Auth_OpenID_tagMatcher('head', array('body')); -} - -$_Auth_OpenID_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)'; - -$_Auth_OpenID_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s", - $_Auth_OpenID_re_flags); - -$_Auth_OpenID_entity_replacements = array( - 'amp' => '&', - 'lt' => '<', - 'gt' => '>', - 'quot' => '"' - ); - -$_Auth_OpenID_attr_find = sprintf("/%s/%s", - $_Auth_OpenID_attr_find, - $_Auth_OpenID_re_flags); - -$_Auth_OpenID_removed_re = sprintf("/%s/%s", - $_Auth_OpenID_removed_re, - $_Auth_OpenID_re_flags); - -$_Auth_OpenID_ent_replace = - sprintf("&(%s);", implode("|", - $_Auth_OpenID_entity_replacements)); - -function Auth_OpenID_replace_entities($str) -{ - global $_Auth_OpenID_entity_replacements; - foreach ($_Auth_OpenID_entity_replacements as $old => $new) { - $str = preg_replace(sprintf("/&%s;/", $old), $new, $str); - } - return $str; -} - -function Auth_OpenID_remove_quotes($str) -{ - $matches = 
array(); - $double = '/^"(.*)"$/'; - $single = "/^\'(.*)\'$/"; - - if (preg_match($double, $str, $matches)) { - return $matches[1]; - } else if (preg_match($single, $str, $matches)) { - return $matches[1]; - } else { - return $str; - } -} - -/** - * Find all link tags in a string representing a HTML document and - * return a list of their attributes. - * - * @param string $html The text to parse - * @return array $list An array of arrays of attributes, one for each - * link tag - */ -function Auth_OpenID_parseLinkAttrs($html) -{ - - global $_Auth_OpenID_removed_re, - $_Auth_OpenID_link_find, - $_Auth_OpenID_attr_find; - - $stripped = preg_replace($_Auth_OpenID_removed_re, - "", - $html); - - // Try to find the <HTML> tag. - $html_re = Auth_OpenID_htmlFind(); - $html_matches = array(); - if (!preg_match($html_re, $stripped, $html_matches)) { - return array(); - } - - // Try to find the <HEAD> tag. - $head_re = Auth_OpenID_headFind(); - $head_matches = array(); - if (!preg_match($head_re, $html_matches[0], $head_matches)) { - return array(); - } - - $link_data = array(); - $link_matches = array(); - - if (!preg_match_all($_Auth_OpenID_link_find, $head_matches[0], - $link_matches)) { - return array(); - } - - foreach ($link_matches[0] as $link) { - $attr_matches = array(); - preg_match_all($_Auth_OpenID_attr_find, $link, $attr_matches); - $link_attrs = array(); - foreach ($attr_matches[0] as $index => $full_match) { - $name = $attr_matches[1][$index]; - $value = Auth_OpenID_replace_entities( - Auth_OpenID_remove_quotes( - $attr_matches[2][$index])); - - $link_attrs[$name] = $value; - } - $link_data[] = $link_attrs; - } - - return $link_data; -} - -function Auth_OpenID_relMatches($rel_attr, $target_rel) -{ - // Does this target_rel appear in the rel_str? 
- // XXX: TESTME - $rels = preg_split("/\s+/", trim($rel_attr)); - foreach ($rels as $rel) { - $rel = strtolower($rel); - if ($rel == $target_rel) { - return 1; - } - } - - return 0; -} - -function Auth_OpenID_linkHasRel($link_attrs, $target_rel) -{ - // Does this link have target_rel as a relationship? - // XXX: TESTME - $rel_attr = Auth_OpenID_arrayGet($link_attrs, 'rel', null); - return ($rel_attr && Auth_OpenID_relMatches($rel_attr, $target_rel)); -} - -function Auth_OpenID_findLinksRel($link_attrs_list, $target_rel) -{ - // Filter the list of link attributes on whether it has target_rel - // as a relationship. - // XXX: TESTME - $result = array(); - foreach ($link_attrs_list as $attr) { - if (Auth_OpenID_linkHasRel($attr, $target_rel)) { - $result[] = $attr; - } - } - - return $result; -} - -function Auth_OpenID_findFirstHref($link_attrs_list, $target_rel) -{ - // Return the value of the href attribute for the first link tag - // in the list that has target_rel as a relationship. - // XXX: TESTME - $matches = Auth_OpenID_findLinksRel($link_attrs_list, $target_rel); - if (!$matches) { - return null; - } - $first = $matches[0]; - return Auth_OpenID_arrayGet($first, 'href', null); -} - -?>
\ No newline at end of file diff --git a/Auth/OpenID/HTTPFetcher.php b/Auth/OpenID/HTTPFetcher.php index 7cc957c..7fac490 100644 --- a/Auth/OpenID/HTTPFetcher.php +++ b/Auth/OpenID/HTTPFetcher.php @@ -13,6 +13,11 @@ */ /** + * Require the parser. + */ +require_once "Auth/OpenID/Parse.php"; + +/** * This is the status code beginAuth returns when it is unable to * fetch the OpenID URL the user entered. */ @@ -129,9 +134,11 @@ class Auth_OpenID_HTTPFetcher { return array(Auth_OpenID_HTTP_FAILURE, $http_code); } - $link_attrs = Auth_OpenID_parseLinkAttrs($data); - $server = Auth_OpenID_findFirstHref($link_attrs, 'openid.server'); - $delegate = Auth_OpenID_findFirstHref($link_attrs, 'openid.delegate'); + $parser = new Auth_OpenID_Parse(); + + $link_attrs = $parser->parseLinkAttrs($data); + $server = $parser->findFirstHref($link_attrs, 'openid.server'); + $delegate = $parser->findFirstHref($link_attrs, 'openid.delegate'); if ($server === null) { return array(Auth_OpenID_PARSE_ERROR, null); diff --git a/Auth/OpenID/Parse.php b/Auth/OpenID/Parse.php new file mode 100644 index 0000000..999c032 --- /dev/null +++ b/Auth/OpenID/Parse.php @@ -0,0 +1,290 @@ +<?php + +/** + * This module implements a VERY limited parser that finds <link> tags + * in the head of HTML or XHTML documents and parses out their + * attributes according to the OpenID spec. It is a liberal parser, + * but it requires these things from the data in order to work: + * + * - There must be an open <html> tag + * + * - There must be an open <head> tag inside of the <html> tag + * + * - Only <link>s that are found inside of the <head> tag are parsed + * (this is by design) + * + * - The parser follows the OpenID specification in resolving the + * attributes of the link tags. This means that the attributes DO + * NOT get resolved as they would by an XML or HTML parser. In + * particular, only certain entities get replaced, and href + * attributes do not get resolved relative to a base URL. 
+ * + * From http://openid.net/specs.bml: + * + * - The openid.server URL MUST be an absolute URL. OpenID consumers + * MUST NOT attempt to resolve relative URLs. + * + * - The openid.server URL MUST NOT include entities other than &amp;, + * &lt;, &gt;, and &quot;. + * + * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds + * of quoting are allowed for attributes. + * + * The parser deals with invalid markup in these ways: + * + * - Tag names are not case-sensitive + * + * - The <html> tag is accepted even when it is not at the top level + * + * - The <head> tag is accepted even when it is not a direct child of + * the <html> tag, but a <html> tag must be an ancestor of the + * <head> tag + * + * - <link> tags are accepted even when they are not direct children + * of the <head> tag, but a <head> tag must be an ancestor of the + * <link> tag + * + * - If there is no closing tag for an open <html> or <head> tag, the + * remainder of the document is viewed as being inside of the + * tag. If there is no closing tag for a <link> tag, the link tag is + * treated as a short tag. Exceptions to this rule are that <html> + * closes <html> and <body> or <head> closes <head> + * + * - Attributes of the <link> tag are not required to be quoted. + * + * - In the case of duplicated attribute names, the attribute coming + * last in the tag will be the value returned. + * + * - Any text that does not parse as an attribute within a link tag + * will be ignored. (e.g. <link pumpkin rel='openid.server' /> will + * ignore pumpkin) + * + * - If there is more than one <html> or <head> tag, the parser only + * looks inside of the first one. + * + * - The contents of <script> tags are ignored entirely, except + * unclosed <script> tags. Unclosed <script> tags are ignored. + * + * - Any other invalid markup is ignored, including unclosed SGML + * comments and unclosed <![CDATA[blocks. + * + * PHP versions 4 and 5 + * + * LICENSE: See the COPYING file included in this distribution. 
+ * + * @access private + * @package OpenID + * @author JanRain, Inc. <openid@janrain.com> + * @copyright 2005 Janrain, Inc. + * @license http://www.gnu.org/copyleft/lesser.html LGPL + */ + +/** + * Require Auth_OpenID_arrayGet. + */ +require_once "Util.php"; + +class Auth_OpenID_Parse { + + /** + * Specify some flags for use with regex matching. + */ + var $_re_flags = "si"; + + /** + * Stuff to remove before we start looking for tags + */ + var $_removed_re = + "<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>"; + + /** + * Starts with the tag name at a word boundary, where the tag name + * is not a namespace + */ + var $_tag_expr = "<%s\b(?!:)([^>]*?)(?:\/>|>(.*?)(?:<\/?%s\s*>|\Z))"; + + var $_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)'; + + function Auth_OpenID_Parse() + { + $this->_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s", + $this->_re_flags); + + $this->_entity_replacements = array( + 'amp' => '&', + 'lt' => '<', + 'gt' => '>', + 'quot' => '"' + ); + + $this->_attr_find = sprintf("/%s/%s", + $this->_attr_find, + $this->_re_flags); + + $this->_removed_re = sprintf("/%s/%s", + $this->_removed_re, + $this->_re_flags); + + $this->_ent_replace = + sprintf("&(%s);", implode("|", + $this->_entity_replacements)); + } + + /** + * Returns a regular expression that will match a given tag in an + * SGML string. 
+ */ + function tagMatcher($tag_name, $close_tags = null) + { + if ($close_tags) { + $options = implode("|", array_merge(array($tag_name), $close_tags)); + $closer = sprintf("(?:%s)", $options); + } else { + $closer = $tag_name; + } + + $expr = sprintf($this->_tag_expr, $tag_name, $closer); + return sprintf("/%s/%s", $expr, $this->_re_flags); + } + + function htmlFind() + { + return $this->tagMatcher('html'); + } + + function headFind() + { + return $this->tagMatcher('head', array('body')); + } + + function replaceEntities($str) + { + foreach ($this->_entity_replacements as $old => $new) { + $str = preg_replace(sprintf("/&%s;/", $old), $new, $str); + } + return $str; + } + + function removeQuotes($str) + { + $matches = array(); + $double = '/^"(.*)"$/'; + $single = "/^\'(.*)\'$/"; + + if (preg_match($double, $str, $matches)) { + return $matches[1]; + } else if (preg_match($single, $str, $matches)) { + return $matches[1]; + } else { + return $str; + } + } + + /** + * Find all link tags in a string representing a HTML document and + * return a list of their attributes. + * + * @param string $html The text to parse + * @return array $list An array of arrays of attributes, one for each + * link tag + */ + function parseLinkAttrs($html) + { + $stripped = preg_replace($this->_removed_re, + "", + $html); + + // Try to find the <HTML> tag. + $html_re = $this->htmlFind(); + $html_matches = array(); + if (!preg_match($html_re, $stripped, $html_matches)) { + return array(); + } + + // Try to find the <HEAD> tag. 
+ $head_re = $this->headFind(); + $head_matches = array(); + if (!preg_match($head_re, $html_matches[0], $head_matches)) { + return array(); + } + + $link_data = array(); + $link_matches = array(); + + if (!preg_match_all($this->_link_find, $head_matches[0], + $link_matches)) { + return array(); + } + + foreach ($link_matches[0] as $link) { + $attr_matches = array(); + preg_match_all($this->_attr_find, $link, $attr_matches); + $link_attrs = array(); + foreach ($attr_matches[0] as $index => $full_match) { + $name = $attr_matches[1][$index]; + $value = $this->replaceEntities( + $this->removeQuotes($attr_matches[2][$index])); + + $link_attrs[$name] = $value; + } + $link_data[] = $link_attrs; + } + + return $link_data; + } + + function relMatches($rel_attr, $target_rel) + { + // Does this target_rel appear in the rel_str? + // XXX: TESTME + $rels = preg_split("/\s+/", trim($rel_attr)); + foreach ($rels as $rel) { + $rel = strtolower($rel); + if ($rel == $target_rel) { + return 1; + } + } + + return 0; + } + + function linkHasRel($link_attrs, $target_rel) + { + // Does this link have target_rel as a relationship? + // XXX: TESTME + $rel_attr = Auth_OpeniD_arrayGet($link_attrs, 'rel', null); + return ($rel_attr && $this->relMatches($rel_attr, + $target_rel)); + } + + function findLinksRel($link_attrs_list, $target_rel) + { + // Filter the list of link attributes on whether it has + // target_rel as a relationship. + // XXX: TESTME + $result = array(); + foreach ($link_attrs_list as $attr) { + if ($this->linkHasRel($attr, $target_rel)) { + $result[] = $attr; + } + } + + return $result; + } + + function findFirstHref($link_attrs_list, $target_rel) + { + // Return the value of the href attribute for the first link + // tag in the list that has target_rel as a relationship. 
+ // XXX: TESTME + $matches = $this->findLinksRel($link_attrs_list, + $target_rel); + if (!$matches) { + return null; + } + $first = $matches[0]; + return Auth_OpenID_arrayGet($first, 'href', null); + } +} + +?>
\ No newline at end of file diff --git a/Tests/Auth/OpenID/Parse.php b/Tests/Auth/OpenID/Parse.php index 4d5dbd3..1748651 100644 --- a/Tests/Auth/OpenID/Parse.php +++ b/Tests/Auth/OpenID/Parse.php @@ -14,7 +14,7 @@ */ require_once 'Tests/Auth/OpenID/TestUtil.php'; -require_once 'Auth/OpenID/Consumer/Parse.php'; +require_once 'Auth/OpenID/Parse.php'; require_once 'PHPUnit.php'; class Tests_Auth_OpenID_Link extends PHPUnit_TestCase { @@ -25,6 +25,7 @@ class Tests_Auth_OpenID_Link extends PHPUnit_TestCase { $this->markup = $markup; $this->expected_links = $links; $this->case_text = $case_text; + $this->parser = new Auth_OpenID_Parse(); } function getName() @@ -34,7 +35,7 @@ class Tests_Auth_OpenID_Link extends PHPUnit_TestCase { function runTest() { - $parsed = Auth_OpenID_parseLinkAttrs($this->markup); + $parsed = $this->parser->parseLinkAttrs($this->markup); $i = 0; foreach ($this->expected_links as $expected) { |