5 files changed, 303 insertions, 295 deletions
diff --git a/Auth/OpenID/Consumer.php b/Auth/OpenID/Consumer.php
index 3dbbc60..f27972c 100644
--- a/Auth/OpenID/Consumer.php
+++ b/Auth/OpenID/Consumer.php
@@ -188,7 +188,6 @@ require_once "Auth/OpenID.php";
 require_once "Auth/OpenID/HMACSHA1.php";
 require_once "Auth/OpenID/Association.php";
 require_once "Auth/OpenID/AuthenticationRequest.php";
-require_once "Auth/OpenID/Consumer/Parse.php";
 require_once "Auth/OpenID/CryptUtil.php";
 require_once "Auth/OpenID/DiffieHellman.php";
 require_once "Auth/OpenID/KVForm.php";
diff --git a/Auth/OpenID/Consumer/Parse.php b/Auth/OpenID/Consumer/Parse.php
deleted file mode 100644
index e63e03e..0000000
--- a/Auth/OpenID/Consumer/Parse.php
+++ /dev/null
@@ -1,289 +0,0 @@
-<?php
-
-/**
- * This module implements a VERY limited parser that finds <link> tags
- * in the head of HTML or XHTML documents and parses out their
- * attributes according to the OpenID spec. It is a liberal parser,
- * but it requires these things from the data in order to work:
- *
- * - There must be an open <html> tag
- *
- * - There must be an open <head> tag inside of the <html> tag
- *
- * - Only <link>s that are found inside of the <head> tag are parsed
- *   (this is by design)
- *
- * - The parser follows the OpenID specification in resolving the
- *   attributes of the link tags. This means that the attributes DO
- *   NOT get resolved as they would by an XML or HTML parser. In
- *   particular, only certain entities get replaced, and href
- *   attributes do not get resolved relative to a base URL.
- *
- * From http://openid.net/specs.bml:
- *
- * - The openid.server URL MUST be an absolute URL. OpenID consumers
- *   MUST NOT attempt to resolve relative URLs.
- *
- * - The openid.server URL MUST NOT include entities other than &amp;,
- *   &lt;, &gt;, and &quot;.
- *
- * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds
- * of quoting are allowed for attributes.
- *
- * The parser deals with invalid markup in these ways:
- *
- * - Tag names are not case-sensitive
- *
- * - The <html> tag is accepted even when it is not at the top level
- *
- * - The <head> tag is accepted even when it is not a direct child of
- *   the <html> tag, but a <html> tag must be an ancestor of the
- *   <head> tag
- *
- * - <link> tags are accepted even when they are not direct children
- *   of the <head> tag, but a <head> tag must be an ancestor of the
- *   <link> tag
- *
- * - If there is no closing tag for an open <html> or <head> tag, the
- *   remainder of the document is viewed as being inside of the
- *   tag. If there is no closing tag for a <link> tag, the link tag is
- *   treated as a short tag. Exceptions to this rule are that <html>
- *   closes <html> and <body> or <head> closes <head>
- *
- * - Attributes of the <link> tag are not required to be quoted.
- *
- * - In the case of duplicated attribute names, the attribute coming
- *   last in the tag will be the value returned.
- *
- * - Any text that does not parse as an attribute within a link tag
- *   will be ignored. (e.g. <link pumpkin rel='openid.server' /> will
- *   ignore pumpkin)
- *
- * - If there are more than one <html> or <head> tag, the parser only
- *   looks inside of the first one.
- *
- * - The contents of <script> tags are ignored entirely, except
- *   unclosed <script> tags. Unclosed <script> tags are ignored.
- *
- * - Any other invalid markup is ignored, including unclosed SGML
- *   comments and unclosed <![CDATA[blocks.
- *
- * PHP versions 4 and 5
- *
- * LICENSE: See the COPYING file included in this distribution.
- *
- * @access private
- * @package OpenID
- * @author JanRain, Inc. <openid@janrain.com>
- * @copyright 2005 Janrain, Inc.
- * @license http://www.gnu.org/copyleft/lesser.html LGPL
- */
-
-/**
- * Specify some flags for use with regex matching.
- */
-$_Auth_OpenID_re_flags = "si";
-
-/**
- * Stuff to remove before we start looking for tags
- */
-$_Auth_OpenID_removed_re = "<!--.*?-->|" .
-                          "<!\[CDATA\[.*?\]\]>|" .
-                          "<script\b(?!:)[^>]*>.*?<\/script>";
-
-/**
- * Starts with the tag name at a word boundary, where the tag name is
- * not a namespace
- */
-$_Auth_OpenID_tag_expr = "<%s\b(?!:)([^>]*?)" .
-                        "(?:\/>|>(.*?)" .
-                        "(?:<\/?%s\s*>|\Z))";
-
-/**
- * Returns a regular expression that will match a given tag in an SGML
- * string.
- */
-function Auth_OpenID_tagMatcher($tag_name, $close_tags = null)
-{
-    global $_Auth_OpenID_tag_expr, $_Auth_OpenID_re_flags;
-
-    if ($close_tags) {
-        $options = implode("|", array_merge(array($tag_name), $close_tags));
-        $closer = sprintf("(?:%s)", $options);
-    } else {
-        $closer = $tag_name;
-    }
-
-    $expr = sprintf($_Auth_OpenID_tag_expr, $tag_name, $closer);
-    return sprintf("/%s/%s", $expr, $_Auth_OpenID_re_flags);
-}
-
-function Auth_OpenID_htmlFind()
-{
-    return Auth_OpenID_tagMatcher('html');
-}
-
-function Auth_OpenID_headFind()
-{
-    return Auth_OpenID_tagMatcher('head', array('body'));
-}
-
-$_Auth_OpenID_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)';
-
-$_Auth_OpenID_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s",
-                                 $_Auth_OpenID_re_flags);
-
-$_Auth_OpenID_entity_replacements = array(
-                                         'amp' => '&',
-                                         'lt' => '<',
-                                         'gt' => '>',
-                                         'quot' => '"'
-                                         );
-
-$_Auth_OpenID_attr_find = sprintf("/%s/%s",
-                                 $_Auth_OpenID_attr_find,
-                                 $_Auth_OpenID_re_flags);
-
-$_Auth_OpenID_removed_re = sprintf("/%s/%s",
-                                  $_Auth_OpenID_removed_re,
-                                  $_Auth_OpenID_re_flags);
-
-$_Auth_OpenID_ent_replace =
-     sprintf("&(%s);", implode("|",
-                               $_Auth_OpenID_entity_replacements));
-
-function Auth_OpenID_replace_entities($str)
-{
-    global $_Auth_OpenID_entity_replacements;
-    foreach ($_Auth_OpenID_entity_replacements as $old => $new) {
-        $str = preg_replace(sprintf("/&%s;/", $old), $new, $str);
-    }
-    return $str;
-}
-
-function Auth_OpenID_remove_quotes($str)
-{
-    $matches = array();
-    $double = '/^"(.*)"$/';
-    $single = "/^\'(.*)\'$/";
-
-    if (preg_match($double, $str, $matches)) {
-        return $matches[1];
-    } else if (preg_match($single, $str, $matches)) {
-        return $matches[1];
-    } else {
-        return $str;
-    }
-}
-
-/**
- * Find all link tags in a string representing a HTML document and
- * return a list of their attributes.
- *
- * @param string $html The text to parse
- * @return array $list An array of arrays of attributes, one for each
- * link tag
- */
-function Auth_OpenID_parseLinkAttrs($html)
-{
-
-    global $_Auth_OpenID_removed_re,
-        $_Auth_OpenID_link_find,
-        $_Auth_OpenID_attr_find;
-
-    $stripped = preg_replace($_Auth_OpenID_removed_re,
-                             "",
-                             $html);
-
-    // Try to find the <HTML> tag.
-    $html_re = Auth_OpenID_htmlFind();
-    $html_matches = array();
-    if (!preg_match($html_re, $stripped, $html_matches)) {
-        return array();
-    }
-
-    // Try to find the <HEAD> tag.
-    $head_re = Auth_OpenID_headFind();
-    $head_matches = array();
-    if (!preg_match($head_re, $html_matches[0], $head_matches)) {
-        return array();
-    }
-
-    $link_data = array();
-    $link_matches = array();
-
-    if (!preg_match_all($_Auth_OpenID_link_find, $head_matches[0],
-                        $link_matches)) {
-        return array();
-    }
-
-    foreach ($link_matches[0] as $link) {
-        $attr_matches = array();
-        preg_match_all($_Auth_OpenID_attr_find, $link, $attr_matches);
-        $link_attrs = array();
-        foreach ($attr_matches[0] as $index => $full_match) {
-            $name = $attr_matches[1][$index];
-            $value = Auth_OpenID_replace_entities(
-                       Auth_OpenID_remove_quotes(
-                         $attr_matches[2][$index]));
-
-            $link_attrs[$name] = $value;
-        }
-        $link_data[] = $link_attrs;
-    }
-
-    return $link_data;
-}
-
-function Auth_OpenID_relMatches($rel_attr, $target_rel)
-{
-    // Does this target_rel appear in the rel_str?
-    // XXX: TESTME
-    $rels = preg_split("/\s+/", trim($rel_attr));
-    foreach ($rels as $rel) {
-        $rel = strtolower($rel);
-        if ($rel == $target_rel) {
-            return 1;
-        }
-    }
-
-    return 0;
-}
-
-function Auth_OpenID_linkHasRel($link_attrs, $target_rel)
-{
-    // Does this link have target_rel as a relationship?
-    // XXX: TESTME
-    $rel_attr = Auth_OpenID_arrayGet($link_attrs, 'rel', null);
-    return ($rel_attr && Auth_OpenID_relMatches($rel_attr, $target_rel));
-}
-
-function Auth_OpenID_findLinksRel($link_attrs_list, $target_rel)
-{
-    // Filter the list of link attributes on whether it has target_rel
-    // as a relationship.
-    // XXX: TESTME
-    $result = array();
-    foreach ($link_attrs_list as $attr) {
-        if (Auth_OpenID_linkHasRel($attr, $target_rel)) {
-            $result[] = $attr;
-        }
-    }
-
-    return $result;
-}
-
-function Auth_OpenID_findFirstHref($link_attrs_list, $target_rel)
-{
-    // Return the value of the href attribute for the first link tag
-    // in the list that has target_rel as a relationship.
-    // XXX: TESTME
-    $matches = Auth_OpenID_findLinksRel($link_attrs_list, $target_rel);
-    if (!$matches) {
-        return null;
-    }
-    $first = $matches[0];
-    return Auth_OpenID_arrayGet($first, 'href', null);
-}
-
-?>
-\ No newline at end of file
diff --git a/Auth/OpenID/HTTPFetcher.php b/Auth/OpenID/HTTPFetcher.php
index 7cc957c..7fac490 100644
--- a/Auth/OpenID/HTTPFetcher.php
+++ b/Auth/OpenID/HTTPFetcher.php
@@ -13,6 +13,11 @@
  */
 
 /**
+ * Require the parser.
+ */
+require_once "Auth/OpenID/Parse.php";
+
+/**
  * This is the status code beginAuth returns when it is unable to
  * fetch the OpenID URL the user entered.
  */
@@ -129,9 +134,11 @@ class Auth_OpenID_HTTPFetcher {
             return array(Auth_OpenID_HTTP_FAILURE, $http_code);
         }
 
-        $link_attrs = Auth_OpenID_parseLinkAttrs($data);
-        $server = Auth_OpenID_findFirstHref($link_attrs, 'openid.server');
-        $delegate = Auth_OpenID_findFirstHref($link_attrs, 'openid.delegate');
+        $parser = new Auth_OpenID_Parse();
+
+        $link_attrs = $parser->parseLinkAttrs($data);
+        $server = $parser->findFirstHref($link_attrs, 'openid.server');
+        $delegate = $parser->findFirstHref($link_attrs, 'openid.delegate');
 
         if ($server === null) {
             return array(Auth_OpenID_PARSE_ERROR, null);
diff --git a/Auth/OpenID/Parse.php b/Auth/OpenID/Parse.php
new file mode 100644
index 0000000..999c032
--- /dev/null
+++ b/Auth/OpenID/Parse.php
@@ -0,0 +1,290 @@
+<?php
+
+/**
+ * This module implements a VERY limited parser that finds <link> tags
+ * in the head of HTML or XHTML documents and parses out their
+ * attributes according to the OpenID spec. It is a liberal parser,
+ * but it requires these things from the data in order to work:
+ *
+ * - There must be an open <html> tag
+ *
+ * - There must be an open <head> tag inside of the <html> tag
+ *
+ * - Only <link>s that are found inside of the <head> tag are parsed
+ *   (this is by design)
+ *
+ * - The parser follows the OpenID specification in resolving the
+ *   attributes of the link tags. This means that the attributes DO
+ *   NOT get resolved as they would by an XML or HTML parser. In
+ *   particular, only certain entities get replaced, and href
+ *   attributes do not get resolved relative to a base URL.
+ *
+ * From http://openid.net/specs.bml:
+ *
+ * - The openid.server URL MUST be an absolute URL. OpenID consumers
+ *   MUST NOT attempt to resolve relative URLs.
+ *
+ * - The openid.server URL MUST NOT include entities other than &amp;,
+ *   &lt;, &gt;, and &quot;.
+ *
+ * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds
+ * of quoting are allowed for attributes.
+ *
+ * The parser deals with invalid markup in these ways:
+ *
+ * - Tag names are not case-sensitive
+ *
+ * - The <html> tag is accepted even when it is not at the top level
+ *
+ * - The <head> tag is accepted even when it is not a direct child of
+ *   the <html> tag, but a <html> tag must be an ancestor of the
+ *   <head> tag
+ *
+ * - <link> tags are accepted even when they are not direct children
+ *   of the <head> tag, but a <head> tag must be an ancestor of the
+ *   <link> tag
+ *
+ * - If there is no closing tag for an open <html> or <head> tag, the
+ *   remainder of the document is viewed as being inside of the
+ *   tag. If there is no closing tag for a <link> tag, the link tag is
+ *   treated as a short tag. Exceptions to this rule are that <html>
+ *   closes <html> and <body> or <head> closes <head>
+ *
+ * - Attributes of the <link> tag are not required to be quoted.
+ *
+ * - In the case of duplicated attribute names, the attribute coming
+ *   last in the tag will be the value returned.
+ *
+ * - Any text that does not parse as an attribute within a link tag
+ *   will be ignored. (e.g. <link pumpkin rel='openid.server' /> will
+ *   ignore pumpkin)
+ *
+ * - If there is more than one <html> or <head> tag, the parser only
+ *   looks inside of the first one.
+ *
+ * - The contents of closed <script> elements are ignored entirely.
+ *   An unclosed <script> tag is skipped; markup after it is parsed.
+ *
+ * - Any other invalid markup is ignored, including unclosed SGML
+ *   comments and unclosed <![CDATA[blocks.
+ *
+ * PHP versions 4 and 5
+ *
+ * LICENSE: See the COPYING file included in this distribution.
+ *
+ * @access private
+ * @package OpenID
+ * @author JanRain, Inc. <openid@janrain.com>
+ * @copyright 2005 Janrain, Inc.
+ * @license http://www.gnu.org/copyleft/lesser.html LGPL
+ */
+
+/**
+ * Require Auth_OpenID_arrayGet.
+ */
+require_once "Util.php";
+
+class Auth_OpenID_Parse {
+
+    /**
+     * Specify some flags for use with regex matching.
+     */
+    var $_re_flags = "si";
+
+    /**
+     * Stuff to remove before we start looking for tags
+     */
+    var $_removed_re =
+           "<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>";
+
+    /**
+     * Starts with the tag name at a word boundary, where the tag name
+     * is not a namespace
+     */
+    var $_tag_expr = "<%s\b(?!:)([^>]*?)(?:\/>|>(.*?)(?:<\/?%s\s*>|\Z))";
+
+    var $_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)';
+
+    /**
+     * PHP 4 style constructor: wraps the raw pattern fragments in
+     * delimiters and flags and builds the entity replacement table.
+     */
+    function Auth_OpenID_Parse()
+    {
+        $this->_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s",
+                                    $this->_re_flags);
+
+        $this->_entity_replacements = array(
+                                            'amp' => '&',
+                                            'lt' => '<',
+                                            'gt' => '>',
+                                            'quot' => '"'
+                                            );
+
+        $this->_attr_find = sprintf("/%s/%s", $this->_attr_find,
+                                    $this->_re_flags);
+        $this->_removed_re = sprintf("/%s/%s", $this->_removed_re,
+                                     $this->_re_flags);
+
+        // Alternation of the entity NAMES (array keys), not the values.
+        $this->_ent_replace =
+            sprintf("&(%s);",
+                    implode("|", array_keys($this->_entity_replacements)));
+    }
+
+    /**
+     * Returns a regular expression that will match a given tag in an
+     * SGML string.
+     */
+    function tagMatcher($tag_name, $close_tags = null)
+    {
+        if ($close_tags) {
+            $options = implode("|", array_merge(array($tag_name), $close_tags));
+            $closer = sprintf("(?:%s)", $options);
+        } else {
+            $closer = $tag_name;
+        }
+
+        $expr = sprintf($this->_tag_expr, $tag_name, $closer);
+        return sprintf("/%s/%s", $expr, $this->_re_flags);
+    }
+
+    // Pattern matching the <html> element.
+    function htmlFind()
+    {
+        return $this->tagMatcher('html');
+    }
+
+    // Pattern matching the <head> element; <body> also closes it.
+    function headFind()
+    {
+        return $this->tagMatcher('head', array('body'));
+    }
+
+    /**
+     * Decode the entities in $this->_entity_replacements. strtr() makes
+     * a single pass, so "&amp;lt;" yields "&lt;" rather than "<".
+     */
+    function replaceEntities($str)
+    {
+        $map = array();
+        foreach ($this->_entity_replacements as $name => $char) {
+            $map["&" . $name . ";"] = $char;
+        }
+        return strtr($str, $map);
+    }
+
+    // Strip one pair of matching quotes; "s" lets values span lines.
+    function removeQuotes($str)
+    {
+        $matches = array();
+        if (preg_match('/^"(.*)"$/s', $str, $matches) ||
+            preg_match("/^'(.*)'\$/s", $str, $matches)) {
+            return $matches[1];
+        }
+        return $str;
+    }
+
+    /**
+     * Find all link tags in a string representing a HTML document and
+     * return a list of their attributes.
+     *
+     * @param string $html The text to parse
+     * @return array $list An array of arrays of attributes, one for each
+     * link tag
+     */
+    function parseLinkAttrs($html)
+    {
+        $stripped = preg_replace($this->_removed_re, "", $html);
+
+        // Try to find the <HTML> tag.
+        $html_re = $this->htmlFind();
+        $html_matches = array();
+        if (!preg_match($html_re, $stripped, $html_matches)) {
+            return array();
+        }
+
+        // Try to find the <HEAD> tag.
+        $head_re = $this->headFind();
+        $head_matches = array();
+        if (!preg_match($head_re, $html_matches[0], $head_matches)) {
+            return array();
+        }
+
+        $link_data = array();
+        $link_matches = array();
+        if (!preg_match_all($this->_link_find, $head_matches[0],
+                            $link_matches)) {
+            return array();
+        }
+
+        foreach ($link_matches[0] as $link) {
+            $attr_matches = array();
+            preg_match_all($this->_attr_find, $link, $attr_matches);
+            $link_attrs = array();
+            foreach ($attr_matches[0] as $index => $full_match) {
+                $name = $attr_matches[1][$index];
+                $value = $this->replaceEntities(
+                              $this->removeQuotes($attr_matches[2][$index]));
+                // A repeated attribute name keeps its last value.
+                $link_attrs[$name] = $value;
+            }
+            $link_data[] = $link_attrs;
+        }
+
+        return $link_data;
+    }
+
+    function relMatches($rel_attr, $target_rel)
+    {
+        // Does this target_rel appear in the rel_str?
+        // XXX: TESTME
+        $rels = preg_split("/\s+/", trim($rel_attr));
+        foreach ($rels as $rel) {
+            $rel = strtolower($rel);
+            if ($rel === $target_rel) {
+                return 1;
+            }
+        }
+
+        return 0;
+    }
+
+    function linkHasRel($link_attrs, $target_rel)
+    {
+        // Does this link have target_rel as a relationship?
+        // XXX: TESTME
+        $rel_attr = Auth_OpenID_arrayGet($link_attrs, 'rel', null);
+        return ($rel_attr && $this->relMatches($rel_attr, $target_rel));
+    }
+
+    function findLinksRel($link_attrs_list, $target_rel)
+    {
+        // Filter the list of link attributes on whether it has
+        // target_rel as a relationship.
+        // XXX: TESTME
+        $result = array();
+        foreach ($link_attrs_list as $attr) {
+            if ($this->linkHasRel($attr, $target_rel)) {
+                $result[] = $attr;
+            }
+        }
+
+        return $result;
+    }
+
+    function findFirstHref($link_attrs_list, $target_rel)
+    {
+        // Return the value of the href attribute for the first link
+        // tag in the list that has target_rel as a relationship.
+        // XXX: TESTME
+        $matches = $this->findLinksRel($link_attrs_list, $target_rel);
+        if (!$matches) {
+            return null;
+        }
+        $first = $matches[0];
+        return Auth_OpenID_arrayGet($first, 'href', null);
+    }
+}
+
+?>
+\ No newline at end of file
diff --git a/Tests/Auth/OpenID/Parse.php b/Tests/Auth/OpenID/Parse.php
index 4d5dbd3..1748651 100644
--- a/Tests/Auth/OpenID/Parse.php
+++ b/Tests/Auth/OpenID/Parse.php
@@ -14,7 +14,7 @@
  */
 
 require_once 'Tests/Auth/OpenID/TestUtil.php';
-require_once 'Auth/OpenID/Consumer/Parse.php';
+require_once 'Auth/OpenID/Parse.php';
 require_once 'PHPUnit.php';
 
 class Tests_Auth_OpenID_Link extends PHPUnit_TestCase {
@@ -25,6 +25,7 @@ class Tests_Auth_OpenID_Link extends PHPUnit_TestCase {
         $this->markup = $markup;
         $this->expected_links = $links;
         $this->case_text = $case_text;
+        $this->parser = new Auth_OpenID_Parse();
     }
 
     function getName()
@@ -34,7 +35,7 @@ class Tests_Auth_OpenID_Link extends PHPUnit_TestCase {
 
     function runTest()
     {
-        $parsed = Auth_OpenID_parseLinkAttrs($this->markup);
+        $parsed = $this->parser->parseLinkAttrs($this->markup);
         $i = 0;
 
         foreach ($this->expected_links as $expected) {