summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Net/OpenID/Consumer/Parse.php 110
1 files changed, 99 insertions, 11 deletions
diff --git a/Net/OpenID/Consumer/Parse.php b/Net/OpenID/Consumer/Parse.php
index ee9fa75..b49d462 100644
--- a/Net/OpenID/Consumer/Parse.php
+++ b/Net/OpenID/Consumer/Parse.php
@@ -20,17 +20,25 @@
*/
$_Net_OpenID_re_flags = "si";
-// Stuff to remove before we start looking for tags
+/**
+ * Stuff to remove before we start looking for tags
+ */
$_Net_OpenID_removed_re = "<!--.*?-->|" .
"<!\[CDATA\[.*?\]\]>|" .
- "<script\b(?!:)[^>]*>.*?</script>";
+ "<script\b(?!:)[^>]*>.*?<\/script>";
-// Starts with the tag name at a word boundary, where the tag name is
-// not a namespace
+/**
+ * Starts with the tag name at a word boundary, where the tag name is
+ * not a namespace
+ */
$_Net_OpenID_tag_expr = "<%s\b(?!:)([^>]*?)" .
- "(?:/>|>(.*?)" .
- "(?:</?%s\s*>|\Z))";
+ "(?:\/>|>(.*?)" .
+ "(?:<\/?%s\s*>|\Z))";
+/**
+ * Returns a regular expression that will match a given tag in an SGML
+ * string.
+ */
function Net_OpenID_tagMatcher($tag_name, $close_tags = null)
{
global $_Net_OpenID_tag_expr, $_Net_OpenID_re_flags;
@@ -56,12 +64,11 @@ function Net_OpenID_head_find()
return Net_OpenID_tagMatcher('head');
}
-$_Net_OpenID_link_find = sprintf("/<link\b(?!:)/%s", $_Net_OpenID_re_flags);
-
-$_Net_OpenID_attr_find = "(\w+)=(?:[\"'](.*?)\\1|(?:[^\s<>/]|/(?!>))+)|[<>]/";
-$_Net_OpenID_attr_find = sprintf("/%s/%s", $_Net_OpenID_attr_find,
+$_Net_OpenID_link_find = sprintf("/<link\b(?!:)[^\>]*>/%s",
$_Net_OpenID_re_flags);
+$_Net_OpenID_attr_find = '(\w+)=("[^"]*"|\'[^\']*\'|[^\s>]*)';
+
$_Net_OpenID_entity_replacements = array(
'amp' => '&',
'lt' => '<',
@@ -69,8 +76,89 @@ $_Net_OpenID_entity_replacements = array(
'quot' => '"'
);
-function Net_OpenID_entity_replace()
+// Wrap the attribute pattern in "/" delimiters and append the shared
+// regex flags so it can be passed straight to the preg_* functions.
+$_Net_OpenID_attr_find = sprintf("/%s/%s",
+                                 $_Net_OpenID_attr_find,
+                                 $_Net_OpenID_re_flags);
+
+// Same treatment for the pattern describing markup that is stripped
+// before tags are searched for.
+$_Net_OpenID_removed_re = sprintf("/%s/%s",
+                                  $_Net_OpenID_removed_re,
+                                  $_Net_OpenID_re_flags);
+
+// Pattern source matching a reference to any known entity.  The
+// alternation has to be built from the entity *names* (the array
+// keys); imploding the values would produce "&(&|<|>|\");", which
+// matches no entity reference at all.  Note that this string carries
+// no delimiters or flags.
+$_Net_OpenID_ent_replace =
+    sprintf("&(%s);", implode("|",
+                              array_keys($_Net_OpenID_entity_replacements)));
+
+/**
+ * Decode the entity references listed in
+ * $_Net_OpenID_entity_replacements (e.g. "&amp;" becomes "&").
+ *
+ * All entities are replaced in a single left-to-right pass, so the
+ * output of one replacement is never decoded again: "&amp;lt;"
+ * becomes "&lt;", not "<".  Matching is case-sensitive and entity
+ * references that are not in the table are left untouched.
+ *
+ * @param string $str Text that may contain entity references
+ * @return string The text with known entity references decoded
+ */
+function Net_OpenID_replace_entities($str)
+{
+    global $_Net_OpenID_entity_replacements;
+
+    // Build a literal "&name;" => character translation table.
+    $pairs = array();
+    foreach ($_Net_OpenID_entity_replacements as $name => $char) {
+        $pairs['&' . $name . ';'] = $char;
+    }
+
+    // Nothing to translate; also avoids handing strtr() an empty table.
+    if (!$pairs) {
+        return $str;
+    }
+
+    // strtr() with a table never rescans text it has already replaced.
+    return strtr($str, $pairs);
+}
+
+/**
+ * Strip one pair of matching quotes surrounding a string.
+ *
+ * If $str begins and ends with the same quote character (either " or
+ * '), the text between them is returned; otherwise $str is returned
+ * unchanged.  The "s" modifier lets the quoted text span several
+ * lines, which quoted attribute values are allowed to do.
+ *
+ * @param string $str A possibly-quoted value
+ * @return string The value without its surrounding quotes
+ */
+function Net_OpenID_remove_quotes($str)
+{
+    $matches = array();
+
+    // \1 forces the closing quote to be the same kind as the opening one.
+    if (preg_match('/^(["\'])(.*)\\1$/s', $str, $matches)) {
+        return $matches[2];
+    }
+
+    return $str;
+}
+
+/**
+ * Collect the attributes of every <link> tag found in the head of an
+ * HTML document.
+ *
+ * Markup matched by $_Net_OpenID_removed_re is deleted first.  The
+ * search is then narrowed to the match of Net_OpenID_html_find(),
+ * and within that to the match of Net_OpenID_head_find().  For each
+ * <link> tag found there, every name=value pair is recorded with
+ * surrounding quotes removed and entity references decoded.
+ *
+ * @param string $html The document text
+ * @return array One associative array (attribute name => value) per
+ * <link> tag, in document order; an empty array when any of the
+ * searches finds nothing
+ */
+function Net_OpenID_parseLinkAttrs($html)
{
+    global $_Net_OpenID_removed_re,
+           $_Net_OpenID_link_find,
+           $_Net_OpenID_attr_find;
+
+    // Get rid of the markup that must not be searched for tags.
+    $clean = preg_replace($_Net_OpenID_removed_re, "", $html);
+
+    // Narrow down to the <HTML> element.
+    $html_found = array();
+    if (!preg_match(Net_OpenID_html_find(), $clean, $html_found)) {
+        return array();
+    }
+
+    // Narrow down further to the <HEAD> element.
+    $head_found = array();
+    if (!preg_match(Net_OpenID_head_find(), $html_found[0], $head_found)) {
+        return array();
+    }
+
+    // Pick out every <link ...> tag inside the head.
+    $tags = array();
+    if (!preg_match_all($_Net_OpenID_link_find, $head_found[0], $tags)) {
+        return array();
+    }
+
+    $result = array();
+    foreach ($tags[0] as $tag) {
+        $found = array();
+        preg_match_all($_Net_OpenID_attr_find, $tag, $found);
+
+        // Group 1 holds the attribute names, group 2 the raw values.
+        $attrs = array();
+        foreach ($found[1] as $i => $name) {
+            $unquoted = Net_OpenID_remove_quotes($found[2][$i]);
+            $attrs[$name] = Net_OpenID_replace_entities($unquoted);
+        }
+        $result[] = $attrs;
+    }
+
+    return $result;
}
?> \ No newline at end of file