diff options
-rw-r--r-- | Net/OpenID/Consumer/Parse.php | 110 |
1 files changed, 99 insertions, 11 deletions
diff --git a/Net/OpenID/Consumer/Parse.php b/Net/OpenID/Consumer/Parse.php index ee9fa75..b49d462 100644 --- a/Net/OpenID/Consumer/Parse.php +++ b/Net/OpenID/Consumer/Parse.php @@ -20,17 +20,25 @@ */ $_Net_OpenID_re_flags = "si"; -// Stuff to remove before we start looking for tags +/** + * Stuff to remove before we start looking for tags + */ $_Net_OpenID_removed_re = "<!--.*?-->|" . "<!\[CDATA\[.*?\]\]>|" . - "<script\b(?!:)[^>]*>.*?</script>"; + "<script\b(?!:)[^>]*>.*?<\/script>"; -// Starts with the tag name at a word boundary, where the tag name is -// not a namespace +/** + * Starts with the tag name at a word boundary, where the tag name is + * not a namespace + */ $_Net_OpenID_tag_expr = "<%s\b(?!:)([^>]*?)" . - "(?:/>|>(.*?)" . - "(?:</?%s\s*>|\Z))"; + "(?:\/>|>(.*?)" . + "(?:<\/?%s\s*>|\Z))"; +/** + * Returns a regular expression that will match a given tag in an SGML + * string. + */ function Net_OpenID_tagMatcher($tag_name, $close_tags = null) { global $_Net_OpenID_tag_expr, $_Net_OpenID_re_flags; @@ -56,12 +64,11 @@ function Net_OpenID_head_find() return Net_OpenID_tagMatcher('head'); } -$_Net_OpenID_link_find = sprintf("/<link\b(?!:)/%s", $_Net_OpenID_re_flags); - -$_Net_OpenID_attr_find = "(\w+)=(?:[\"'](.*?)\\1|(?:[^\s<>/]|/(?!>))+)|[<>]/"; -$_Net_OpenID_attr_find = sprintf("/%s/%s", $_Net_OpenID_attr_find, +$_Net_OpenID_link_find = sprintf("/<link\b(?!:)[^\>]*>/%s", $_Net_OpenID_re_flags); +$_Net_OpenID_attr_find = '(\w+)=("[^"]*"|\'[^\']*\'|[^\s>]*)'; + $_Net_OpenID_entity_replacements = array( 'amp' => '&', 'lt' => '<', @@ -69,8 +76,89 @@ $_Net_OpenID_entity_replacements = array( 'quot' => '"' ); -function Net_OpenID_entity_replace() +$_Net_OpenID_attr_find = sprintf("/%s/%s", + $_Net_OpenID_attr_find, + $_Net_OpenID_re_flags); + +$_Net_OpenID_removed_re = sprintf("/%s/%s", + $_Net_OpenID_removed_re, + $_Net_OpenID_re_flags); + +$_Net_OpenID_ent_replace = + sprintf("&(%s);", implode("|", + $_Net_OpenID_entity_replacements)); + +function Net_OpenID_replace_entities($str) +{ + global $_Net_OpenID_entity_replacements; + foreach ($_Net_OpenID_entity_replacements as $old => $new) { + $str = preg_replace(sprintf("/&%s;/", $old), $new, $str); + } + return $str; +} + +function Net_OpenID_remove_quotes($str) +{ + $matches = array(); + $double = '/^"(.*)"$/'; + $single = "/^\'(.*)\'$/"; + + if (preg_match($double, $str, $matches)) { + return $matches[1]; + } else if (preg_match($single, $str, $matches)) { + return $matches[1]; + } else { + return $str; + } +} + +function Net_OpenID_parseLinkAttrs($html) { + + global $_Net_OpenID_removed_re, + $_Net_OpenID_link_find, + $_Net_OpenID_attr_find; + + $stripped = preg_replace($_Net_OpenID_removed_re, + "", + $html); + + // Try to find the <HTML> tag. + $html_re = Net_OpenID_html_find(); + $html_matches = array(); + if (!preg_match($html_re, $stripped, $html_matches)) { + return array(); + } + + // Try to find the <HEAD> tag. + $head_re = Net_OpenID_head_find(); + $head_matches = array(); + if (!preg_match($head_re, $html_matches[0], $head_matches)) { + return array(); + } + + $link_data = array(); + $link_matches = array(); + + if (!preg_match_all($_Net_OpenID_link_find, $head_matches[0], + $link_matches)) { + return array(); + } + + foreach ($link_matches[0] as $link) { + $attr_matches = array(); + preg_match_all($_Net_OpenID_attr_find, $link, $attr_matches); + $link_attrs = array(); + foreach ($attr_matches[0] as $index => $full_match) { + $link_attrs[$attr_matches[1][$index]] = + Net_OpenID_replace_entities( + Net_OpenID_remove_quotes( + $attr_matches[2][$index])); + } + $link_data[] = $link_attrs; + } + + return $link_data; } ?>
\ No newline at end of file |