diff options
-rw-r--r-- | Auth/OpenID/URINorm.php | 252 | ||||
-rw-r--r-- | Tests/Auth/OpenID/URINorm.php | 68 | ||||
-rw-r--r-- | Tests/Auth/OpenID/data/urinorm.txt | 79 | ||||
-rw-r--r-- | Tests/TestDriver.php | 3 |
4 files changed, 401 insertions, 1 deletions
<?php

/**
 * URI normalization routines.
 *
 * @package OpenID
 * @author JanRain, Inc. <openid@janrain.com>
 * @copyright 2005 Janrain, Inc.
 * @license http://www.gnu.org/copyleft/lesser.html LGPL
 */

// Bind the lookup tables to the global scope explicitly.  The functions
// below reach them with the "global" keyword, which only works if the
// variables really are globals -- and that is not the case by default
// when this file is included from inside a function.
global $__uri_pattern, $__authority_pattern, $__pct_encoded_pattern,
    $__UCSCHAR, $__IPRIVATE, $_unreserved, $_escapeme_re;

// from appendix B of rfc 3986 (http://www.ietf.org/rfc/rfc3986.txt)
// Groups: 2 = scheme, 4 = authority, 5 = path, 6 = "?query", 8 = "#fragment"
$__uri_pattern = '&^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?&';

// Groups: 1 = "userinfo@", 2 = host, 3 = ":port"
$__authority_pattern = '/^([^@]*@)?([^:]*)(:.*)?/';

// A single percent-escape; group 1 is the two hex digits.
$__pct_encoded_pattern = '/%([0-9A-Fa-f]{2})/';

// Inclusive code point ranges, as array(first, last) pairs.
$__UCSCHAR = array(
                   array(0xA0, 0xD7FF),
                   array(0xF900, 0xFDCF),
                   array(0xFDF0, 0xFFEF),
                   array(0x10000, 0x1FFFD),
                   array(0x20000, 0x2FFFD),
                   array(0x30000, 0x3FFFD),
                   array(0x40000, 0x4FFFD),
                   array(0x50000, 0x5FFFD),
                   array(0x60000, 0x6FFFD),
                   array(0x70000, 0x7FFFD),
                   array(0x80000, 0x8FFFD),
                   array(0x90000, 0x9FFFD),
                   array(0xA0000, 0xAFFFD),
                   array(0xB0000, 0xBFFFD),
                   array(0xC0000, 0xCFFFD),
                   array(0xD0000, 0xDFFFD),
                   array(0xE1000, 0xEFFFD)
                   );

$__IPRIVATE = array(
                    array(0xE000, 0xF8FF),
                    array(0xF0000, 0xFFFFD),
                    array(0x100000, 0x10FFFD)
                    );

// $_unreserved[$byte] is true when chr($byte) is one of the unreserved
// characters: A-Z, a-z, 0-9, "-", ".", "_" and "~".
$_unreserved = array();
for ($i = 0; $i < 256; $i++) {
    $_unreserved[$i] = false;
}

for ($i = ord('A'); $i <= ord('Z'); $i++) {
    $_unreserved[$i] = true;
}

for ($i = ord('0'); $i <= ord('9'); $i++) {
    $_unreserved[$i] = true;
}

for ($i = ord('a'); $i <= ord('z'); $i++) {
    $_unreserved[$i] = true;
}

$_unreserved[ord('-')] = true;
$_unreserved[ord('.')] = true;
$_unreserved[ord('_')] = true;
$_unreserved[ord('~')] = true;

// Character class covering every range in $__UCSCHAR and $__IPRIVATE.
// The ranges are written as \x{HEX} escapes (formatting the integers as
// plain decimal text does not describe code points), so the class must
// be used in a pattern that carries the "u" modifier, e.g.
// '/' . $_escapeme_re . '/u'.
$parts = array();
foreach (array_merge($__UCSCHAR, $__IPRIVATE) as $pair) {
    list($m, $n) = $pair;
    $parts[] = sprintf('\x{%X}-\x{%X}', $m, $n);
}

$_escapeme_re = sprintf('[%s]', implode('', $parts));

/**
 * Tell whether a string begins with a given prefix.
 *
 * @param string $s     The string to inspect
 * @param string $stuff The prefix to look for
 * @return bool True if $s begins with $stuff
 */
function startswith($s, $stuff)
{
    // Compare only the leading bytes instead of searching the whole
    // string for the prefix.
    return strncmp($s, $stuff, strlen($stuff)) === 0;
}

/**
 * preg_replace_callback() callback: percent-escape every byte of the
 * matched text.
 *
 * @param array $char_match Match array; element 0 is the matched text
 * @return string One "%XX" escape (uppercase hex) per byte of the match
 */
function _pct_escape_unicode($char_match)
{
    $c = $char_match[0];
    $result = "";
    for ($i = 0; $i < strlen($c); $i++) {
        // Always two hex digits: an escape is "%" followed by exactly
        // two digits, so bytes below 0x10 need a leading zero.
        $result .= sprintf("%%%02X", ord($c[$i]));
    }
    return $result;
}

/**
 * preg_replace_callback() callback for $__pct_encoded_pattern: decode
 * an escape that stands for an unreserved character and uppercase the
 * hex digits of any other escape.
 *
 * @param array $mo Match array; element 0 is the whole escape and
 * element 1 its two hex digits
 * @return string The decoded character or the uppercased escape
 */
function _pct_encoded_replace_unreserved($mo)
{
    global $_unreserved;

    $i = intval($mo[1], 16);
    if ($_unreserved[$i]) {
        return chr($i);
    }

    return strtoupper($mo[0]);
}

/**
 * preg_replace_callback() callback for $__pct_encoded_pattern: decode
 * an escape unconditionally.
 *
 * @param array $mo Match array; element 1 is the two hex digits
 * @return string The single byte the escape stands for
 */
function _pct_encoded_replace($mo)
{
    return chr(intval($mo[1], 16));
}

/**
 * Remove "." and ".." segments from a URI path.
 *
 * @param string $path The path to clean up
 * @return string The path with its dot segments resolved
 */
function remove_dot_segments($path)
{
    // Every "is there input left?" test below compares against '' on
    // purpose: a plain truth test would treat the path "0" as empty.
    // The (string) casts cover substr() returning false instead of ''
    // on older PHP versions when nothing is left.
    $path = (string) $path;
    $result_segments = array();

    while ($path !== '') {
        if (startswith($path, '../')) {
            $path = (string) substr($path, 3);
        } else if (startswith($path, './')) {
            $path = (string) substr($path, 2);
        } else if (startswith($path, '/./')) {
            // Keep the second slash as the start of the next segment.
            $path = (string) substr($path, 2);
        } else if ($path === '/.') {
            $path = '/';
        } else if (startswith($path, '/../')) {
            // Step back one segment, if there is one to drop.
            $path = (string) substr($path, 3);
            if ($result_segments) {
                array_pop($result_segments);
            }
        } else if ($path === '/..') {
            $path = '/';
            if ($result_segments) {
                array_pop($result_segments);
            }
        } else if (($path === '..') ||
                   ($path === '.')) {
            $path = '';
        } else {
            // Move one whole segment, including its leading slash if
            // it has one, from the input to the output.
            $i = 0;
            if ($path[0] == '/') {
                $i = 1;
            }
            $i = strpos($path, '/', $i);
            if ($i === false) {
                $i = strlen($path);
            }
            $result_segments[] = substr($path, 0, $i);
            $path = (string) substr($path, $i);
        }
    }

    return implode('', $result_segments);
}

/**
 * Normalize an absolute HTTP or HTTPS URI.
 *
 * The scheme and host are lowercased, escapes in the host are decoded,
 * an empty or default port (":", ":80" for http, ":443" for https) is
 * dropped, escapes of unreserved characters in the path are decoded
 * and the remaining path escapes uppercased, dot segments are removed
 * and an empty path becomes "/".  The query and fragment are copied
 * through unchanged.  This function produces no output.
 *
 * @param string $uri The URI to normalize
 * @return string|null The normalized URI, or null if $uri has no
 * scheme, a scheme other than http or https, or no authority
 */
function Auth_OpenID_urinorm($uri)
{
    global $__uri_pattern, $__authority_pattern, $__pct_encoded_pattern;

    $uri_matches = array();
    if (!preg_match($__uri_pattern, $uri, $uri_matches)) {
        // Could not be split into components at all
        return null;
    }

    // preg_match() leaves out trailing groups that did not take part
    // in the match; pad so that every index read below exists.
    $uri_matches = array_pad($uri_matches, 10, '');

    $scheme = strtolower($uri_matches[2]);
    if (!in_array($scheme, array('http', 'https'), true)) {
        // No scheme, or not an absolute HTTP or HTTPS URI
        return null;
    }

    $authority = $uri_matches[4];
    if ($authority === '') {
        // Not an absolute URI
        return null;
    }

    $authority_matches = array();
    if (!preg_match($__authority_pattern, $authority, $authority_matches)) {
        // URI does not have a valid authority
        return null;
    }

    $authority_matches = array_pad($authority_matches, 4, '');
    list($_whole, $userinfo, $host, $port) = $authority_matches;

    $host = strtolower($host);
    // strpos() reports "not found" as false, never as -1.
    if (strpos($host, '%') !== false) {
        $host = preg_replace_callback(
                  $__pct_encoded_pattern, '_pct_encoded_replace', $host);
        // NO IDNA.
        // $host = unicode($host, 'utf-8').encode('idna');
    }

    if (($port === ':') ||
        ($scheme === 'http' && $port === ':80') ||
        ($scheme === 'https' && $port === ':443')) {
        $port = '';
    }

    $authority = $userinfo . $host . $port;

    $path = $uri_matches[5];
    $path = preg_replace_callback(
               $__pct_encoded_pattern,
               '_pct_encoded_replace_unreserved', $path);

    $path = remove_dot_segments($path);
    if ($path === '') {
        $path = '/';
    }

    // Groups 6 and 8 still carry their leading "?" and "#".
    $query = $uri_matches[6];
    $fragment = $uri_matches[8];

    return $scheme . '://' . $authority . $path . $query . $fragment;
}
<?php

/**
 * Tests for the URI normalization routines used by the OpenID
 * library.
 *
 * PHP versions 4 and 5
 *
 * LICENSE: See the COPYING file included in this distribution.
 *
 * @package OpenID
 * @author JanRain, Inc. <openid@janrain.com>
 * @copyright 2005 Janrain, Inc.
 * @license http://www.gnu.org/copyleft/lesser.html LGPL
 */

require_once 'PHPUnit.php';
require_once 'Auth/OpenID/URINorm.php';
require_once 'Tests/Auth/OpenID/TestUtil.php';

/**
 * One normalization case: a URI and the result expected from
 * Auth_OpenID_urinorm() for it.
 */
class Tests_Auth_OpenID_URINorm_TestCase extends PHPUnit_TestCase {
    /**
     * @param string      $name     Name to give this test
     * @param string      $uri      URI to pass to the normalizer
     * @param string|null $expected Expected result of the normalizer
     */
    function Tests_Auth_OpenID_URINorm_TestCase($name, $uri, $expected)
    {
        $this->setName($name);
        $this->uri = $uri;
        $this->expected = $expected;
    }

    /**
     * Normalize the stored URI and compare with the stored expectation.
     */
    function runTest()
    {
        $this->assertEquals($this->expected,
                            Auth_OpenID_urinorm($this->uri));
    }
}

/**
 * Suite holding one test case per record read from urinorm.txt.
 */
class Tests_Auth_OpenID_URINorm extends PHPUnit_TestSuite {
    /**
     * Read the cases out of urinorm.txt.
     *
     * Records are taken at a stride of four lines: name, URI,
     * expected result; the fourth line of each record is not read.
     * An expected result of "fail" is turned into null.
     *
     * @return array List of array(name, uri, expected) triples
     */
    function _readTestCases()
    {
        $lines = Tests_Auth_OpenID_readlines('urinorm.txt');
        $total = count($lines);
        $cases = array();

        // A record is only taken when all three of its lines exist.
        for ($start = 0; $start + 3 <= $total; $start += 4) {
            $expected = trim($lines[$start + 2]);
            $cases[] = array(
                trim($lines[$start]),
                trim($lines[$start + 1]),
                ($expected == 'fail') ? null : $expected);
        }

        return $cases;
    }

    /**
     * @param string $name Name to give this suite
     */
    function Tests_Auth_OpenID_URINorm($name)
    {
        $this->setName($name);

        foreach ($this->_readTestCases() as $case) {
            $this->addTest(new Tests_Auth_OpenID_URINorm_TestCase(
                               $case[0], $case[1], $case[2]));
        }
    }
}
\ No newline at end of file diff --git a/Tests/Auth/OpenID/data/urinorm.txt b/Tests/Auth/OpenID/data/urinorm.txt new file mode 100644 index 0000000..95262fe --- /dev/null +++ b/Tests/Auth/OpenID/data/urinorm.txt @@ -0,0 +1,79 @@ +Already normal form +http://example.com/ +http://example.com/ + +Add a trailing slash +http://example.com +http://example.com/ + +Remove an empty port segment +http://example.com:/ +http://example.com/ + +Remove a default port segment +http://example.com:80/ +http://example.com/ + +Capitalization in host names +http://wWw.exaMPLE.COm/ +http://www.example.com/ + +Capitalization in scheme names +htTP://example.com/ +http://example.com/ + +Capitalization in percent-escaped reserved characters +http://example.com/foo%2cbar +http://example.com/foo%2Cbar + +Unescape percent-encoded unreserved characters +http://example.com/foo%2Dbar%2dbaz +http://example.com/foo-bar-baz + +remove_dot_segments example 1 +http://example.com/a/b/c/./../../g +http://example.com/a/g + +remove_dot_segments example 2 +http://example.com/mid/content=5/../6 +http://example.com/mid/6 + +remove_dot_segments: single-dot +http://example.com/a/./b +http://example.com/a/b + +remove_dot_segments: double-dot +http://example.com/a/../b +http://example.com/b + +remove_dot_segments: leading double-dot +http://example.com/../b +http://example.com/b + +remove_dot_segments: trailing single-dot +http://example.com/a/. +http://example.com/a/ + +remove_dot_segments: trailing double-dot +http://example.com/a/.. +http://example.com/ + +remove_dot_segments: trailing single-dot-slash +http://example.com/a/./ +http://example.com/a/ + +remove_dot_segments: trailing double-dot-slash +http://example.com/a/../ +http://example.com/ + +Test of all kinds of syntax-based normalization +hTTPS://a/./b/../b/%63/%7bfoo%7d +https://a/b/c/%7Bfoo%7D + +Unsupported scheme +ftp://example.com/ +fail + +Non-absolute URI +http:/foo +fail
\ No newline at end of file diff --git a/Tests/TestDriver.php b/Tests/TestDriver.php index 4827c3e..0fa94d9 100644 --- a/Tests/TestDriver.php +++ b/Tests/TestDriver.php @@ -117,7 +117,8 @@ $_test_names = array( 'Server', 'TrustRoot', 'Discover', - 'OpenID_Yadis' + 'OpenID_Yadis', + 'URINorm' ); function selectTests($names) |