GenericUri.class.php

Go to the documentation of this file.
00001 <?php
00002 /***************************************************************************
00003  *   Copyright (C) 2007 by Ivan Y. Khvostishkov                            *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU Lesser General Public License as        *
00007  *   published by the Free Software Foundation; either version 3 of the    *
00008  *   License, or (at your option) any later version.                       *
00009  *                                                                         *
00010  ***************************************************************************/
00011 
00017     class GenericUri implements Stringable
00018     {
00019         const CHARS_UNRESERVED      = 'a-z0-9-._~';
00020         const CHARS_SUBDELIMS       = '!$&\'()*+,;=';
00021         const PATTERN_PCTENCODED    = '%[0-9a-f][0-9a-f]';
00022         
00023         protected $scheme       = null;
00024         
00025         protected $userInfo = null;
00026         protected $host     = null;
00027         protected $port     = null;
00028         
00029         protected $path     = null;
00030         protected $query    = null;
00031         protected $fragment = null;
00032         
00036         public static function create()
00037         {
00038             return new self;
00039         }
00040         
00044         final public function parse($uri, $guessClass = false)
00045         {
00046             $schemePattern = '([^:/?#]+):';
00047             $authorityPattern = '(//([^/?#]*))';
00048             $restPattern = '([^?#]*)(\?([^#]*))?(#(.*))?';
00049             $matches = array();
00050             
00051             if (
00052                 $guessClass
00053                 && ($knownSubSchemes = $this->getKnownSubSchemes())
00054                 && preg_match("~^{$schemePattern}~", $uri, $matches)
00055                 && isset($knownSubSchemes[strtolower($matches[1])])
00056             )
00057                 $class = $knownSubSchemes[strtolower($matches[1])];
00058             else
00059                 $class = get_class($this);
00060             
00061             $result = new $class;
00062             
00063             if ($result instanceof Url)
00064                 $pattern = "({$schemePattern}{$authorityPattern})?";
00065             elseif ($result instanceof Urn)
00066                 $pattern = "({$schemePattern})?";
00067             else
00068                 $pattern = "({$schemePattern})?{$authorityPattern}?";
00069             
00070             $pattern = "~^{$pattern}{$restPattern}$~";
00071             
00072             if (!preg_match($pattern, $uri, $matches))
00073                 throw new WrongArgumentException('not well-formed URI');
00074             
00075             array_shift($matches);
00076             
00077             if ($matches[0])
00078                 $result->setScheme($matches[1]);
00079             
00080             array_shift($matches);
00081             array_shift($matches);
00082             
00083             if (!($result instanceof Urn)) {
00084                 if ($matches[0])
00085                     $result->setAuthority($matches[1]);
00086                 
00087                 array_shift($matches);
00088                 array_shift($matches);
00089             }
00090             
00091             $result->setPath($matches[0]);
00092             
00093             if (!empty($matches[1]))
00094                 $result->setQuery($matches[2]);
00095             
00096             if (!empty($matches[3]))
00097                 $result->setFragment($matches[4]);
00098             
00099             return $result;
00100         }
00101         
00106         final public function transform(GenericUri $reference, $strict = true)
00107         {
00108             if ($this->getScheme() === null)
00109                 throw new WrongStateException(
00110                     'URI without scheme cannot be a base URI'
00111                 );
00112             
00113             if (
00114                 $reference->getScheme() !== ($strict ? null : $this->getScheme())
00115             ) {
00116                 $class = get_class($reference);
00117                 $result = new $class;
00118                 
00119                 $result->
00120                     setScheme($reference->getScheme())->
00121                     setUserInfo($reference->getUserInfo())->
00122                     setHost($reference->getHost())->
00123                     setPort($reference->getPort())->
00124                     setPath(self::removeDotSegments($reference->getPath()))->
00125                     setQuery($reference->getQuery());
00126             } else {
00127                 $result = new $this;
00128                 
00129                 $result->setScheme($this->getScheme());
00130                 
00131                 if ($reference->getAuthority() !== null) {
00132                     $result->
00133                         setUserInfo($reference->getUserInfo())->
00134                         setHost($reference->getHost())->
00135                         setPort($reference->getPort())->
00136                         setPath(self::removeDotSegments($reference->getPath()))->
00137                         setQuery($reference->getQuery());
00138                 } else {
00139                     $result->
00140                         setUserInfo($this->getUserInfo())->
00141                         setHost($this->getHost())->
00142                         setPort($this->getPort());
00143                     
00144                     $path = $reference->getPath();
00145                     
00146                     if (!$path) {
00147                         $result->
00148                             setPath($this->getPath())->
00149                             setQuery(
00150                                 $reference->getQuery() !== null
00151                                 ? $reference->getQuery()
00152                                 : $this->getQuery()
00153                             );
00154                     } else {
00155                         $result->setQuery($reference->getQuery());
00156                         
00157                         if ($path[0] == '/')
00158                             $result->setPath($path);
00159                         else
00160                             $result->setPath(
00161                                 self::removeDotSegments(
00162                                     self::mergePath($reference->getPath())
00163                                 )
00164                             );
00165                     }
00166                 }
00167             }
00168             
00169             $result->setFragment($reference->getFragment());
00170             
00171             return $result;
00172         }
00173         
00174         public function getKnownSubSchemes()
00175         {
00176             return array_merge(
00177                 Urn::create()->getKnownSubSchemes(),
00178                 Url::create()->getKnownSubSchemes()
00179             );
00180         }
00181         
00185         public function setScheme($scheme)
00186         {
00187             $this->scheme = $scheme;
00188             
00189             return $this;
00190         }
00191         
00192         public function getScheme()
00193         {
00194             return $this->scheme;
00195         }
00196         
00200         public function setUserInfo($userInfo)
00201         {
00202             $this->userInfo = $userInfo;
00203             
00204             return $this;
00205         }
00206         
00207         public function getUserInfo()
00208         {
00209             return $this->userInfo;
00210         }
00211         
00215         public function setHost($host)
00216         {
00217             $this->host = $host;
00218             
00219             return $this;
00220         }
00221         
00222         public function getHost()
00223         {
00224             return $this->host;
00225         }
00226         
00230         public function setPort($port)
00231         {
00232             $this->port = $port;
00233             
00234             return $this;
00235         }
00236         
00237         public function getPort()
00238         {
00239             return $this->port;
00240         }
00241         
00245         public function setPath($path)
00246         {
00247             $this->path = $path;
00248             
00249             return $this;
00250         }
00251         
00252         public function getPath()
00253         {
00254             return $this->path;
00255         }
00256         
00260         public function setQuery($query)
00261         {
00262             $this->query = $query;
00263             
00264             return $this;
00265         }
00266         
00270         public function appendQuery($string, $separator = '&')
00271         {
00272             $query = $this->query;
00273             
00274             if ($query)
00275                 $query .= $separator;
00276             
00277             $query .= $string;
00278             
00279             $this->setQuery($query);
00280             
00281             return $this;
00282         }
00283         
00284         public function getQuery()
00285         {
00286             return $this->query;
00287         }
00288         
00292         public function setFragment($fragment)
00293         {
00294             $this->fragment = $fragment;
00295             
00296             return $this;
00297         }
00298         
00299         public function getFragment()
00300         {
00301             return $this->fragment;
00302         }
00303         
00307         public function setAuthority($authority)
00308         {
00309             $authorityPattern = '~^(([^@]*)@)?((\[.+\])|([^:]*))(:(.*))?$~';
00310             $authorityMatches = array();
00311             
00312             if (
00313                 !preg_match(
00314                     $authorityPattern, $authority, $authorityMatches
00315                 )
00316             )
00317                 throw new WrongArgumentException(
00318                     'not well-formed authority part'
00319                 );
00320             
00321             if ($authorityMatches[1])
00322                 $this->setUserInfo($authorityMatches[2]);
00323             
00324             $this->setHost($authorityMatches[3]);
00325             
00326             if (!empty($authorityMatches[6]))
00327                 $this->setPort($authorityMatches[7]);
00328             
00329             return $this;
00330         }
00331         
00332         public function getAuthority()
00333         {
00334             $result = null;
00335             
00336             if ($this->userInfo !== null)
00337                 $result .= $this->userInfo.'@';
00338             
00339             if ($this->host !== null)
00340                 $result .= $this->host;
00341             
00342             if ($this->port !== null)
00343                 $result .= ':'.$this->port;
00344             
00345             return $result;
00346         }
00347         
00348         public function setSchemeSpecificPart($schemeSpecificPart)
00349         {
00350             throw new UnsupportedMethodException('use parse() instead');
00351         }
00352         
00353         public function getSchemeSpecificPart()
00354         {
00355             $result = null;
00356             
00357             $authority = $this->getAuthority();
00358             
00359             if ($authority !== null)
00360                 $result .= '//'.$authority;
00361             
00362             $result .= $this->path;
00363             
00364             if ($this->query !== null)
00365                 $result .= '?'.$this->query;
00366             
00367             if ($this->fragment !== null)
00368                 $result .= '#'.$this->fragment;
00369             
00370             return $result;
00371         }
00372         
00373         public function toString()
00374         {
00375             $result = null;
00376             
00377             if ($this->scheme !== null)
00378                 $result .= $this->scheme.':';
00379             
00380             $result .= $this->getSchemeSpecificPart();
00381             
00382             return $result;
00383         }
00384         
00385         public function toStringFromRoot()
00386         {
00387             $result = $this->path;
00388             
00389             if ($this->query !== null)
00390                 $result .= '?'.$this->query;
00391             
00392             if ($this->fragment !== null)
00393                 $result .= '#'.$this->fragment;
00394             
00395             return $result;
00396         }
00397         
00398         public function isValid()
00399         {
00400             return
00401                 $this->isValidScheme()
00402                 && $this->isValidUserInfo()
00403                 && $this->isValidHost()
00404                 && $this->isValidPort()
00405                 && $this->isValidPath()
00406                 && $this->isValidQuery()
00407                 && $this->isValidFragment();
00408         }
00409         
00410         public function isValidScheme()
00411         {
00412             // empty string is NOT valid
00413             return (
00414                 $this->scheme === null
00415                 || preg_match('~^[a-z][-+.a-z0-9]*$~i', $this->scheme) == 1
00416             );
00417         }
00418         
00419         public function isValidUserInfo()
00420         {
00421             // empty string IS valid
00422             if (!$this->userInfo)
00423                 return true;
00424             
00425             $charPattern = $this->userInfoCharPattern();
00426             
00427             return (preg_match("/^$charPattern*$/i", $this->userInfo) == 1);
00428         }
00429         
00430         public function isValidHost()
00431         {
00432             // empty string IS valid
00433             if (empty($this->host))
00434                 return true;
00435             
00436             $decOctet =
00437                 '(\d)|'         // 0-9
00438                 .'([1-9]\d)|'   // 10-99
00439                 .'(1\d\d)|'     // 100-199
00440                 .'(2[0-4]\d)|'  // 200-249
00441                 .'(25[0-5])';   // 250-255
00442             
00443             $ipV4Address = "($decOctet)\.($decOctet)\.($decOctet)\.($decOctet)";
00444             
00445             $hexdig = '[0-9a-f]';
00446             
00447             $h16 = "$hexdig{1,4}";
00448             $ls32 = "(($h16:$h16)|($ipV4Address))";
00449             
00450             $ipV6Address =
00451                 "  (                        ($h16:){6} $ls32)"
00452                 ."|(                      ::($h16:){5} $ls32)"
00453                 ."|(              ($h16)? ::($h16:){4} $ls32)"
00454                 ."|( (($h16:){0,1} $h16)? ::($h16:){3} $ls32)"
00455                 ."|( (($h16:){0,2} $h16)? ::($h16:){2} $ls32)"
00456                 ."|( (($h16:){0,3} $h16)? :: $h16:     $ls32)"
00457                 ."|( (($h16:){0,4} $h16)? ::           $ls32)"
00458                 ."|( (($h16:){0,5} $h16)? ::           $h16 )"
00459                 ."|( (($h16:){0,6} $h16)? ::                )";
00460             
00461             $unreserved = self::CHARS_UNRESERVED;
00462             $subDelims = self::CHARS_SUBDELIMS;
00463             
00464             $ipVFutureAddress =
00465                 "v$hexdig+\.[{$unreserved}{$subDelims}:]+";
00466             
00467             if (
00468                 preg_match(
00469                     "/^\[(($ipV6Address)|($ipVFutureAddress))\]$/ix",
00470                     $this->host
00471                 )
00472             )
00473                 return true;
00474             
00475             if (preg_match("/^$ipV4Address$/i", $this->host)) {
00476                 return true;
00477             }
00478             
00479             return $this->isValidHostName();
00480         }
00481         
00482         public function isValidPort()
00483         {
00484             // empty string IS valid
00485             if (!$this->port)
00486                 return true;
00487             
00488             if (!preg_match('~^\d*$~', $this->port))
00489                 return false;
00490             
00491             return ($this->port > 0 && $this->port <= 65535);
00492         }
00493         
00494         public function isValidPath()
00495         {
00496             $charPattern = $this->segmentCharPattern();
00497             
00498             if (
00499                 !preg_match(
00500                     "/^($charPattern+)?"
00501                     ."(\/$charPattern*)*$/i",
00502                     $this->path
00503                 )
00504             )
00505                 return false;
00506             
00507             if ($this->getAuthority() !== null) {
00508                 // abempty
00509                 if (empty($this->path) || $this->path[0] == '/')
00510                     return true;
00511                 
00512             } elseif ($this->path && $this->path[0] == '/') {
00513                 // absolute
00514                 if ($this->path == '/' || $this->path[1] != '/')
00515                     return true;
00516                 
00517             } elseif ($this->scheme === null && $this->path) {
00518                 // noscheme - first segment must be w/o colon
00519                 
00520                 $segments = explode('/', $this->path);
00521                 
00522                 if (strpos($segments[0], ':') === false)
00523                     return true;
00524                 
00525             } elseif ($this->path) {
00526                 // rootless
00527                 if ($this->path[0] != '/')
00528                     return true;
00529                 
00530             } elseif (!$this->path) {
00531                 // empty
00532                 return true;
00533             }
00534             
00535             return false;
00536         }
00537         
00538         public function isValidQuery()
00539         {
00540             // empty string IS valid
00541             return $this->isValidFragmentOrQuery($this->query);
00542         }
00543         
00544         public function isValidFragment()
00545         {
00546             // empty string IS valid
00547             return $this->isValidFragmentOrQuery($this->fragment);
00548         }
00549         
00550         public function isAbsolute()
00551         {
00552             return ($this->scheme !== null);
00553         }
00554         
00555         public function isRelative()
00556         {
00557             return ($this->scheme === null);
00558         }
00559         
00560         protected function isValidHostName()
00561         {
00562             $charPattern = $this->hostNameCharPattern();
00563             
00564             return (
00565                 preg_match(
00566                     "/^$charPattern*$/i",
00567                     $this->host
00568                 ) == 1
00569             );
00570         }
00571         
00572         protected function charPattern(
00573             $extraChars = null, $pctEncodedPattern = true
00574         )
00575         {
00576             $unreserved = self::CHARS_UNRESERVED;
00577             $subDelims = self::CHARS_SUBDELIMS;
00578             $pctEncoded = self::PATTERN_PCTENCODED;
00579             
00580             $result = "{$unreserved}{$subDelims}$extraChars";
00581             
00582             if ($pctEncodedPattern)
00583                 $result = "(([{$result}])|({$pctEncoded}))";
00584             
00585             return $result;
00586         }
00587         
00588         protected function userInfoCharPattern($pctEncoded = true)
00589         {
00590             return $this->charPattern(':', $pctEncoded);
00591         }
00592         
00593         protected function hostNameCharPattern($pctEncoded = true)
00594         {
00595             return $this->charPattern(null, $pctEncoded);
00596         }
00597         
00598         protected function segmentCharPattern($pctEncoded = true)
00599         {
00600             return $this->charPattern(':@', $pctEncoded);
00601         }
00602         
00603         protected function fragmentOrQueryCharPattern($pctEncoded = true)
00604         {
00605             return $this->charPattern(':@\/?', $pctEncoded);
00606         }
00607         
00608         private function isValidFragmentOrQuery($string)
00609         {
00610             $charPattern = $this->fragmentOrQueryCharPattern();
00611             
00612             return (preg_match("/^$charPattern*$/i", $string) == 1);
00613         }
00614         
00615         private static function removeDotSegments($path)
00616         {
00617             $segments = array();
00618             
00619             while ($path) {
00620                 if (strpos($path, '../') === 0) {
00621                     $path = substr($path, 3);
00622                     
00623                 } elseif (strpos($path, './') === 0) {
00624                     $path = substr($path, 2);
00625                     
00626                 } elseif (strpos($path, '/./') === 0) {
00627                     $path = substr($path, 2);
00628                     
00629                 } elseif ($path == '/.') {
00630                     $path = '/';
00631                     
00632                 } elseif (strpos($path, '/../') === 0) {
00633                     $path = substr($path, 3);
00634                     
00635                     if ($segments) {
00636                         array_pop($segments);
00637                     }
00638                     
00639                 } elseif ($path == '/..') {
00640                     $path = '/';
00641                     
00642                     if ($segments) {
00643                         array_pop($segments);
00644                     }
00645                     
00646                 } elseif (($path == '..') || ($path == '.')) {
00647                     $path = null;
00648                     
00649                 } else {
00650                     $i = 0;
00651                     
00652                     if ($path[0] == '/')
00653                         $i = 1;
00654                     
00655                     $i = strpos($path, '/', $i);
00656                     
00657                     if ($i === false)
00658                         $i = strlen($path);
00659                     
00660                     $segments[] = substr($path, 0, $i);
00661                     
00662                     $path = substr($path, $i);
00663                 }
00664             }
00665             
00666             return implode('', $segments);
00667         }
00668         
00669         private function mergePath($path)
00670         {
00671             if ($this->getAuthority() !== null && !$this->getPath())
00672                 return '/'.$path;
00673             
00674             $segments = explode('/', $this->path);
00675             
00676             array_pop($segments);
00677             
00678             return implode('/', $segments).'/'.$path;
00679         }
00680         
00684         public function normalize()
00685         {
00686             // 1. case
00687             if ($this->getScheme() !== null)
00688                 $this->setScheme(mb_strtolower($this->getScheme()));
00689             
00690             // 2. percent-encoded
00691             $this->
00692                 setHost(
00693                     $this->normalizePercentEncoded(
00694                         $this->getHost(), $this->hostNameCharPattern(false)
00695                     )
00696                 )->
00697                 setUserInfo(
00698                     $this->normalizePercentEncoded(
00699                         $this->getUserInfo(), $this->userInfoCharPattern(false)
00700                     )
00701                 )->
00702                 setPath(
00703                     self::removeDotSegments(
00704                         $this->normalizePercentEncoded(
00705                             $this->getPath(),
00706                             '\/'.$this->segmentCharPattern(false)
00707                         )
00708                     )
00709                 )->
00710                 setQuery(
00711                     $this->normalizePercentEncoded(
00712                         $this->getQuery(),
00713                         $this->fragmentOrQueryCharPattern(false)
00714                     )
00715                 )->
00716                 setFragment(
00717                     $this->normalizePercentEncoded(
00718                         $this->getFragment(),
00719                         $this->fragmentOrQueryCharPattern(false)
00720                     )
00721                 );
00722             
00723             // 3. and case again
00724             if ($this->getHost() !== null)
00725                 $this->setHost(mb_strtolower($this->getHost()));
00726             
00727             return $this;
00728         }
00729         
00730         private function normalizePercentEncoded(
00731             $string, $unreservedPartChars
00732         )
00733         {
00734             if ($string === null)
00735                 return null;
00736             
00737             $result = preg_replace_callback(
00738                 '/(('.self::PATTERN_PCTENCODED.')|(.))/sui',
00739                 array(
00740                     PercentEncodingNormalizator::create()->
00741                         setUnreservedPartChars($unreservedPartChars),
00742                     'normalize'
00743                 ),
00744                 $string
00745             );
00746             
00747             return $result;
00748         }
00749     }
00750     
00754     final class PercentEncodingNormalizator
00755     {
00756         private $unreservedPartChars = null;
00757         
00761         public static function create()
00762         {
00763             return new self;
00764         }
00765         
00769         public function setUnreservedPartChars($unreservedPartChars)
00770         {
00771             $this->unreservedPartChars = $unreservedPartChars;
00772             return $this;
00773         }
00774         
00775         public function normalize($matched)
00776         {
00777             $char = $matched[0];
00778             if (mb_strlen($char) == 1) {
00779                 if (
00780                     !preg_match(
00781                         '/^['.$this->unreservedPartChars.']$/',
00782                         $char
00783                     )
00784                 )
00785                     $char = rawurlencode($char);
00786             } else {
00787                 if (
00788                     preg_match(
00789                         '/^['.GenericUri::CHARS_UNRESERVED.']$/',
00790                         rawurldecode($char)
00791                     )
00792                 )
00793                     $char = rawurldecode($char);
00794                 else
00795                     $char = strtoupper($char);
00796             }
00797             return $char;
00798         }
00799     }
00800 ?>