<?php
/**
 *  base include file for SimpleTest
 *  @package    SimpleTest
 *  @subpackage WebTester
 *  @version    $Id: url.php 2011 2011-04-29 08:22:48Z pp11 $
 */

/**#@+
 *  include other SimpleTest class files
 */
require_once(dirname(__FILE__) . '/encoding.php');
/**#@-*/

/**
 *    URL parser to replace parse_url() PHP function which
 *    got broken in PHP 4.3.0. Adds some browser specific
 *    functionality such as expandomatics.
 *    Guesses a bit trying to separate the host from
 *    the path and tries to keep a raw, possibly unparsable,
 *    request string as long as possible.
 *    @package SimpleTest
 *    @subpackage WebTester
 */
class SimpleUrl {
    private $scheme;
    private $username;
    private $password;
    private $host;
    private $port;
    public $path;
    private $request;
    private $fragment;
    private $x;
    private $y;
    private $target;
    private $raw = false;

    /**
     *    Constructor. Parses URL into sections.
     *    @param string $url        Incoming URL.
     *    @access public
     */
    function __construct($url = '') {
        list($x, $y) = $this->chompCoordinates($url);
        $this->setCoordinates($x, $y);
        $this->scheme = $this->chompScheme($url);
        if ($this->scheme === 'file') {
            // Unescaped backslashes not used in directory separator context
            // will get caught by this, but they should have been urlencoded
            // anyway so we don't care. If this ends up being a problem, the
            // host regexp must be modified to match for backslashes when
            // the scheme is file.
            $url = str_replace('\\', '/', $url);
        }
        list($this->username, $this->password) = $this->chompLogin($url);
        $this->host = $this->chompHost($url);
        $this->port = false;
        if (preg_match('/(.*?):(.*)/', $this->host, $host_parts)) {
            if ($this->scheme === 'file' && strlen($this->host) === 2) {
                // DOS drive was placed in authority; promote it to path.
                $url = '/' . $this->host . $url;
                $this->host = false;
            } else {
                $this->host = $host_parts[1];
                $this->port = (integer)$host_parts[2];
            }
        }
        $this->path = $this->chompPath($url);
        $this->request = $this->parseRequest($this->chompRequest($url));
        $this->fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
        $this->target = false;
    }

    /**
     *    Extracts the X, Y coordinate pair from an image map.
     *    @param string $url   URL so far. The coordinates will be
     *                         removed.
     *    @return array        X, Y as a pair of integers.
     *    @access private
     */
    protected function chompCoordinates(&$url) {
        if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
            $url = $matches[1];
            return array((integer)$matches[2], (integer)$matches[3]);
        }
        return array(false, false);
    }

    /**
     *    Extracts the scheme part of an incoming URL.
     *    @param string $url   URL so far. The scheme will be
     *                         removed.
     *    @return string       Scheme part or false.
     *    @access private
     */
    protected function chompScheme(&$url) {
        if (preg_match('#^([^/:]*):(//)(.*)#', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return false;
    }

    /**
     *    Extracts the username and password from the
     *    incoming URL. The // prefix will be reattached
     *    to the URL after the doublet is extracted.
     *    @param string $url    URL so far. The username and
     *                          password are removed.
     *    @return array         Two item list of username and
     *                          password. Will urldecode() them.
     *    @access private
     */
    protected function chompLogin(&$url) {
        $prefix = '';
        if (preg_match('#^(//)(.*)#', $url, $matches)) {
            $prefix = $matches[1];
            $url = $matches[2];
        }
        if (preg_match('#^([^/]*)@(.*)#', $url, $matches)) {
            $url = $prefix . $matches[2];
            $parts = explode(":", $matches[1]);
            return array(
                    urldecode($parts[0]),
                    isset($parts[1]) ? urldecode($parts[1]) : false);
        }
        $url = $prefix . $url;
        return array(false, false);
    }

    /**
     *    Extracts the host part of an incoming URL.
     *    Includes the port number part. Will extract
     *    the host if it starts with // or it has
     *    a top level domain or it has at least two
     *    dots.
     *    @param string $url    URL so far. The host will be
     *                          removed.
     *    @return string        Host part guess or false.
     *    @access private
     */
    protected function chompHost(&$url) {
        if (preg_match('!^(//)(.*?)(/.*|\?.*|#.*|$)!', $url, $matches)) {
            $url = $matches[3];
            return $matches[2];
        }
        if (preg_match('!(.*?)(\.\./|\./|/|\?|#|$)(.*)!', $url, $matches)) {
            $tlds = SimpleUrl::getAllTopLevelDomains();
            if (preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1])) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            } elseif (preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1])) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            }
        }
        return false;
    }

    /**
     *    Extracts the path information from the incoming
     *    URL. Strips this path from the URL.
     *    @param string $url     URL so far. The host will be
     *                           removed.
     *    @return string         Path part or '/'.
     *    @access private
     */
    protected function chompPath(&$url) {
        if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return ($matches[1] ? $matches[1] : '');
        }
        return '';
    }

    /**
     *    Strips off the request data.
     *    @param string $url  URL so far. The request will be
     *                        removed.
     *    @return string      Raw request part.
     *    @access private
     */
    protected function chompRequest(&$url) {
        if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return '';
    }

    /**
     *    Breaks the request down into an object.
     *    @param string $raw           Raw request.
     *    @return SimpleFormEncoding    Parsed data.
     *    @access private
     */
    protected function parseRequest($raw) {
        $this->raw = $raw;
        $request = new SimpleGetEncoding();
        foreach (explode("&", $raw) as $pair) {
            if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
                $request->add(urldecode($matches[1]), urldecode($matches[2]));
            } elseif ($pair) {
                $request->add(urldecode($pair), '');
            }
        }
        return $request;
    }

    /**
     *    Accessor for protocol part.
     *    @param string $default    Value to use if not present.
     *    @return string            Scheme name, e.g "http".
     *    @access public
     */
    function getScheme($default = false) {
        return $this->scheme ? $this->scheme : $default;
    }

    /**
     *    Accessor for user name.
     *    @return string    Username preceding host.
     *    @access public
     */
    function getUsername() {
        return $this->username;
    }

    /**
     *    Accessor for password.
     *    @return string    Password preceding host.
     *    @access public
     */
    function getPassword() {
        return $this->password;
    }

    /**
     *    Accessor for hostname and port.
     *    @param string $default    Value to use if not present.
     *    @return string            Hostname only.
     *    @access public
     */
    function getHost($default = false) {
        return $this->host ? $this->host : $default;
    }

    /**
     *    Accessor for top level domain.
     *    @return string       Last part of host.
     *    @access public
     */
    function getTld() {
        $path_parts = pathinfo($this->getHost());
        return (isset($path_parts['extension']) ? $path_parts['extension'] : false);
    }

    /**
     *    Accessor for port number.
     *    @return integer    TCP/IP port number.
     *    @access public
     */
    function getPort() {
        return $this->port;
    }

    /**
     *    Accessor for path.
     *    @return string    Full path including leading slash if implied.
     *    @access public
     */
    function getPath() {
        if (! $this->path && $this->host) {
            return '/';
        }
        return $this->path;
    }

    /**
     *    Accessor for page if any. This may be a
     *    directory name if ambiguious.
     *    @return            Page name.
     *    @access public
     */
    function getPage() {
        if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
            return false;
        }
        return $matches[1];
    }

    /**
     *    Gets the path to the page.
     *    @return string       Path less the page.
     *    @access public
     */
    function getBasePath() {
        if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
            return false;
        }
        return $matches[1];
    }

    /**
     *    Accessor for fragment at end of URL after the "#".
     *    @return string    Part after "#".
     *    @access public
     */
    function getFragment() {
        return $this->fragment;
    }

    /**
     *    Sets image coordinates. Set to false to clear
     *    them.
     *    @param integer $x    Horizontal position.
     *    @param integer $y    Vertical position.
     *    @access public
     */
    function setCoordinates($x = false, $y = false) {
        if (($x === false) || ($y === false)) {
            $this->x = $this->y = false;
            return;
        }
        $this->x = (integer)$x;
        $this->y = (integer)$y;
    }

    /**
     *    Accessor for horizontal image coordinate.
     *    @return integer        X value.
     *    @access public
     */
    function getX() {
        return $this->x;
    }

    /**
     *    Accessor for vertical image coordinate.
     *    @return integer        Y value.
     *    @access public
     */
    function getY() {
        return $this->y;
    }

    /**
     *    Accessor for current request parameters
     *    in URL string form. Will return teh original request
     *    if at all possible even if it doesn't make much
     *    sense.
     *    @return string   Form is string "?a=1&b=2", etc.
     *    @access public
     */
    function getEncodedRequest() {
        if ($this->raw) {
            $encoded = $this->raw;
        } else {
            $encoded = $this->request->asUrlRequest();
        }
        if ($encoded) {
            return '?' . preg_replace('/^\?/', '', $encoded);
        }
        return '';
    }

    /**
     *    Adds an additional parameter to the request.
     *    @param string $key            Name of parameter.
     *    @param string $value          Value as string.
     *    @access public
     */
    function addRequestParameter($key, $value) {
        $this->raw = false;
        $this->request->add($key, $value);
    }

    /**
     *    Adds additional parameters to the request.
     *    @param hash/SimpleFormEncoding $parameters   Additional
     *                                                parameters.
     *    @access public
     */
    function addRequestParameters($parameters) {
        $this->raw = false;
        $this->request->merge($parameters);
    }

    /**
     *    Clears down all parameters.
     *    @access public
     */
    function clearRequest() {
        $this->raw = false;
        $this->request = new SimpleGetEncoding();
    }

    /**
     *    Gets the frame target if present. Although
     *    not strictly part of the URL specification it
     *    acts as similarily to the browser.
     *    @return boolean/string    Frame name or false if none.
     *    @access public
     */
    function getTarget() {
        return $this->target;
    }

    /**
     *    Attaches a frame target.
     *    @param string $frame        Name of frame.
     *    @access public
     */
    function setTarget($frame) {
        $this->raw = false;
        $this->target = $frame;
    }

    /**
     *    Renders the URL back into a string.
     *    @return string        URL in canonical form.
     *    @access public
     */
    function asString() {
        $path = $this->path;
        $scheme = $identity = $host = $port = $encoded = $fragment = '';
        if ($this->username && $this->password) {
            $identity = $this->username . ':' . $this->password . '@';
        }
        if ($this->getHost()) {
            $scheme = $this->getScheme() ? $this->getScheme() : 'http';
            $scheme .= '://';
            $host = $this->getHost();
        } elseif ($this->getScheme() === 'file') {
            // Safest way; otherwise, file URLs on Windows have an extra
            // leading slash. It might be possible to convert file://
            // URIs to local file paths, but that requires more research.
            $scheme = 'file://';
        }
        if ($this->getPort() && $this->getPort() != 80 ) {
            $port = ':'.$this->getPort();
        }

        if (substr($this->path, 0, 1) == '/') {
            $path = $this->normalisePath($this->path);
        }
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
        $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
        return "$scheme$identity$host$port$path$encoded$fragment$coords";
    }

    /**
     *    Replaces unknown sections to turn a relative
     *    URL into an absolute one. The base URL can
     *    be either a string or a SimpleUrl object.
     *    @param string/SimpleUrl $base       Base URL.
     *    @access public
     */
    function makeAbsolute($base) {
        if (! is_object($base)) {
            $base = new SimpleUrl($base);
        }
        if ($this->getHost()) {
            $scheme = $this->getScheme();
            $host = $this->getHost();
            $port = $this->getPort() ? ':' . $this->getPort() : '';
            $identity = $this->getIdentity() ? $this->getIdentity() . '@' : '';
            if (! $identity) {
                $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
            }
        } else {
            $scheme = $base->getScheme();
            $host = $base->getHost();
            $port = $base->getPort() ? ':' . $base->getPort() : '';
            $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
        }
        $path = $this->normalisePath($this->extractAbsolutePath($base));
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
        $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
        return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
    }

    /**
     *    Replaces unknown sections of the path with base parts
     *    to return a complete absolute one.
     *    @param string/SimpleUrl $base       Base URL.
     *    @param string                       Absolute path.
     *    @access private
     */
    protected function extractAbsolutePath($base) {
        if ($this->getHost()) {
            return $this->path;
        }
        if (! $this->isRelativePath($this->path)) {
            return $this->path;
        }
        if ($this->path) {
            return $base->getBasePath() . $this->path;
        }
        return $base->getPath();
    }

    /**
     *    Simple test to see if a path part is relative.
     *    @param string $path        Path to test.
     *    @return boolean            True if starts with a "/".
     *    @access private
     */
    protected function isRelativePath($path) {
        return (substr($path, 0, 1) != '/');
    }

    /**
     *    Extracts the username and password for use in rendering
     *    a URL.
     *    @return string/boolean    Form of username:password or false.
     *    @access public
     */
    function getIdentity() {
        if ($this->username && $this->password) {
            return $this->username . ':' . $this->password;
        }
        return false;
    }

    /**
     *    Replaces . and .. sections of the path.
     *    @param string $path    Unoptimised path.
     *    @return string         Path with dots removed if possible.
     *    @access public
     */
    function normalisePath($path) {
        $path = preg_replace('|/\./|', '/', $path);
        return preg_replace('|/[^/]+/\.\./|', '/', $path);
    }

    /**
     *    A pipe seperated list of all TLDs that result in two part
     *    domain names.
     *    @return string        Pipe separated list.
     *    @access public
     */
    static function getAllTopLevelDomains() {
        return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum';
    }
}
?>