dokuwiki/inc/JSON.php

<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

/**
 * Converts to and from JSON format.
 *
 * JSON (JavaScript Object Notation) is a lightweight data-interchange
 * format. It is easy for humans to read and write. It is easy for machines
 * to parse and generate. It is based on a subset of the JavaScript
 * Programming Language, Standard ECMA-262 3rd Edition - December 1999.
 * This feature can also be found in Python. JSON is a text format that is
 * completely language independent but uses conventions that are familiar
 * to programmers of the C-family of languages, including C, C++, C#, Java,
 * JavaScript, Perl, TCL, and many others. These properties make JSON an
 * ideal data-interchange language.
 *
 * This package provides a simple encoder and decoder for JSON notation. It
 * is intended for use with client-side Javascript applications that make
 * use of HTTPRequest to perform server communication functions - data can
 * be encoded into JSON notation for use in a client-side javascript, or
 * decoded from incoming Javascript requests. JSON format is native to
 * Javascript, and can be directly eval()'ed with no further parsing
 * overhead.
 *
 * All strings should be in ASCII or UTF-8 format!
 *
 * PHP versions 4 and 5
 *
 * LICENSE: Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met: Redistributions of source code must retain the
 * above copyright notice, this list of conditions and the following
 * disclaimer. Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * @author      Michal Migurski <mike-json@teczno.com>
 * @author      Matt Knapp <mdknapp[at]gmail[dot]com>
 * @author      Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
 * @copyright   2005 Michal Migurski
 * @license     http://www.freebsd.org/copyright/freebsd-license.html
 * @link        http://pear.php.net/pepr/pepr-proposal-show.php?id=198
 */

// for DokuWiki: abort immediately when DOKU_INC is not defined, i.e. when
// this file is requested directly instead of being included by DokuWiki
if(!defined('DOKU_INC')) die('meh.');

/**
 * Marker constant for JSON::decode(), used to flag stack state:
 * the parser is scanning one comma-separated slice (an array element or a
 * name/value pair) of the container that is currently being decoded
 */
define('JSON_SLICE',   1);

/**
 * Marker constant for JSON::decode(), used to flag stack state:
 * the parser is inside a quoted string
 */
define('JSON_IN_STR',  2);

/**
 * Marker constant for JSON::decode(), used to flag stack state:
 * the parser is inside an array ("[...]"); also stored as the first stack
 * entry when the value being decoded is itself an array
 */
define('JSON_IN_ARR',  4);

/**
 * Marker constant for JSON::decode(), used to flag stack state:
 * the parser is inside an object ("{...}"); also stored as the first stack
 * entry when the value being decoded is itself an object
 */
define('JSON_IN_OBJ',  8);

/**
 * Marker constant for JSON::decode(), used to flag stack state:
 * the parser is inside a multi-line comment
 */
define('JSON_IN_CMT', 16);

/**
 * Behavior switch for JSON::decode():
 * "{...}" syntax is decoded into associative arrays
 */
define('JSON_LOOSE_TYPE', 10);

/**
 * Behavior switch for JSON::decode():
 * "{...}" syntax is decoded into stdClass objects (the constructor default)
 */
define('JSON_STRICT_TYPE', 11);

/**
 * Converts to and from JSON format.
 *
 * encode() and decode() delegate to PHP's native json_encode() and
 * json_decode() when those functions exist and $skipnative is false.
 * Otherwise the pure PHP implementation contained in this class is used.
 *
 * The pure PHP code paths call utf8_to_utf16be() and utf16be_to_utf8(),
 * which are not defined in this file and have to be provided by the
 * including application.
 */
class JSON {

    /**
     * Disables the use of PHP5's native json_encode() and json_decode()
     *
     * You shouldn't change this usually because the native functions are much
     * faster. However, the non-native decoder will also parse slightly broken
     * JSON which might be handy when talking to a non-conforming endpoint
     */
    public $skipnative = false;

    /**
     * Object behavior switch, JSON_STRICT_TYPE or JSON_LOOSE_TYPE.
     *
     * Declared explicitly (it used to be created on the fly by the
     * constructor) so that no dynamic property is needed.
     */
    public $use;

    /**
     * constructs a new JSON instance
     *
     * @param    int     $use    object behavior: when encoding or decoding,
     *                           be loose or strict about object/array usage
     *
     *                           possible values:
     *                              JSON_STRICT_TYPE - strict typing, default
     *                                                 "{...}" syntax creates objects in decode.
     *                               JSON_LOOSE_TYPE - loose typing
     *                                                 "{...}" syntax creates associative arrays in decode.
     */
    public function __construct($use=JSON_STRICT_TYPE) {
        $this->use = $use;
    }

    /**
     * PHP4 style initializer, kept for backward compatibility
     *
     * A method named like its class is no longer treated as a constructor by
     * current PHP versions, so construction is handled by __construct().
     * This method remains for code that still calls JSON() explicitly, for
     * example a subclass using parent::JSON().
     *
     * @param    int     $use    see __construct()
     */
    public function JSON($use=JSON_STRICT_TYPE) {
        $this->use = $use;
    }

    /**
     * encodes an arbitrary variable into JSON format
     * If available the native PHP JSON implementation is used.
     *
     * @param    mixed   $var    any number, boolean, string, array, or object to be encoded.
     *                           see argument 1 to __construct() above for array-parsing behavior.
     *                           if var is a string, note that encode() always expects it
     *                           to be in ASCII or UTF-8 format!
     *
     * @return   string  JSON string representation of input var; an empty
     *                   string for types that cannot be encoded
     * @access   public
     */
    public function encode($var) {
        if (!$this->skipnative && function_exists('json_encode')){
            return json_encode($var);
        }
        switch (gettype($var)) {
            case 'boolean':
                return $var ? 'true' : 'false';

            case 'NULL':
                return 'null';

            case 'integer':
                return sprintf('%d', $var);

            case 'double':
            case 'float':
                // %F ignores the current locale; %f could emit a decimal
                // comma, which is not valid JSON
                return sprintf('%F', $var);

            case 'string':
                // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT

                // bytes that have a dedicated backslash escape
                $escapes = array(
                    0x08 => '\b',
                    0x09 => '\t',
                    0x0A => '\n',
                    0x0C => '\f',
                    0x0D => '\r',
                    0x22 => '\"',   // double quote
                    0x2F => '\/',   // slash
                    0x5C => '\\\\', // slosh
                );

                $ascii = '';
                $strlen_var = strlen($var);

                /*
                 * Iterate over every byte in the string, escaping with a
                 * slash or converting UTF-8 sequences to \uXXXX where necessary
                 */
                for ($c = 0; $c < $strlen_var; ++$c) {

                    $ord_var_c = ord($var[$c]);

                    if (isset($escapes[$ord_var_c])) {
                        $ascii .= $escapes[$ord_var_c];

                    } elseif ($ord_var_c < 0x20) {
                        // remaining control characters (including NUL) must
                        // not appear raw inside a JSON string
                        $ascii .= sprintf('\u%04x', $ord_var_c);

                    } elseif ($ord_var_c <= 0x7F) {
                        // characters U-00000020 - U-0000007F (same as ASCII)
                        $ascii .= $var[$c];

                    } else {
                        // lead byte of a multi byte sequence, the mask gives
                        // the number of continuation bytes that follow
                        // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                        if (($ord_var_c & 0xE0) == 0xC0) {
                            // characters U-00000080 - U-000007FF, mask 110XXXXX
                            $extra = 1;
                        } elseif (($ord_var_c & 0xF0) == 0xE0) {
                            // characters U-00000800 - U-0000FFFF, mask 1110XXXX
                            $extra = 2;
                        } elseif (($ord_var_c & 0xF8) == 0xF0) {
                            // characters U-00010000 - U-001FFFFF, mask 11110XXX
                            $extra = 3;
                        } elseif (($ord_var_c & 0xFC) == 0xF8) {
                            // characters U-00200000 - U-03FFFFFF, mask 111110XX
                            $extra = 4;
                        } elseif (($ord_var_c & 0xFE) == 0xFC) {
                            // characters U-04000000 - U-7FFFFFFF, mask 1111110X
                            $extra = 5;
                        } else {
                            // not a valid lead byte, skip it
                            $extra = -1;
                        }

                        if ($extra > 0) {
                            // a sequence cut off by the end of the string is
                            // padded with NUL bytes instead of reading past
                            // the end of the string
                            $char = str_pad(substr($var, $c, $extra + 1), $extra + 1, "\0");
                            $c += $extra;
                            //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
                            $utf16 = utf8_to_utf16be($char);
                            // NOTE(review): if utf8_to_utf16be() returns a
                            // surrogate pair (4 bytes) this emits 8 hex digits
                            // after a single \u - confirm against that helper
                            $ascii .= sprintf('\u%04s', bin2hex($utf16));
                        }
                    }
                }

                return '"'.$ascii.'"';

            case 'array':
                /*
                 * As per JSON spec if any array key is not an integer
                 * we must treat the the whole array as an object. We
                 * also try to catch a sparsely populated associative
                 * array with numeric keys here because some JS engines
                 * will create an array with empty indexes up to
                 * max_index which can cause memory issues and because
                 * the keys, which may be relevant, will be remapped
                 * otherwise.
                 *
                 * As per the ECMA and JSON specification an object may
                 * have any string as a property. Unfortunately due to
                 * a hole in the ECMA specification if the key is a
                 * ECMA reserved word or starts with a digit the
                 * parameter is only accessible using ECMAScript's
                 * bracket notation.
                 */

                // treat as a JSON object
                if (is_array($var) && count($var) && (array_keys($var) !== range(0, count($var) - 1))) {
                    return sprintf('{%s}', join(',', array_map(array($this, 'name_value'),
                                                               array_keys($var),
                                                               array_values($var))));
                }

                // treat it like a regular array
                return sprintf('[%s]', join(',', array_map(array($this, 'encode'), $var)));

            case 'object':
                $vars = get_object_vars($var);
                return sprintf('{%s}', join(',', array_map(array($this, 'name_value'),
                                                           array_keys($vars),
                                                           array_values($vars))));

            default:
                return '';
        }
    }

    /**
     * encodes an arbitrary variable into JSON format, alias for encode()
     *
     * @param    mixed   $var    value to encode
     *
     * @return   string  JSON string representation of input var
     */
    public function enc($var) {
        return $this->encode($var);
    }

    /** function name_value
     * array-walking function for use in generating JSON-formatted name-value pairs
     *
     * @param    string  $name   name of key to use
     * @param    mixed   $value  reference to an array element to be encoded
     *
     * @return   string  JSON-formatted name-value pair, like '"name":value'
     * @access   private
     */
    public function name_value($name, $value) {
        return (sprintf("%s:%s", $this->encode(strval($name)), $this->encode($value)));
    }

    /**
     * reduce a string by removing leading and trailing comments and whitespace
     *
     * @param    $str    string      string value to strip of comments and whitespace
     *
     * @return   string  string value stripped of comments and whitespace
     * @access   private
     */
    public function reduce_string($str) {
        $str = preg_replace(array(

                // eliminate single line comments in '// ...' form
                '#^\s*//(.+)$#m',

                // eliminate multi-line comments in '/* ... */' form, at start of string
                '#^\s*/\*(.+)\*/#Us',

                // eliminate multi-line comments in '/* ... */' form, at end of string
                '#/\*(.+)\*/\s*$#Us'

            ), '', $str);

        // eliminate extraneous space
        return trim($str);
    }

    /**
     * decodes a JSON string into appropriate variable
     * If available the native PHP JSON implementation is used.
     *
     * @param    string  $str    JSON-formatted string
     *
     * @return   mixed   number, boolean, string, array, or object
     *                   corresponding to given JSON input string.
     *                   See argument 1 to __construct() above for object-output behavior.
     *                   Note that decode() always returns strings
     *                   in ASCII or UTF-8 format!
     *                   The non-native decoder returns null for input it
     *                   does not recognize.
     * @access   public
     */
    public function decode($str) {
        if (!$this->skipnative && function_exists('json_decode')){
            return json_decode($str,($this->use == JSON_LOOSE_TYPE));
        }

        $str = $this->reduce_string($str);

        switch (strtolower($str)) {
            case 'true':
                return true;

            case 'false':
                return false;

            case 'null':
                return null;

            default:
                if (is_numeric($str)) {
                    // Lookie-loo, it's a number

                    // This would work on its own, but I'm trying to be
                    // good about returning integers where appropriate:
                    // return (float)$str;

                    // Return float or int, as appropriate
                    return ((float)$str == (int)$str)
                        ? (int)$str
                        : (float)$str;

                } elseif (preg_match('/^("|\').*("|\')$/s', $str, $m) && $m[1] == $m[2]) {
                    // STRINGS RETURNED IN UTF-8 FORMAT
                    // (".*" rather than ".+" so that the empty strings ""
                    // and '' are recognized as strings, too)
                    $delim = substr($str, 0, 1);
                    $chrs = (string) substr($str, 1, -1);
                    $utf8 = '';
                    $strlen_chrs = strlen($chrs);

                    for ($c = 0; $c < $strlen_chrs; ++$c) {

                        $substr_chrs_c_2 = substr($chrs, $c, 2);
                        $ord_chrs_c = ord($chrs[$c]);

                        switch ($substr_chrs_c_2) {
                            case '\b':
                                $utf8 .= chr(0x08);
                                $c+=1;
                                break;
                            case '\t':
                                $utf8 .= chr(0x09);
                                $c+=1;
                                break;
                            case '\n':
                                $utf8 .= chr(0x0A);
                                $c+=1;
                                break;
                            case '\f':
                                $utf8 .= chr(0x0C);
                                $c+=1;
                                break;
                            case '\r':
                                $utf8 .= chr(0x0D);
                                $c+=1;
                                break;

                            case '\\"':
                            case '\\\'':
                            case '\\\\':
                            case '\\/':
                                // unescape, except for an escaped quote of
                                // the kind that does not delimit this string:
                                // there only the backslash is dropped and the
                                // quote is picked up by the next iteration
                                if (($delim == '"' && $substr_chrs_c_2 != '\\\'') ||
                                   ($delim == "'" && $substr_chrs_c_2 != '\\"')) {
                                    $utf8 .= $chrs[++$c];
                                }
                                break;

                            default:
                                if (preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6))) {
                                    // single, escaped unicode character
                                    $utf16 = chr(hexdec(substr($chrs, ($c+2), 2)))
                                           . chr(hexdec(substr($chrs, ($c+4), 2)));
                                    //$utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
                                    $utf8 .= utf16be_to_utf8($utf16);
                                    $c+=5;

                                } elseif(($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F)) {
                                    $utf8 .= $chrs[$c];

                                } elseif(($ord_chrs_c & 0xE0) == 0xC0) {
                                    // characters U-00000080 - U-000007FF, mask 110XXXXX
                                    //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                                    $utf8 .= substr($chrs, $c, 2);
                                    $c += 1;

                                } elseif(($ord_chrs_c & 0xF0) == 0xE0) {
                                    // characters U-00000800 - U-0000FFFF, mask 1110XXXX
                                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                                    $utf8 .= substr($chrs, $c, 3);
                                    $c += 2;

                                } elseif(($ord_chrs_c & 0xF8) == 0xF0) {
                                    // characters U-00010000 - U-001FFFFF, mask 11110XXX
                                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                                    $utf8 .= substr($chrs, $c, 4);
                                    $c += 3;

                                } elseif(($ord_chrs_c & 0xFC) == 0xF8) {
                                    // characters U-00200000 - U-03FFFFFF, mask 111110XX
                                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                                    $utf8 .= substr($chrs, $c, 5);
                                    $c += 4;

                                } elseif(($ord_chrs_c & 0xFE) == 0xFC) {
                                    // characters U-04000000 - U-7FFFFFFF, mask 1111110X
                                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                                    $utf8 .= substr($chrs, $c, 6);
                                    $c += 5;

                                }
                                break;

                        }

                    }

                    return $utf8;

                } elseif (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
                    // array, or object notation

                    // the first stack entry records the kind of container
                    // that is being decoded by this call
                    if ($str[0] == '[') {
                        $stk = array(JSON_IN_ARR);
                        $arr = array();
                    } else {
                        if ($this->use == JSON_LOOSE_TYPE) {
                            $stk = array(JSON_IN_OBJ);
                            $obj = array();
                        } else {
                            $stk = array(JSON_IN_OBJ);
                            $obj = new stdClass();
                        }
                    }

                    array_push($stk, array('what'  => JSON_SLICE,
                                           'where' => 0,
                                           'delim' => false));

                    $chrs = substr($str, 1, -1);
                    $chrs = $this->reduce_string($chrs);

                    if ($chrs == '') {
                        if (reset($stk) == JSON_IN_ARR) {
                            return $arr;

                        } else {
                            return $obj;

                        }
                    }

                    //print("\nparsing {$chrs}\n");

                    $strlen_chrs = strlen($chrs);

                    // runs one step past the last character so that the
                    // final slice gets processed as well
                    for ($c = 0; $c <= $strlen_chrs; ++$c) {

                        $top = end($stk);
                        $substr_chrs_c_2 = substr($chrs, $c, 2);

                        if (($c == $strlen_chrs) || (($chrs[$c] == ',') && ($top['what'] == JSON_SLICE))) {
                            // found a comma that is not inside a string, array, etc.,
                            // OR we've reached the end of the character list
                            $slice = substr($chrs, $top['where'], ($c - $top['where']));
                            array_push($stk, array('what' => JSON_SLICE, 'where' => ($c + 1), 'delim' => false));
                            //print("Found split at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");

                            if (reset($stk) == JSON_IN_ARR) {
                                // we are in an array, so just push an element onto the stack
                                array_push($arr, $this->decode($slice));

                            } elseif (reset($stk) == JSON_IN_OBJ) {
                                // we are in an object, so figure
                                // out the property name and set an
                                // element in an associative array,
                                // for now
                                if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                                    // "name":value pair
                                    $key = $this->decode($parts[1]);
                                    $val = $this->decode($parts[2]);

                                    if ($this->use == JSON_LOOSE_TYPE) {
                                        $obj[$key] = $val;
                                    } else {
                                        $obj->$key = $val;
                                    }
                                } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                                    // name:value pair, where name is unquoted
                                    $key = $parts[1];
                                    $val = $this->decode($parts[2]);

                                    if ($this->use == JSON_LOOSE_TYPE) {
                                        $obj[$key] = $val;
                                    } else {
                                        $obj->$key = $val;
                                    }
                                }

                            }

                        } elseif ((($chrs[$c] == '"') || ($chrs[$c] == "'")) && ($top['what'] != JSON_IN_STR)) {
                            // found a quote, and we are not inside a string
                            array_push($stk, array('what' => JSON_IN_STR, 'where' => $c, 'delim' => $chrs[$c]));
                            //print("Found start of string at {$c}\n");

                        } elseif (($chrs[$c] == $top['delim']) &&
                                 ($top['what'] == JSON_IN_STR) &&
                                 ((strlen(substr($chrs, 0, $c)) - strlen(rtrim(substr($chrs, 0, $c), '\\'))) % 2 != 1)) {
                            // found a quote, we're in a string, and it's not escaped
                            // we know that it's not escaped because there is _not_ an
                            // odd number of backslashes at the end of the string so far
                            array_pop($stk);
                            //print("Found end of string at {$c}: ".substr($chrs, $top['where'], (1 + 1 + $c - $top['where']))."\n");

                        } elseif (($chrs[$c] == '[') &&
                                 in_array($top['what'], array(JSON_SLICE, JSON_IN_ARR, JSON_IN_OBJ), true)) {
                            // found a left-bracket, and we are in an array, object, or slice
                            array_push($stk, array('what' => JSON_IN_ARR, 'where' => $c, 'delim' => false));
                            //print("Found start of array at {$c}\n");

                        } elseif (($chrs[$c] == ']') && ($top['what'] == JSON_IN_ARR)) {
                            // found a right-bracket, and we're in an array
                            array_pop($stk);
                            //print("Found end of array at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");

                        } elseif (($chrs[$c] == '{') &&
                                 in_array($top['what'], array(JSON_SLICE, JSON_IN_ARR, JSON_IN_OBJ), true)) {
                            // found a left-brace, and we are in an array, object, or slice
                            array_push($stk, array('what' => JSON_IN_OBJ, 'where' => $c, 'delim' => false));
                            //print("Found start of object at {$c}\n");

                        } elseif (($chrs[$c] == '}') && ($top['what'] == JSON_IN_OBJ)) {
                            // found a right-brace, and we're in an object
                            array_pop($stk);
                            //print("Found end of object at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");

                        } elseif (($substr_chrs_c_2 == '/*') &&
                                 in_array($top['what'], array(JSON_SLICE, JSON_IN_ARR, JSON_IN_OBJ), true)) {
                            // found a comment start, and we are in an array, object, or slice
                            array_push($stk, array('what' => JSON_IN_CMT, 'where' => $c, 'delim' => false));
                            $c++;
                            //print("Found start of comment at {$c}\n");

                        } elseif (($substr_chrs_c_2 == '*/') && ($top['what'] == JSON_IN_CMT)) {
                            // found a comment end, and we're in one now
                            array_pop($stk);
                            $c++;

                            // blank out the comment so that it does not end
                            // up in the slice that is decoded later
                            for ($i = $top['where']; $i <= $c; ++$i) {
                                $chrs = substr_replace($chrs, ' ', $i, 1);
                            }

                            //print("Found end of comment at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");

                        }

                    }

                    if (reset($stk) == JSON_IN_ARR) {
                        return $arr;

                    } elseif (reset($stk) == JSON_IN_OBJ) {
                        return $obj;

                    }

                }
        }
    }

    /**
     * decodes a JSON string into appropriate variable; alias for decode()
     *
     * @param    string  $var    JSON-formatted string
     *
     * @return   mixed   see decode()
     */
    public function dec($var) {
        return $this->decode($var);
    }
}