dokuwiki/inc/JSON.php
Andreas Gohr 55ce110126 fixed a JSON bug with handling backspaces
This was fixed in upstream and the upstream tests caught this :-)
2012-05-03 00:34:24 +02:00

648 lines
28 KiB
PHP

<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
/**
* Converts to and from JSON format.
*
* JSON (JavaScript Object Notation) is a lightweight data-interchange
* format. It is easy for humans to read and write. It is easy for machines
* to parse and generate. It is based on a subset of the JavaScript
* Programming Language, Standard ECMA-262 3rd Edition - December 1999.
* This feature can also be found in Python. JSON is a text format that is
* completely language independent but uses conventions that are familiar
* to programmers of the C-family of languages, including C, C++, C#, Java,
* JavaScript, Perl, TCL, and many others. These properties make JSON an
* ideal data-interchange language.
*
* This package provides a simple encoder and decoder for JSON notation. It
* is intended for use with client-side Javascript applications that make
* use of HTTPRequest to perform server communication functions - data can
* be encoded into JSON notation for use in a client-side javascript, or
* decoded from incoming Javascript requests. JSON format is native to
* Javascript, and can be directly eval()'ed with no further parsing
* overhead
*
* All strings should be in ASCII or UTF-8 format!
*
* PHP versions 4 and 5
*
* LICENSE: Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met: Redistributions of source code must retain the
* above copyright notice, this list of conditions and the following
* disclaimer. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
* NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* @author Michal Migurski <mike-json@teczno.com>
* @author Matt Knapp <mdknapp[at]gmail[dot]com>
* @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
* @copyright 2005 Michal Migurski
* @license http://www.freebsd.org/copyright/freebsd-license.html
* @link http://pear.php.net/pepr/pepr-proposal-show.php?id=198
*/
// for DokuWiki
if(!defined('DOKU_INC')) die('meh.');
/**
* Marker constant for JSON::decode(), used to flag stack state
*/
define('JSON_SLICE', 1);
/**
* Marker constant for JSON::decode(), used to flag stack state
*/
define('JSON_IN_STR', 2);
/**
* Marker constant for JSON::decode(), used to flag stack state
*/
define('JSON_IN_ARR', 4);
/**
* Marker constant for JSON::decode(), used to flag stack state
*/
define('JSON_IN_OBJ', 8);
/**
* Marker constant for JSON::decode(), used to flag stack state
*/
define('JSON_IN_CMT', 16);
/**
* Behavior switch for JSON::decode()
*/
define('JSON_LOOSE_TYPE', 10);
/**
* Behavior switch for JSON::decode()
*/
define('JSON_STRICT_TYPE', 11);
/**
* Converts to and from JSON format.
*/
class JSON {
/**
* Disables the use of PHP5's native json_decode()
*
* You shouldn't change this usually because the native function is much
* faster. However, this non-native will also parse slightly broken JSON
* which might be handy when talking to a non-conform endpoint
*/
public $skipnative = false;
/**
* constructs a new JSON instance
*
* @param int $use object behavior: when encoding or decoding,
* be loose or strict about object/array usage
*
* possible values:
* JSON_STRICT_TYPE - strict typing, default
* "{...}" syntax creates objects in decode.
* JSON_LOOSE_TYPE - loose typing
* "{...}" syntax creates associative arrays in decode.
*/
function JSON($use=JSON_STRICT_TYPE) {
$this->use = $use;
}
/**
* encodes an arbitrary variable into JSON format
* If available the native PHP JSON implementation is used.
*
* @param mixed $var any number, boolean, string, array, or object to be encoded.
* see argument 1 to JSON() above for array-parsing behavior.
* if var is a strng, note that encode() always expects it
* to be in ASCII or UTF-8 format!
*
* @return string JSON string representation of input var
* @access public
*/
function encode($var) {
if (!$this->skipnative && function_exists('json_encode')){
return json_encode($var);
}
switch (gettype($var)) {
case 'boolean':
return $var ? 'true' : 'false';
case 'NULL':
return 'null';
case 'integer':
return sprintf('%d', $var);
case 'double':
case 'float':
return sprintf('%f', $var);
case 'string':
// STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
$ascii = '';
$strlen_var = strlen($var);
/*
* Iterate over every character in the string,
* escaping with a slash or encoding to UTF-8 where necessary
*/
for ($c = 0; $c < $strlen_var; ++$c) {
$ord_var_c = ord($var{$c});
switch ($ord_var_c) {
case 0x08:
$ascii .= '\b';
break;
case 0x09:
$ascii .= '\t';
break;
case 0x0A:
$ascii .= '\n';
break;
case 0x0C:
$ascii .= '\f';
break;
case 0x0D:
$ascii .= '\r';
break;
case 0x22:
case 0x2F:
case 0x5C:
// double quote, slash, slosh
$ascii .= '\\'.$var{$c};
break;
case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)):
// characters U-00000000 - U-0000007F (same as ASCII)
$ascii .= $var{$c};
break;
case (($ord_var_c & 0xE0) == 0xC0):
// characters U-00000080 - U-000007FF, mask 110XXXXX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c, ord($var{$c+1}));
$c+=1;
//$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
$utf16 = utf8_to_utf16be($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
case (($ord_var_c & 0xF0) == 0xE0):
// characters U-00000800 - U-0000FFFF, mask 1110XXXX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c,
ord($var{$c+1}),
ord($var{$c+2}));
$c+=2;
//$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
$utf16 = utf8_to_utf16be($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
case (($ord_var_c & 0xF8) == 0xF0):
// characters U-00010000 - U-001FFFFF, mask 11110XXX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c,
ord($var{$c+1}),
ord($var{$c+2}),
ord($var{$c+3}));
$c+=3;
//$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
$utf16 = utf8_to_utf16be($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
case (($ord_var_c & 0xFC) == 0xF8):
// characters U-00200000 - U-03FFFFFF, mask 111110XX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c,
ord($var{$c+1}),
ord($var{$c+2}),
ord($var{$c+3}),
ord($var{$c+4}));
$c+=4;
//$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
$utf16 = utf8_to_utf16be($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
case (($ord_var_c & 0xFE) == 0xFC):
// characters U-04000000 - U-7FFFFFFF, mask 1111110X
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c,
ord($var{$c+1}),
ord($var{$c+2}),
ord($var{$c+3}),
ord($var{$c+4}),
ord($var{$c+5}));
$c+=5;
//$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
$utf16 = utf8_to_utf16be($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
}
}
return '"'.$ascii.'"';
case 'array':
/*
* As per JSON spec if any array key is not an integer
* we must treat the the whole array as an object. We
* also try to catch a sparsely populated associative
* array with numeric keys here because some JS engines
* will create an array with empty indexes up to
* max_index which can cause memory issues and because
* the keys, which may be relevant, will be remapped
* otherwise.
*
* As per the ECMA and JSON specification an object may
* have any string as a property. Unfortunately due to
* a hole in the ECMA specification if the key is a
* ECMA reserved word or starts with a digit the
* parameter is only accessible using ECMAScript's
* bracket notation.
*/
// treat as a JSON object
if (is_array($var) && count($var) && (array_keys($var) !== range(0, count($var) - 1))) {
return sprintf('{%s}', join(',', array_map(array($this, 'name_value'),
array_keys($var),
array_values($var))));
}
// treat it like a regular array
return sprintf('[%s]', join(',', array_map(array($this, 'encode'), $var)));
case 'object':
$vars = get_object_vars($var);
return sprintf('{%s}', join(',', array_map(array($this, 'name_value'),
array_keys($vars),
array_values($vars))));
default:
return '';
}
}
/**
* encodes an arbitrary variable into JSON format, alias for encode()
*/
function enc($var) {
return $this->encode($var);
}
/** function name_value
* array-walking function for use in generating JSON-formatted name-value pairs
*
* @param string $name name of key to use
* @param mixed $value reference to an array element to be encoded
*
* @return string JSON-formatted name-value pair, like '"name":value'
* @access private
*/
function name_value($name, $value) {
return (sprintf("%s:%s", $this->encode(strval($name)), $this->encode($value)));
}
/**
* reduce a string by removing leading and trailing comments and whitespace
*
* @param $str string string value to strip of comments and whitespace
*
* @return string string value stripped of comments and whitespace
* @access private
*/
function reduce_string($str) {
$str = preg_replace(array(
// eliminate single line comments in '// ...' form
'#^\s*//(.+)$#m',
// eliminate multi-line comments in '/* ... */' form, at start of string
'#^\s*/\*(.+)\*/#Us',
// eliminate multi-line comments in '/* ... */' form, at end of string
'#/\*(.+)\*/\s*$#Us'
), '', $str);
// eliminate extraneous space
return trim($str);
}
/**
* decodes a JSON string into appropriate variable
* If available the native PHP JSON implementation is used.
*
* @param string $str JSON-formatted string
*
* @return mixed number, boolean, string, array, or object
* corresponding to given JSON input string.
* See argument 1 to JSON() above for object-output behavior.
* Note that decode() always returns strings
* in ASCII or UTF-8 format!
* @access public
*/
function decode($str) {
if (!$this->skipnative && function_exists('json_decode')){
return json_decode($str,($this->use == JSON_LOOSE_TYPE));
}
$str = $this->reduce_string($str);
switch (strtolower($str)) {
case 'true':
return true;
case 'false':
return false;
case 'null':
return null;
default:
if (is_numeric($str)) {
// Lookie-loo, it's a number
// This would work on its own, but I'm trying to be
// good about returning integers where appropriate:
// return (float)$str;
// Return float or int, as appropriate
return ((float)$str == (integer)$str)
? (integer)$str
: (float)$str;
} elseif (preg_match('/^("|\').+("|\')$/s', $str, $m) && $m[1] == $m[2]) {
// STRINGS RETURNED IN UTF-8 FORMAT
$delim = substr($str, 0, 1);
$chrs = substr($str, 1, -1);
$utf8 = '';
$strlen_chrs = strlen($chrs);
for ($c = 0; $c < $strlen_chrs; ++$c) {
$substr_chrs_c_2 = substr($chrs, $c, 2);
$ord_chrs_c = ord($chrs{$c});
switch ($substr_chrs_c_2) {
case '\b':
$utf8 .= chr(0x08);
$c+=1;
break;
case '\t':
$utf8 .= chr(0x09);
$c+=1;
break;
case '\n':
$utf8 .= chr(0x0A);
$c+=1;
break;
case '\f':
$utf8 .= chr(0x0C);
$c+=1;
break;
case '\r':
$utf8 .= chr(0x0D);
$c+=1;
break;
case '\\"':
case '\\\'':
case '\\\\':
case '\\/':
if (($delim == '"' && $substr_chrs_c_2 != '\\\'') ||
($delim == "'" && $substr_chrs_c_2 != '\\"')) {
$utf8 .= $chrs{++$c};
}
break;
default:
if (preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6))) {
// single, escaped unicode character
$utf16 = chr(hexdec(substr($chrs, ($c+2), 2)))
. chr(hexdec(substr($chrs, ($c+4), 2)));
//$utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
$utf8 .= utf16be_to_utf8($utf16);
$c+=5;
} elseif(($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F)) {
$utf8 .= $chrs{$c};
} elseif(($ord_chrs_c & 0xE0) == 0xC0) {
// characters U-00000080 - U-000007FF, mask 110XXXXX
//see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$utf8 .= substr($chrs, $c, 2);
$c += 1;
} elseif(($ord_chrs_c & 0xF0) == 0xE0) {
// characters U-00000800 - U-0000FFFF, mask 1110XXXX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$utf8 .= substr($chrs, $c, 3);
$c += 2;
} elseif(($ord_chrs_c & 0xF8) == 0xF0) {
// characters U-00010000 - U-001FFFFF, mask 11110XXX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$utf8 .= substr($chrs, $c, 4);
$c += 3;
} elseif(($ord_chrs_c & 0xFC) == 0xF8) {
// characters U-00200000 - U-03FFFFFF, mask 111110XX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$utf8 .= substr($chrs, $c, 5);
$c += 4;
} elseif(($ord_chrs_c & 0xFE) == 0xFC) {
// characters U-04000000 - U-7FFFFFFF, mask 1111110X
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$utf8 .= substr($chrs, $c, 6);
$c += 5;
}
break;
}
}
return $utf8;
} elseif (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
// array, or object notation
if ($str{0} == '[') {
$stk = array(JSON_IN_ARR);
$arr = array();
} else {
if ($this->use == JSON_LOOSE_TYPE) {
$stk = array(JSON_IN_OBJ);
$obj = array();
} else {
$stk = array(JSON_IN_OBJ);
$obj = new stdClass();
}
}
array_push($stk, array('what' => JSON_SLICE,
'where' => 0,
'delim' => false));
$chrs = substr($str, 1, -1);
$chrs = $this->reduce_string($chrs);
if ($chrs == '') {
if (reset($stk) == JSON_IN_ARR) {
return $arr;
} else {
return $obj;
}
}
//print("\nparsing {$chrs}\n");
$strlen_chrs = strlen($chrs);
for ($c = 0; $c <= $strlen_chrs; ++$c) {
$top = end($stk);
$substr_chrs_c_2 = substr($chrs, $c, 2);
if (($c == $strlen_chrs) || (($chrs{$c} == ',') && ($top['what'] == JSON_SLICE))) {
// found a comma that is not inside a string, array, etc.,
// OR we've reached the end of the character list
$slice = substr($chrs, $top['where'], ($c - $top['where']));
array_push($stk, array('what' => JSON_SLICE, 'where' => ($c + 1), 'delim' => false));
//print("Found split at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
if (reset($stk) == JSON_IN_ARR) {
// we are in an array, so just push an element onto the stack
array_push($arr, $this->decode($slice));
} elseif (reset($stk) == JSON_IN_OBJ) {
// we are in an object, so figure
// out the property name and set an
// element in an associative array,
// for now
if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
// "name":value pair
$key = $this->decode($parts[1]);
$val = $this->decode($parts[2]);
if ($this->use == JSON_LOOSE_TYPE) {
$obj[$key] = $val;
} else {
$obj->$key = $val;
}
} elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
// name:value pair, where name is unquoted
$key = $parts[1];
$val = $this->decode($parts[2]);
if ($this->use == JSON_LOOSE_TYPE) {
$obj[$key] = $val;
} else {
$obj->$key = $val;
}
}
}
} elseif ((($chrs{$c} == '"') || ($chrs{$c} == "'")) && ($top['what'] != JSON_IN_STR)) {
// found a quote, and we are not inside a string
array_push($stk, array('what' => JSON_IN_STR, 'where' => $c, 'delim' => $chrs{$c}));
//print("Found start of string at {$c}\n");
} elseif (($chrs{$c} == $top['delim']) &&
($top['what'] == JSON_IN_STR) &&
((strlen(substr($chrs, 0, $c)) - strlen(rtrim(substr($chrs, 0, $c), '\\'))) % 2 != 1)) {
// found a quote, we're in a string, and it's not escaped
// we know that it's not escaped becase there is _not_ an
// odd number of backslashes at the end of the string so far
array_pop($stk);
//print("Found end of string at {$c}: ".substr($chrs, $top['where'], (1 + 1 + $c - $top['where']))."\n");
} elseif (($chrs{$c} == '[') &&
in_array($top['what'], array(JSON_SLICE, JSON_IN_ARR, JSON_IN_OBJ))) {
// found a left-bracket, and we are in an array, object, or slice
array_push($stk, array('what' => JSON_IN_ARR, 'where' => $c, 'delim' => false));
//print("Found start of array at {$c}\n");
} elseif (($chrs{$c} == ']') && ($top['what'] == JSON_IN_ARR)) {
// found a right-bracket, and we're in an array
array_pop($stk);
//print("Found end of array at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
} elseif (($chrs{$c} == '{') &&
in_array($top['what'], array(JSON_SLICE, JSON_IN_ARR, JSON_IN_OBJ))) {
// found a left-brace, and we are in an array, object, or slice
array_push($stk, array('what' => JSON_IN_OBJ, 'where' => $c, 'delim' => false));
//print("Found start of object at {$c}\n");
} elseif (($chrs{$c} == '}') && ($top['what'] == JSON_IN_OBJ)) {
// found a right-brace, and we're in an object
array_pop($stk);
//print("Found end of object at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
} elseif (($substr_chrs_c_2 == '/*') &&
in_array($top['what'], array(JSON_SLICE, JSON_IN_ARR, JSON_IN_OBJ))) {
// found a comment start, and we are in an array, object, or slice
array_push($stk, array('what' => JSON_IN_CMT, 'where' => $c, 'delim' => false));
$c++;
//print("Found start of comment at {$c}\n");
} elseif (($substr_chrs_c_2 == '*/') && ($top['what'] == JSON_IN_CMT)) {
// found a comment end, and we're in one now
array_pop($stk);
$c++;
for ($i = $top['where']; $i <= $c; ++$i)
$chrs = substr_replace($chrs, ' ', $i, 1);
//print("Found end of comment at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
}
}
if (reset($stk) == JSON_IN_ARR) {
return $arr;
} elseif (reset($stk) == JSON_IN_OBJ) {
return $obj;
}
}
}
}
/**
* decodes a JSON string into appropriate variable; alias for decode()
*/
function dec($var) {
return $this->decode($var);
}
}