mirror of
https://github.com/libretro/RetroArch
synced 2025-01-30 12:32:52 +00:00
3930 lines
139 KiB
C
3930 lines
139 KiB
C
/*
|
|
Copyright (c) 2012 John-Anthony Owens
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a
|
|
copy of this software and associated documentation files (the "Software"),
|
|
to deal in the Software without restriction, including without limitation
|
|
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
and/or sell copies of the Software, and to permit persons to whom the
|
|
Software is furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included
|
|
in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* Ensure uint32_t type (compiler-dependent). */
|
|
#if defined(_MSC_VER)
|
|
typedef unsigned __int32 uint32_t;
|
|
#else
|
|
#include <stdint.h>
|
|
#endif
|
|
|
|
/* Ensure SIZE_MAX defined. */
|
|
#ifndef SIZE_MAX
|
|
#define SIZE_MAX ((size_t)-1)
|
|
#endif
|
|
|
|
/* Mark APIs for export (as opposed to import) when we build this file. */
|
|
#define JSON_BUILDING
|
|
#include <formats/jsonsax_full.h>
|
|
|
|
/* Default allocation constants. */
|
|
#define DEFAULT_TOKEN_BYTES_LENGTH 64 /* MUST be a power of 2 */
|
|
#define DEFAULT_SYMBOL_STACK_SIZE 32 /* MUST be a power of 2 */
|
|
|
|
/* Types for readability. */
|
|
typedef unsigned char byte;
|
|
typedef uint32_t Codepoint;
|
|
|
|
/* Especially-relevant Unicode codepoints. */
|
|
#define U_(x) ((Codepoint)(x))
|
|
#define NULL_CODEPOINT U_(0x0000)
|
|
#define BACKSPACE_CODEPOINT U_(0x0008)
|
|
#define TAB_CODEPOINT U_(0x0009)
|
|
#define LINE_FEED_CODEPOINT U_(0x000A)
|
|
#define FORM_FEED_CODEPOINT U_(0x000C)
|
|
#define CARRIAGE_RETURN_CODEPOINT U_(0x000D)
|
|
#define FIRST_NON_CONTROL_CODEPOINT U_(0x0020)
|
|
#define DELETE_CODEPOINT U_(0x007F)
|
|
#define FIRST_NON_ASCII_CODEPOINT U_(0x0080)
|
|
#define FIRST_2_BYTE_UTF8_CODEPOINT U_(0x0080)
|
|
#define FIRST_3_BYTE_UTF8_CODEPOINT U_(0x0800)
|
|
#define LINE_SEPARATOR_CODEPOINT U_(0x2028)
|
|
#define PARAGRAPH_SEPARATOR_CODEPOINT U_(0x2029)
|
|
#define BOM_CODEPOINT U_(0xFEFF)
|
|
#define REPLACEMENT_CHARACTER_CODEPOINT U_(0xFFFD)
|
|
#define FIRST_NON_BMP_CODEPOINT U_(0x10000)
|
|
#define FIRST_4_BYTE_UTF8_CODEPOINT U_(0x10000)
|
|
#define MAX_CODEPOINT U_(0x10FFFF)
|
|
#define EOF_CODEPOINT U_(0xFFFFFFFF)
|
|
|
|
/* Bit-masking macros. */
|
|
#define BOTTOM_3_BITS(x) ((x) & 0x7)
|
|
#define BOTTOM_4_BITS(x) ((x) & 0xF)
|
|
#define BOTTOM_5_BITS(x) ((x) & 0x1F)
|
|
#define BOTTOM_6_BITS(x) ((x) & 0x3F)
|
|
|
|
/* Bit-flag macros. */
|
|
#define GET_FLAGS(x, f) ((x) & (f))
|
|
#define SET_FLAGS_ON(flagstype, x, f) do { (x) |= (flagstype)(f); } while (0)
|
|
#define SET_FLAGS_OFF(flagstype, x, f) do { (x) &= (flagstype)~(f); } while (0)
|
|
#define SET_FLAGS(flagstype, x, f, cond) do { if (cond) (x) |= (flagstype)(f); else (x) &= (flagstype)~(f); } while (0)
|
|
|
|
/* UTF-8 byte-related macros. */
|
|
#define IS_UTF8_SINGLE_BYTE(b) (((b) & 0x80) == 0)
|
|
#define IS_UTF8_CONTINUATION_BYTE(b) (((b) & 0xC0) == 0x80)
|
|
#define IS_UTF8_FIRST_BYTE_OF_2(b) (((b) & 0xE0) == 0xC0)
|
|
#define IS_UTF8_FIRST_BYTE_OF_3(b) (((b) & 0xF0) == 0xE0)
|
|
#define IS_UTF8_FIRST_BYTE_OF_4(b) (((b) & 0xF8) == 0xF0)
|
|
|
|
/* Unicode codepoint-related macros. */
|
|
#define IS_NONCHARACTER(c) ((((c) & 0xFE) == 0xFE) || (((c) >= 0xFDD0) && ((c) <= 0xFDEF)))
|
|
#define IS_SURROGATE(c) (((c) & 0xFFFFF800) == 0xD800)
|
|
#define IS_LEADING_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xD800)
|
|
#define IS_TRAILING_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xDC00)
|
|
#define CODEPOINT_FROM_SURROGATES(hi_lo) ((((hi_lo) >> 16) << 10) + ((hi_lo) & 0xFFFF) + 0xFCA02400)
|
|
#define SURROGATES_FROM_CODEPOINT(c) ((((c) << 6) & 0x7FF0000) + ((c) & 0x3FF) + 0xD7C0DC00)
|
|
#define SHORTEST_ENCODING_SEQUENCE(enc) (1U << ((enc) >> 1))
|
|
#define LONGEST_ENCODING_SEQUENCE 4
|
|
|
|
/* Internal types that alias enum types in the public API.
|
|
By using byte to represent these values internally,
|
|
we can guarantee minimal storage size and avoid compiler
|
|
warnings when using values of the type in switch statements
|
|
that don't have (or need) a default case. */
|
|
typedef byte Encoding;
|
|
typedef byte Error;
|
|
typedef byte TokenAttributes;
|
|
|
|
/******************** Default Memory Suite ********************/
|
|
|
|
static void* JSON_CALL DefaultReallocHandler(void* userData, void* ptr, size_t size)
|
|
{
|
|
(void)userData; /* unused */
|
|
return realloc(ptr, size);
|
|
}
|
|
|
|
static void JSON_CALL DefaultFreeHandler(void* userData, void* ptr)
|
|
{
|
|
(void)userData; /* unused */
|
|
free(ptr);
|
|
}
|
|
|
|
static const JSON_MemorySuite defaultMemorySuite = { NULL, &DefaultReallocHandler, &DefaultFreeHandler };
|
|
|
|
static byte* DoubleBuffer(const JSON_MemorySuite* pMemorySuite, byte* pDefaultBuffer, byte* pBuffer, size_t length)
|
|
{
|
|
size_t newLength = length * 2;
|
|
if (newLength < length)
|
|
{
|
|
pBuffer = NULL;
|
|
}
|
|
else if (pBuffer == pDefaultBuffer)
|
|
{
|
|
pBuffer = (byte*)pMemorySuite->realloc(pMemorySuite->userData, NULL, newLength);
|
|
if (pBuffer)
|
|
{
|
|
memcpy(pBuffer, pDefaultBuffer, length);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
pBuffer = (byte*)pMemorySuite->realloc(pMemorySuite->userData, pBuffer, newLength);
|
|
}
|
|
return pBuffer;
|
|
}
|
|
|
|
/******************** Unicode Decoder ********************/
|
|
|
|
/* Mutually-exclusive decoder states. */
|
|
/* The bits of DecoderState are layed out as follows:
|
|
|
|
---lllnn
|
|
|
|
- = unused (3 bits)
|
|
l = expected total sequence length (3 bits)
|
|
d = number of bytes decoded so far (2 bits)
|
|
*/
|
|
|
|
#define DECODER_RESET 0x00
|
|
#define DECODED_1_OF_2 0x09 /* 00001001 */
|
|
#define DECODED_1_OF_3 0x0D /* 00001101 */
|
|
#define DECODED_2_OF_3 0x0E /* 00001110 */
|
|
#define DECODED_1_OF_4 0x11 /* 00010001 */
|
|
#define DECODED_2_OF_4 0x12 /* 00010010 */
|
|
#define DECODED_3_OF_4 0x13 /* 00010011 */
|
|
typedef byte DecoderState;
|
|
|
|
#define DECODER_STATE_BYTES(s) (size_t)((s) & 0x3)
|
|
|
|
/* Decoder data. */
|
|
typedef struct tag_DecoderData
|
|
{
|
|
DecoderState state;
|
|
uint32_t bits;
|
|
} DecoderData;
|
|
typedef DecoderData* Decoder;
|
|
|
|
/* The bits of DecoderOutput are layed out as follows:
|
|
|
|
------rrlllccccccccccccccccccccc
|
|
|
|
- = unused (6 bits)
|
|
r = result code (2 bits)
|
|
l = sequence length (3 bits)
|
|
c = codepoint (21 bits)
|
|
*/
|
|
#define SEQUENCE_PENDING 0
|
|
#define SEQUENCE_COMPLETE 1
|
|
#define SEQUENCE_INVALID_INCLUSIVE 2
|
|
#define SEQUENCE_INVALID_EXCLUSIVE 3
|
|
typedef uint32_t DecoderResultCode;
|
|
|
|
#define DECODER_OUTPUT(r, l, c) (DecoderOutput)(((r) << 24) | ((l) << 21) | (c))
|
|
#define DECODER_RESULT_CODE(o) (DecoderResultCode)((DecoderOutput)(o) >> 24)
|
|
#define DECODER_SEQUENCE_LENGTH(o) (size_t)(((DecoderOutput)(o) >> 21) & 0x7)
|
|
#define DECODER_CODEPOINT(o) (Codepoint)((DecoderOutput)(o) & 0x001FFFFF)
|
|
typedef uint32_t DecoderOutput;
|
|
|
|
/* Decoder functions. */
|
|
|
|
static void Decoder_Reset(Decoder decoder)
|
|
{
|
|
decoder->state = DECODER_RESET;
|
|
decoder->bits = 0;
|
|
}
|
|
|
|
static int Decoder_SequencePending(Decoder decoder)
|
|
{
|
|
return decoder->state != DECODER_RESET;
|
|
}
|
|
|
|
static DecoderOutput Decoder_ProcessByte(Decoder decoder, Encoding encoding, byte b)
|
|
{
|
|
DecoderOutput output = DECODER_OUTPUT(SEQUENCE_PENDING, 0, 0);
|
|
switch (encoding)
|
|
{
|
|
case JSON_UTF8:
|
|
/* When the input encoding is UTF-8, the decoded codepoint's bits are
|
|
recorded in the bottom 3 bytes of bits as they are decoded.
|
|
The top byte is not used. */
|
|
switch (decoder->state)
|
|
{
|
|
case DECODER_RESET:
|
|
if (IS_UTF8_SINGLE_BYTE(b))
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 1, b);
|
|
}
|
|
else if (IS_UTF8_FIRST_BYTE_OF_2(b))
|
|
{
|
|
/* UTF-8 2-byte sequences that are overlong encodings can be
|
|
detected from just the first byte (C0 or C1). */
|
|
decoder->bits = (uint32_t)BOTTOM_5_BITS(b) << 6;
|
|
if (decoder->bits < FIRST_2_BYTE_UTF8_CODEPOINT)
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
|
|
}
|
|
else
|
|
{
|
|
decoder->state = DECODED_1_OF_2;
|
|
goto noreset;
|
|
}
|
|
}
|
|
else if (IS_UTF8_FIRST_BYTE_OF_3(b))
|
|
{
|
|
decoder->bits = (uint32_t)BOTTOM_4_BITS(b) << 12;
|
|
decoder->state = DECODED_1_OF_3;
|
|
goto noreset;
|
|
}
|
|
else if (IS_UTF8_FIRST_BYTE_OF_4(b))
|
|
{
|
|
/* Some UTF-8 4-byte sequences that encode out-of-range
|
|
codepoints can be detected from the first byte (F5 - FF). */
|
|
decoder->bits = (uint32_t)BOTTOM_3_BITS(b) << 18;
|
|
if (decoder->bits > MAX_CODEPOINT)
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
|
|
}
|
|
else
|
|
{
|
|
decoder->state = DECODED_1_OF_4;
|
|
goto noreset;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* The byte is of the form 11111xxx or 10xxxxxx, and is not
|
|
a valid first byte for a UTF-8 sequence. */
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
|
|
}
|
|
break;
|
|
|
|
case DECODED_1_OF_2:
|
|
if (IS_UTF8_CONTINUATION_BYTE(b))
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits | BOTTOM_6_BITS(b));
|
|
}
|
|
else
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
|
|
|
|
}
|
|
break;
|
|
|
|
case DECODED_1_OF_3:
|
|
if (IS_UTF8_CONTINUATION_BYTE(b))
|
|
{
|
|
/* UTF-8 3-byte sequences that are overlong encodings or encode
|
|
surrogate codepoints can be detected after 2 bytes. */
|
|
decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6;
|
|
if ((decoder->bits < FIRST_3_BYTE_UTF8_CODEPOINT) || IS_SURROGATE(decoder->bits))
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
|
|
}
|
|
else
|
|
{
|
|
decoder->state = DECODED_2_OF_3;
|
|
goto noreset;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
|
|
}
|
|
break;
|
|
|
|
case DECODED_2_OF_3:
|
|
if (IS_UTF8_CONTINUATION_BYTE(b))
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 3, decoder->bits | BOTTOM_6_BITS(b));
|
|
}
|
|
else
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
|
|
}
|
|
break;
|
|
|
|
case DECODED_1_OF_4:
|
|
if (IS_UTF8_CONTINUATION_BYTE(b))
|
|
{
|
|
/* UTF-8 4-byte sequences that are overlong encodings or encode
|
|
out-of-range codepoints can be detected after 2 bytes. */
|
|
decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 12;
|
|
if ((decoder->bits < FIRST_4_BYTE_UTF8_CODEPOINT) || (decoder->bits > MAX_CODEPOINT))
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
|
|
}
|
|
else
|
|
{
|
|
decoder->state = DECODED_2_OF_4;
|
|
goto noreset;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
|
|
}
|
|
break;
|
|
|
|
case DECODED_2_OF_4:
|
|
if (IS_UTF8_CONTINUATION_BYTE(b))
|
|
{
|
|
decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6;
|
|
decoder->state = DECODED_3_OF_4;
|
|
goto noreset;
|
|
}
|
|
else
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
|
|
}
|
|
break;
|
|
|
|
case DECODED_3_OF_4:
|
|
if (IS_UTF8_CONTINUATION_BYTE(b))
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits | BOTTOM_6_BITS(b));
|
|
}
|
|
else
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 3, 0);
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case JSON_UTF16LE:
|
|
/* When the input encoding is UTF-16, the decoded codepoint's bits are
|
|
recorded in the bottom 2 bytes of bits as they are decoded.
|
|
If those 2 bytes form a leading surrogate, the decoder treats the
|
|
surrogate pair as a single 4-byte sequence, shifts the leading
|
|
surrogate into the high 2 bytes of bits, and decodes the
|
|
trailing surrogate's bits in the bottom 2 bytes of bits. */
|
|
switch (decoder->state)
|
|
{
|
|
case DECODER_RESET:
|
|
decoder->bits = b;
|
|
decoder->state = DECODED_1_OF_2;
|
|
goto noreset;
|
|
|
|
case DECODED_1_OF_2:
|
|
decoder->bits |= (uint32_t)b << 8;
|
|
if (IS_TRAILING_SURROGATE(decoder->bits))
|
|
{
|
|
/* A trailing surrogate cannot appear on its own. */
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0);
|
|
}
|
|
else if (IS_LEADING_SURROGATE(decoder->bits))
|
|
{
|
|
/* A leading surrogate implies a 4-byte surrogate pair. */
|
|
decoder->bits <<= 16;
|
|
decoder->state = DECODED_2_OF_4;
|
|
goto noreset;
|
|
}
|
|
else
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits);
|
|
}
|
|
break;
|
|
|
|
case DECODED_2_OF_4:
|
|
decoder->bits |= b;
|
|
decoder->state = DECODED_3_OF_4;
|
|
goto noreset;
|
|
|
|
case DECODED_3_OF_4:
|
|
decoder->bits |= (uint32_t)b << 8;
|
|
if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF))
|
|
{
|
|
/* A leading surrogate must be followed by a trailing one.
|
|
Treat the previous 3 bytes as an invalid 2-byte sequence
|
|
followed by the first byte of a new sequence. */
|
|
decoder->bits &= 0xFF;
|
|
decoder->state = DECODED_1_OF_2;
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
|
|
goto noreset;
|
|
}
|
|
else
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits));
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case JSON_UTF16BE:
|
|
/* When the input encoding is UTF-16, the decoded codepoint's bits are
|
|
recorded in the bottom 2 bytes of bits as they are decoded.
|
|
If those 2 bytes form a leading surrogate, the decoder treats the
|
|
surrogate pair as a single 4-byte sequence, shifts the leading
|
|
surrogate into the high 2 bytes of bits, and decodes the
|
|
trailing surrogate's bits in the bottom 2 bytes of bits. */
|
|
switch (decoder->state)
|
|
{
|
|
case DECODER_RESET:
|
|
decoder->bits = (uint32_t)b << 8;
|
|
decoder->state = DECODED_1_OF_2;
|
|
goto noreset;
|
|
|
|
case DECODED_1_OF_2:
|
|
decoder->bits |= b;
|
|
if (IS_TRAILING_SURROGATE(decoder->bits))
|
|
{
|
|
/* A trailing surrogate cannot appear on its own. */
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0);
|
|
}
|
|
else if (IS_LEADING_SURROGATE(decoder->bits))
|
|
{
|
|
/* A leading surrogate implies a 4-byte surrogate pair. */
|
|
decoder->bits <<= 16;
|
|
decoder->state = DECODED_2_OF_4;
|
|
goto noreset;
|
|
}
|
|
else
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits);
|
|
}
|
|
break;
|
|
|
|
case DECODED_2_OF_4:
|
|
decoder->bits |= (uint32_t)b << 8;
|
|
decoder->state = DECODED_3_OF_4;
|
|
goto noreset;
|
|
|
|
case DECODED_3_OF_4:
|
|
decoder->bits |= b;
|
|
if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF))
|
|
{
|
|
/* A leading surrogate must be followed by a trailing one.
|
|
Treat the previous 3 bytes as an invalid 2-byte sequence
|
|
followed by the first byte of a new sequence. */
|
|
decoder->bits &= 0xFF00;
|
|
decoder->state = DECODED_1_OF_2;
|
|
output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
|
|
goto noreset;
|
|
}
|
|
else
|
|
{
|
|
output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits));
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case JSON_UTF32LE:
|
|
/* When the input encoding is UTF-32, the decoded codepoint's bits are
|
|
recorded in bits as they are decoded. */
|
|
switch (decoder->state)
|
|
{
|
|
case DECODER_RESET:
|
|
decoder->state = DECODED_1_OF_4;
|
|
decoder->bits = (uint32_t)b;
|
|
goto noreset;
|
|
|
|
case DECODED_1_OF_4:
|
|
decoder->state = DECODED_2_OF_4;
|
|
decoder->bits |= (uint32_t)b << 8;
|
|
goto noreset;
|
|
|
|
case DECODED_2_OF_4:
|
|
decoder->state = DECODED_3_OF_4;
|
|
decoder->bits |= (uint32_t)b << 16;
|
|
goto noreset;
|
|
|
|
case DECODED_3_OF_4:
|
|
decoder->bits |= (uint32_t)b << 24;
|
|
output = (IS_SURROGATE(decoder->bits) || (decoder->bits > MAX_CODEPOINT))
|
|
? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0)
|
|
: DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits);
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case JSON_UTF32BE:
|
|
/* When the input encoding is UTF-32, the decoded codepoint's bits are
|
|
recorded in bits as they are decoded. */
|
|
switch (decoder->state)
|
|
{
|
|
case DECODER_RESET:
|
|
decoder->state = DECODED_1_OF_4;
|
|
decoder->bits = (uint32_t)b << 24;
|
|
goto noreset;
|
|
|
|
case DECODED_1_OF_4:
|
|
decoder->state = DECODED_2_OF_4;
|
|
decoder->bits |= (uint32_t)b << 16;
|
|
goto noreset;
|
|
|
|
case DECODED_2_OF_4:
|
|
decoder->state = DECODED_3_OF_4;
|
|
decoder->bits |= (uint32_t)b << 8;
|
|
goto noreset;
|
|
|
|
case DECODED_3_OF_4:
|
|
decoder->bits |= b;
|
|
output = (IS_SURROGATE(decoder->bits) || (decoder->bits > MAX_CODEPOINT))
|
|
? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0)
|
|
: DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits);
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
|
|
/* Reset the decoder for the next sequence. */
|
|
Decoder_Reset(decoder);
|
|
|
|
noreset:
|
|
return output;
|
|
}
|
|
|
|
/******************** Unicode Encoder ********************/
|
|
|
|
/* This function makes the following assumptions about its input:
|
|
|
|
1. The c argument is a valid codepoint (U+0000 - U+10FFFF).
|
|
2. The encoding argument is not JSON_UnknownEncoding.
|
|
3. The pBytes argument points to an array of at least 4 bytes.
|
|
*/
|
|
static size_t EncodeCodepoint(Codepoint c, Encoding encoding, byte* pBytes)
|
|
{
|
|
size_t length = 0;
|
|
switch (encoding)
|
|
{
|
|
case JSON_UTF8:
|
|
if (c < FIRST_2_BYTE_UTF8_CODEPOINT)
|
|
{
|
|
pBytes[0] = (byte)c;
|
|
length = 1;
|
|
}
|
|
else if (c < FIRST_3_BYTE_UTF8_CODEPOINT)
|
|
{
|
|
pBytes[0] = (byte)(0xC0 | (c >> 6));
|
|
pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c));
|
|
length = 2;
|
|
}
|
|
else if (c < FIRST_4_BYTE_UTF8_CODEPOINT)
|
|
{
|
|
pBytes[0] = (byte)(0xE0 | (c >> 12));
|
|
pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6));
|
|
pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c));
|
|
length = 3;
|
|
}
|
|
else
|
|
{
|
|
pBytes[0] = (byte)(0xF0 | (c >> 18));
|
|
pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 12));
|
|
pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6));
|
|
pBytes[3] = (byte)(0x80 | BOTTOM_6_BITS(c));
|
|
length = 4;
|
|
}
|
|
break;
|
|
|
|
case JSON_UTF16LE:
|
|
if (c < FIRST_NON_BMP_CODEPOINT)
|
|
{
|
|
pBytes[0] = (byte)(c);
|
|
pBytes[1] = (byte)(c >> 8);
|
|
length = 2;
|
|
}
|
|
else
|
|
{
|
|
uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c);
|
|
|
|
/* Leading surrogate. */
|
|
pBytes[0] = (byte)(surrogates >> 16);
|
|
pBytes[1] = (byte)(surrogates >> 24);
|
|
|
|
/* Trailing surrogate. */
|
|
pBytes[2] = (byte)(surrogates);
|
|
pBytes[3] = (byte)(surrogates >> 8);
|
|
length = 4;
|
|
}
|
|
break;
|
|
|
|
case JSON_UTF16BE:
|
|
if (c < FIRST_NON_BMP_CODEPOINT)
|
|
{
|
|
pBytes[1] = (byte)(c);
|
|
pBytes[0] = (byte)(c >> 8);
|
|
length = 2;
|
|
}
|
|
else
|
|
{
|
|
/* The codepoint requires a surrogate pair in UTF-16. */
|
|
uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c);
|
|
|
|
/* Leading surrogate. */
|
|
pBytes[1] = (byte)(surrogates >> 16);
|
|
pBytes[0] = (byte)(surrogates >> 24);
|
|
|
|
/* Trailing surrogate. */
|
|
pBytes[3] = (byte)(surrogates);
|
|
pBytes[2] = (byte)(surrogates >> 8);
|
|
length = 4;
|
|
}
|
|
break;
|
|
|
|
case JSON_UTF32LE:
|
|
pBytes[0] = (byte)(c);
|
|
pBytes[1] = (byte)(c >> 8);
|
|
pBytes[2] = (byte)(c >> 16);
|
|
pBytes[3] = (byte)(c >> 24);
|
|
length = 4;
|
|
break;
|
|
|
|
case JSON_UTF32BE:
|
|
pBytes[3] = (byte)(c);
|
|
pBytes[2] = (byte)(c >> 8);
|
|
pBytes[1] = (byte)(c >> 16);
|
|
pBytes[0] = (byte)(c >> 24);
|
|
length = 4;
|
|
break;
|
|
}
|
|
return length;
|
|
}
|
|
|
|
/******************** JSON Lexer States ********************/
|
|
|
|
/* Mutually-exclusive lexer states. */
|
|
#define LEXING_WHITESPACE 0
|
|
#define LEXING_LITERAL 1
|
|
#define LEXING_STRING 2
|
|
#define LEXING_STRING_ESCAPE 3
|
|
#define LEXING_STRING_HEX_ESCAPE_BYTE_1 4
|
|
#define LEXING_STRING_HEX_ESCAPE_BYTE_2 5
|
|
#define LEXING_STRING_HEX_ESCAPE_BYTE_3 6
|
|
#define LEXING_STRING_HEX_ESCAPE_BYTE_4 7
|
|
#define LEXING_STRING_HEX_ESCAPE_BYTE_5 8
|
|
#define LEXING_STRING_HEX_ESCAPE_BYTE_6 9
|
|
#define LEXING_STRING_HEX_ESCAPE_BYTE_7 10
|
|
#define LEXING_STRING_HEX_ESCAPE_BYTE_8 11
|
|
#define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH 12
|
|
#define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U 13
|
|
#define LEXING_NUMBER_AFTER_MINUS 14
|
|
#define LEXING_NUMBER_AFTER_LEADING_ZERO 15
|
|
#define LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO 16
|
|
#define LEXING_NUMBER_AFTER_X 17
|
|
#define LEXING_NUMBER_HEX_DIGITS 18
|
|
#define LEXING_NUMBER_DECIMAL_DIGITS 19
|
|
#define LEXING_NUMBER_AFTER_DOT 20
|
|
#define LEXING_NUMBER_FRACTIONAL_DIGITS 21
|
|
#define LEXING_NUMBER_AFTER_E 22
|
|
#define LEXING_NUMBER_AFTER_EXPONENT_SIGN 23
|
|
#define LEXING_NUMBER_EXPONENT_DIGITS 24
|
|
#define LEXING_COMMENT_AFTER_SLASH 25
|
|
#define LEXING_SINGLE_LINE_COMMENT 26
|
|
#define LEXING_MULTI_LINE_COMMENT 27
|
|
#define LEXING_MULTI_LINE_COMMENT_AFTER_STAR 28
|
|
#define LEXER_ERROR 255
|
|
typedef byte LexerState;
|
|
|
|
/******************** JSON Grammarian ********************/
|
|
|
|
/* The JSON grammar comprises the following productions:
|
|
|
|
1. VALUE => null
|
|
2. VALUE => boolean
|
|
3. VALUE => string
|
|
4. VALUE => number
|
|
5. VALUE => specialnumber
|
|
6. VALUE => { MEMBERS }
|
|
7. VALUE => [ ITEMS ]
|
|
8. MEMBERS => MEMBER MORE_MEMBERS
|
|
9. MEMBERS => e
|
|
10. MEMBER => string : VALUE
|
|
11. MORE_MEMBERS => , MEMBER MORE_MEMBERS
|
|
12. MORE_MEMBERS => e
|
|
13. ITEMS => ITEM MORE_ITEMS
|
|
14. ITEMS => e
|
|
15. ITEM => VALUE
|
|
16. MORE_ITEMS => , ITEM MORE_ITEMS
|
|
17. MORE_ITEMS => e
|
|
|
|
We implement a simple LL(1) parser based on this grammar, with events
|
|
emitted when certain non-terminals are replaced.
|
|
*/
|
|
|
|
/* Mutually-exclusive grammar tokens and non-terminals. The values are defined
|
|
so that the bottom 4 bits of a value can be used as an index into the
|
|
grammar production rule table. */
|
|
#define T_NONE 0x00 /* tokens are in the form 0x0X */
|
|
#define T_NULL 0x01
|
|
#define T_TRUE 0x02
|
|
#define T_FALSE 0x03
|
|
#define T_STRING 0x04
|
|
#define T_NUMBER 0x05
|
|
#define T_NAN 0x06
|
|
#define T_INFINITY 0x07
|
|
#define T_NEGATIVE_INFINITY 0x08
|
|
#define T_LEFT_CURLY 0x09
|
|
#define T_RIGHT_CURLY 0x0A
|
|
#define T_LEFT_SQUARE 0x0B
|
|
#define T_RIGHT_SQUARE 0x0C
|
|
#define T_COLON 0x0D
|
|
#define T_COMMA 0x0E
|
|
#define NT_VALUE 0x10 /* non-terminals are in the form 0x1X */
|
|
#define NT_MEMBERS 0x11
|
|
#define NT_MEMBER 0x12
|
|
#define NT_MORE_MEMBERS 0x13
|
|
#define NT_ITEMS 0x14
|
|
#define NT_ITEM 0x15
|
|
#define NT_MORE_ITEMS 0x16
|
|
typedef byte Symbol;
|
|
|
|
#define IS_NONTERMINAL(s) ((s) & 0x10)
|
|
#define IS_TOKEN(s) !IS_NONTERMINAL(s)
|
|
|
|
/* Grammarian data. */
|
|
typedef struct tag_GrammarianData
|
|
{
|
|
Symbol* pStack; /* initially set to defaultStack */
|
|
size_t stackSize;
|
|
size_t stackUsed;
|
|
Symbol defaultStack[DEFAULT_SYMBOL_STACK_SIZE];
|
|
} GrammarianData;
|
|
typedef GrammarianData* Grammarian;
|
|
|
|
/* Mutually-exclusive result codes returned by the grammarian
|
|
after processing a token. */
|
|
#define ACCEPTED_TOKEN 0
|
|
#define REJECTED_TOKEN 1
|
|
#define SYMBOL_STACK_FULL 2
|
|
typedef uint32_t GrammarianResultCode;
|
|
|
|
/* Events emitted by the grammarian as a result of processing a
|
|
token. Note that EMIT_ARRAY_ITEM always appears bitwise OR-ed
|
|
with one of the other values. */
|
|
#define EMIT_NOTHING 0x00
|
|
#define EMIT_NULL 0x01
|
|
#define EMIT_BOOLEAN 0x02
|
|
#define EMIT_STRING 0x03
|
|
#define EMIT_NUMBER 0x04
|
|
#define EMIT_SPECIAL_NUMBER 0x05
|
|
#define EMIT_START_OBJECT 0x06
|
|
#define EMIT_END_OBJECT 0x07
|
|
#define EMIT_OBJECT_MEMBER 0x08
|
|
#define EMIT_START_ARRAY 0x09
|
|
#define EMIT_END_ARRAY 0x0A
|
|
#define EMIT_ARRAY_ITEM 0x10 /* may be combined with other values */
|
|
typedef byte GrammarEvent;
|
|
|
|
/* The bits of GrammarianOutput are layed out as follows:
|
|
|
|
-rreeeee
|
|
|
|
- = unused (1 bit)
|
|
r = result code (2 bits)
|
|
e = event (5 bits)
|
|
*/
|
|
#define GRAMMARIAN_OUTPUT(r, e) (GrammarianOutput)(((GrammarianResultCode)(r) << 5) | (GrammarEvent)(e))
|
|
#define GRAMMARIAN_RESULT_CODE(o) (GrammarianResultCode)((GrammarianOutput)(o) >> 5)
|
|
#define GRAMMARIAN_EVENT(o) (GrammarEvent)((GrammarianOutput)(o) & 0x1F)
|
|
typedef byte GrammarianOutput;
|
|
|
|
/* Grammar rule used by the grammarian to process a token. */
|
|
typedef struct tag_GrammarRule
|
|
{
|
|
Symbol symbolToPush1;
|
|
Symbol symbolToPush2;
|
|
byte reprocess;
|
|
GrammarEvent emit;
|
|
} GrammarRule;
|
|
|
|
/* Grammarian functions. */
|
|
|
|
static void Grammarian_Reset(Grammarian grammarian, int isInitialized)
|
|
{
|
|
/* When we reset the grammarian, we keep the symbol stack that has
|
|
already been allocated, if any. If the client wants to reclaim the
|
|
memory used by the that buffer, he needs to free the grammarian
|
|
and create a new one. */
|
|
if (!isInitialized)
|
|
{
|
|
grammarian->pStack = grammarian->defaultStack;
|
|
grammarian->stackSize = sizeof(grammarian->defaultStack);
|
|
}
|
|
|
|
/* The grammarian always starts with NT_VALUE on the symbol stack. */
|
|
grammarian->pStack[0] = NT_VALUE;
|
|
grammarian->stackUsed = 1;
|
|
}
|
|
|
|
static void Grammarian_FreeAllocations(Grammarian grammarian,
|
|
const JSON_MemorySuite* pMemorySuite)
|
|
{
|
|
if (grammarian->pStack != grammarian->defaultStack)
|
|
pMemorySuite->free(pMemorySuite->userData, grammarian->pStack);
|
|
}
|
|
|
|
static int Grammarian_FinishedDocument(Grammarian grammarian)
|
|
{
|
|
return !grammarian->stackUsed;
|
|
}
|
|
|
|
static GrammarianOutput Grammarian_ProcessToken(Grammarian grammarian,
|
|
Symbol token, const JSON_MemorySuite* pMemorySuite)
|
|
{
|
|
/* The order and number of the rows and columns in this table must
|
|
match the defined token and non-terminal symbol values.
|
|
|
|
The row index is the incoming token's Symbol value.
|
|
|
|
The column index is the bottom 4 bits of Symbol value of
|
|
the non-terminal at the top of the processing stack.
|
|
Since non-terminal Symbol values start at 0x10, taking
|
|
the bottom 4 bits yields a 0-based index. */
|
|
static const byte ruleLookup[15][7] =
|
|
{
|
|
/* V MS M MM IS I MI */
|
|
/* ---- */ { 0, 0, 0, 0, 0, 0, 0 },
|
|
/* null */ { 1, 0, 0, 0, 13, 15, 0 },
|
|
/* true */ { 2, 0, 0, 0, 13, 15, 0 },
|
|
/* false */ { 2, 0, 0, 0, 13, 15, 0 },
|
|
/* string */ { 3, 8, 10, 0, 13, 15, 0 },
|
|
/* number */ { 4, 0, 0, 0, 13, 15, 0 },
|
|
/* NaN */ { 5, 0, 0, 0, 13, 15, 0 },
|
|
/* Inf */ { 5, 0, 0, 0, 13, 15, 0 },
|
|
/* -Inf */ { 5, 0, 0, 0, 13, 15, 0 },
|
|
/* { */ { 6, 0, 0, 0, 13, 15, 0 },
|
|
/* } */ { 0, 9, 0, 12, 0, 0, 0 },
|
|
/* [ */ { 7, 0, 0, 0, 13, 15, 0 },
|
|
/* ] */ { 0, 0, 0, 0, 14, 0, 17 },
|
|
/* : */ { 0, 0, 0, 0, 0, 0, 0 },
|
|
/* , */ { 0, 0, 0, 11, 0, 0, 16 }
|
|
};
|
|
|
|
static const GrammarRule rules[17] =
|
|
{
|
|
/* 1. */ { T_NONE, T_NONE, 0, EMIT_NULL },
|
|
/* 2. */ { T_NONE, T_NONE, 0, EMIT_BOOLEAN },
|
|
/* 3. */ { T_NONE, T_NONE, 0, EMIT_STRING },
|
|
/* 4. */ { T_NONE, T_NONE, 0, EMIT_NUMBER },
|
|
/* 5. */ { T_NONE, T_NONE, 0, EMIT_SPECIAL_NUMBER },
|
|
/* 6. */ { T_RIGHT_CURLY, NT_MEMBERS, 0, EMIT_START_OBJECT },
|
|
/* 7. */ { T_RIGHT_SQUARE, NT_ITEMS, 0, EMIT_START_ARRAY },
|
|
/* 8. */ { NT_MORE_MEMBERS, NT_MEMBER, 1, EMIT_NOTHING },
|
|
/* 9. */ { T_NONE, T_NONE, 1, EMIT_END_OBJECT },
|
|
/* 10. */ { NT_VALUE, T_COLON, 0, EMIT_OBJECT_MEMBER },
|
|
/* 11. */ { NT_MORE_MEMBERS, NT_MEMBER, 0, EMIT_NOTHING },
|
|
/* 12. */ { T_NONE, T_NONE, 1, EMIT_END_OBJECT },
|
|
/* 13. */ { NT_MORE_ITEMS, NT_ITEM, 1, EMIT_NOTHING },
|
|
/* 14. */ { T_NONE, T_NONE, 1, EMIT_END_ARRAY },
|
|
/* 15. */ { NT_VALUE, T_NONE, 1, EMIT_ARRAY_ITEM },
|
|
/* 16. */ { NT_MORE_ITEMS, NT_ITEM, 0, EMIT_NOTHING },
|
|
/* 17. */ { T_NONE, T_NONE, 1, EMIT_END_ARRAY }
|
|
};
|
|
|
|
GrammarEvent emit = EMIT_NOTHING;
|
|
|
|
/* If the stack is empty, no more tokens were expected. */
|
|
if (Grammarian_FinishedDocument(grammarian))
|
|
return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
|
|
|
|
for (;;)
|
|
{
|
|
Symbol topSymbol = grammarian->pStack[grammarian->stackUsed - 1];
|
|
if (IS_TOKEN(topSymbol))
|
|
{
|
|
if (topSymbol != token)
|
|
return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
|
|
grammarian->stackUsed--;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
const GrammarRule* pRule = NULL;
|
|
byte ruleNumber = ruleLookup[token][BOTTOM_4_BITS(topSymbol)];
|
|
|
|
if (ruleNumber == 0)
|
|
return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
|
|
|
|
pRule = &rules[ruleNumber - 1];
|
|
|
|
/* The rule removes the top symbol and does not replace it. */
|
|
if (pRule->symbolToPush1 == T_NONE)
|
|
grammarian->stackUsed--;
|
|
else
|
|
{
|
|
/* The rule replaces the top symbol with 1 or 2 symbols. */
|
|
grammarian->pStack[grammarian->stackUsed - 1] = pRule->symbolToPush1;
|
|
if (pRule->symbolToPush2 != T_NONE)
|
|
{
|
|
/* The rule replaces the top symbol with 2 symbols.
|
|
Make sure the stack has room for the second one. */
|
|
if (grammarian->stackUsed == grammarian->stackSize)
|
|
{
|
|
Symbol* pBiggerStack = DoubleBuffer(pMemorySuite,
|
|
grammarian->defaultStack, grammarian->pStack,
|
|
grammarian->stackSize);
|
|
|
|
if (!pBiggerStack)
|
|
return GRAMMARIAN_OUTPUT(SYMBOL_STACK_FULL, EMIT_NOTHING);
|
|
|
|
grammarian->pStack = pBiggerStack;
|
|
grammarian->stackSize *= 2;
|
|
}
|
|
grammarian->pStack[grammarian->stackUsed] = pRule->symbolToPush2;
|
|
grammarian->stackUsed++;
|
|
}
|
|
}
|
|
emit |= pRule->emit;
|
|
if (!pRule->reprocess)
|
|
break;
|
|
}
|
|
}
|
|
|
|
return GRAMMARIAN_OUTPUT(ACCEPTED_TOKEN, emit);
|
|
}
|
|
|
|
/******************** JSON Parser ********************/
|
|
|
|
#ifndef JSON_NO_PARSER
|
|
|
|
/* Combinable parser state flags. */
|
|
#define PARSER_RESET 0x00
|
|
#define PARSER_STARTED 0x01
|
|
#define PARSER_FINISHED 0x02
|
|
#define PARSER_IN_PROTECTED_API 0x04
|
|
#define PARSER_IN_TOKEN_HANDLER 0x08
|
|
#define PARSER_AFTER_CARRIAGE_RETURN 0x10
|
|
typedef byte ParserState;
|
|
|
|
/* Combinable parser settings flags. */
|
|
#define PARSER_DEFAULT_FLAGS 0x00
|
|
#define PARSER_ALLOW_BOM 0x01
|
|
#define PARSER_ALLOW_COMMENTS 0x02
|
|
#define PARSER_ALLOW_SPECIAL_NUMBERS 0x04
|
|
#define PARSER_ALLOW_HEX_NUMBERS 0x08
|
|
#define PARSER_REPLACE_INVALID 0x10
|
|
#define PARSER_TRACK_OBJECT_MEMBERS 0x20
|
|
#define PARSER_ALLOW_CONTROL_CHARS 0x40
|
|
#define PARSER_EMBEDDED_DOCUMENT 0x80
|
|
typedef byte ParserFlags;
|
|
|
|
/* Sentinel value for parser error location offset. */
|
|
#define ERROR_LOCATION_IS_TOKEN_START 0xFF
|
|
|
|
/* An object member name stored in an unordered, singly-linked-list, used for
|
|
detecting duplicate member names. Note that the name string is not null-
|
|
terminated. */
|
|
typedef struct tag_MemberName
|
|
{
|
|
struct tag_MemberName* pNextName;
|
|
size_t length;
|
|
byte pBytes[1]; /* variable-size buffer */
|
|
} MemberName;
|
|
|
|
/* An object's list of member names, and a pointer to the object's
|
|
nearest ancestor object, if any. This is used as a stack. Because arrays
|
|
do not have named items, they do not need to be recorded in the stack. */
|
|
typedef struct tag_MemberNames
|
|
{
|
|
struct tag_MemberNames* pAncestor;
|
|
MemberName* pFirstName;
|
|
} MemberNames;
|
|
|
|
/* A parser instance. */
|
|
struct JSON_Parser_Data
|
|
{
|
|
JSON_MemorySuite memorySuite;
|
|
void* userData;
|
|
ParserState state;
|
|
ParserFlags flags;
|
|
Encoding inputEncoding;
|
|
Encoding stringEncoding;
|
|
Encoding numberEncoding;
|
|
Symbol token;
|
|
TokenAttributes tokenAttributes;
|
|
Error error;
|
|
byte errorOffset;
|
|
LexerState lexerState;
|
|
uint32_t lexerBits;
|
|
size_t codepointLocationByte;
|
|
size_t codepointLocationLine;
|
|
size_t codepointLocationColumn;
|
|
size_t tokenLocationByte;
|
|
size_t tokenLocationLine;
|
|
size_t tokenLocationColumn;
|
|
size_t depth;
|
|
byte* pTokenBytes;
|
|
size_t tokenBytesLength;
|
|
size_t tokenBytesUsed;
|
|
size_t maxStringLength;
|
|
size_t maxNumberLength;
|
|
MemberNames* pMemberNames;
|
|
DecoderData decoderData;
|
|
GrammarianData grammarianData;
|
|
JSON_Parser_EncodingDetectedHandler encodingDetectedHandler;
|
|
JSON_Parser_NullHandler nullHandler;
|
|
JSON_Parser_BooleanHandler booleanHandler;
|
|
JSON_Parser_StringHandler stringHandler;
|
|
JSON_Parser_NumberHandler numberHandler;
|
|
JSON_Parser_SpecialNumberHandler specialNumberHandler;
|
|
JSON_Parser_StartObjectHandler startObjectHandler;
|
|
JSON_Parser_EndObjectHandler endObjectHandler;
|
|
JSON_Parser_ObjectMemberHandler objectMemberHandler;
|
|
JSON_Parser_StartArrayHandler startArrayHandler;
|
|
JSON_Parser_EndArrayHandler endArrayHandler;
|
|
JSON_Parser_ArrayItemHandler arrayItemHandler;
|
|
byte defaultTokenBytes[DEFAULT_TOKEN_BYTES_LENGTH];
|
|
};
|
|
|
|
/* Parser internal functions. */
|
|
|
|
static void JSON_Parser_SetErrorAtCodepoint(JSON_Parser parser, Error error)
|
|
{
|
|
parser->error = error;
|
|
}
|
|
|
|
static void JSON_Parser_SetErrorAtStringEscapeSequenceStart(
|
|
JSON_Parser parser, Error error, int codepointsAgo)
|
|
{
|
|
/* Note that backtracking from the current codepoint requires us to make
|
|
three assumptions, which are always valid in the context of a string
|
|
escape sequence:
|
|
|
|
1. The input encoding is not JSON_UnknownEncoding.
|
|
|
|
2 The codepoints we are backing up across are all in the range
|
|
U+0000 - U+007F, aka ASCII, so we can assume the number of
|
|
bytes comprising them based on the input encoding.
|
|
|
|
3. The codepoints we are backing up across do not include any
|
|
line breaks, so we can assume that the line number stays the
|
|
same and the column number can simply be decremented.
|
|
*/
|
|
parser->error = error;
|
|
parser->errorOffset = (byte)codepointsAgo;
|
|
}
|
|
|
|
static void JSON_Parser_SetErrorAtToken(JSON_Parser parser, Error error)
|
|
{
|
|
parser->error = error;
|
|
parser->errorOffset = ERROR_LOCATION_IS_TOKEN_START;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_PushMemberNameList(JSON_Parser parser)
|
|
{
|
|
MemberNames* pNames = (MemberNames*)parser->memorySuite.realloc(
|
|
parser->memorySuite.userData, NULL, sizeof(MemberNames));
|
|
|
|
if (!pNames)
|
|
{
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
|
|
return JSON_Failure;
|
|
}
|
|
|
|
pNames->pAncestor = parser->pMemberNames;
|
|
pNames->pFirstName = NULL;
|
|
parser->pMemberNames = pNames;
|
|
return JSON_Success;
|
|
}
|
|
|
|
static void JSON_Parser_PopMemberNameList(JSON_Parser parser)
|
|
{
|
|
MemberNames* pAncestor = parser->pMemberNames->pAncestor;
|
|
while (parser->pMemberNames->pFirstName)
|
|
{
|
|
MemberName* pNextName = parser->pMemberNames->pFirstName->pNextName;
|
|
parser->memorySuite.free(parser->memorySuite.userData, parser->pMemberNames->pFirstName);
|
|
parser->pMemberNames->pFirstName = pNextName;
|
|
}
|
|
parser->memorySuite.free(parser->memorySuite.userData, parser->pMemberNames);
|
|
parser->pMemberNames = pAncestor;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_StartContainer(JSON_Parser parser, int isObject)
|
|
{
|
|
if (isObject && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS) &&
|
|
!JSON_Parser_PushMemberNameList(parser))
|
|
{
|
|
return JSON_Failure;
|
|
}
|
|
parser->depth++;
|
|
return JSON_Success;
|
|
}
|
|
|
|
static void JSON_Parser_EndContainer(JSON_Parser parser, int isObject)
|
|
{
|
|
parser->depth--;
|
|
if (isObject && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
|
|
{
|
|
JSON_Parser_PopMemberNameList(parser);
|
|
}
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_AddMemberNameToList(JSON_Parser parser)
|
|
{
|
|
if (GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
|
|
{
|
|
MemberName* pName;
|
|
for (pName = parser->pMemberNames->pFirstName; pName; pName = pName->pNextName)
|
|
{
|
|
if (pName->length == parser->tokenBytesUsed && !memcmp(pName->pBytes, parser->pTokenBytes, pName->length))
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_DuplicateObjectMember);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
pName = (MemberName*)parser->memorySuite.realloc(parser->memorySuite.userData, NULL, sizeof(MemberName) + parser->tokenBytesUsed - 1);
|
|
if (!pName)
|
|
{
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
|
|
return JSON_Failure;
|
|
}
|
|
pName->pNextName = parser->pMemberNames->pFirstName;
|
|
pName->length = parser->tokenBytesUsed;
|
|
memcpy(pName->pBytes, parser->pTokenBytes, parser->tokenBytesUsed);
|
|
parser->pMemberNames->pFirstName = pName;
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
static void JSON_Parser_ResetData(JSON_Parser parser, int isInitialized)
|
|
{
|
|
parser->userData = NULL;
|
|
parser->flags = PARSER_DEFAULT_FLAGS;
|
|
parser->inputEncoding = JSON_UnknownEncoding;
|
|
parser->stringEncoding = JSON_UTF8;
|
|
parser->numberEncoding = JSON_UTF8;
|
|
parser->token = T_NONE;
|
|
parser->tokenAttributes = 0;
|
|
parser->error = JSON_Error_None;
|
|
parser->errorOffset = 0;
|
|
parser->lexerState = LEXING_WHITESPACE;
|
|
parser->lexerBits = 0;
|
|
parser->codepointLocationByte = 0;
|
|
parser->codepointLocationLine = 0;
|
|
parser->codepointLocationColumn = 0;
|
|
parser->tokenLocationByte = 0;
|
|
parser->tokenLocationLine = 0;
|
|
parser->tokenLocationColumn = 0;
|
|
parser->depth = 0;
|
|
|
|
if (!isInitialized)
|
|
{
|
|
parser->pTokenBytes = parser->defaultTokenBytes;
|
|
parser->tokenBytesLength = sizeof(parser->defaultTokenBytes);
|
|
}
|
|
else
|
|
{
|
|
/* When we reset the parser, we keep the output buffer and the symbol
|
|
stack that have already been allocated, if any. If the client wants
|
|
to reclaim the memory used by the those buffers, he needs to free
|
|
the parser and create a new one. */
|
|
}
|
|
parser->tokenBytesUsed = 0;
|
|
parser->maxStringLength = SIZE_MAX;
|
|
parser->maxNumberLength = SIZE_MAX;
|
|
if (!isInitialized)
|
|
parser->pMemberNames = NULL;
|
|
else
|
|
{
|
|
while (parser->pMemberNames)
|
|
JSON_Parser_PopMemberNameList(parser);
|
|
}
|
|
Decoder_Reset(&parser->decoderData);
|
|
Grammarian_Reset(&parser->grammarianData, isInitialized);
|
|
parser->encodingDetectedHandler = NULL;
|
|
parser->nullHandler = NULL;
|
|
parser->booleanHandler = NULL;
|
|
parser->stringHandler = NULL;
|
|
parser->numberHandler = NULL;
|
|
parser->specialNumberHandler = NULL;
|
|
parser->startObjectHandler = NULL;
|
|
parser->endObjectHandler = NULL;
|
|
parser->objectMemberHandler = NULL;
|
|
parser->startArrayHandler = NULL;
|
|
parser->endArrayHandler = NULL;
|
|
parser->arrayItemHandler = NULL;
|
|
parser->state = PARSER_RESET; /* do this last! */
|
|
}
|
|
|
|
static void JSON_Parser_NullTerminateToken(JSON_Parser parser)
|
|
{
|
|
/* Because we always ensure that there are LONGEST_ENCODING_SEQUENCE bytes
|
|
available at the end of the token buffer when we record codepoints, we
|
|
can write the null terminator to the buffer with impunity. */
|
|
static const byte nullTerminatorBytes[LONGEST_ENCODING_SEQUENCE] = { 0 };
|
|
Encoding encoding = (Encoding)((parser->token == T_NUMBER) ? parser->numberEncoding : parser->stringEncoding);
|
|
memcpy(parser->pTokenBytes + parser->tokenBytesUsed, nullTerminatorBytes, (size_t)SHORTEST_ENCODING_SEQUENCE(encoding));
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_FlushParser(JSON_Parser parser)
|
|
{
|
|
/* The symbol stack should be empty when parsing finishes. */
|
|
if (!Grammarian_FinishedDocument(&parser->grammarianData))
|
|
{
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_ExpectedMoreTokens);
|
|
return JSON_Failure;
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_SimpleTokenHandler)(JSON_Parser parser);
|
|
static JSON_Status JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser, JSON_Parser_SimpleTokenHandler handler)
|
|
{
|
|
if (handler)
|
|
{
|
|
JSON_Parser_HandlerResult result;
|
|
SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
|
|
result = handler(parser);
|
|
SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
|
|
if (result != JSON_Parser_Continue)
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_CallBooleanHandler(JSON_Parser parser)
|
|
{
|
|
if (parser->booleanHandler)
|
|
{
|
|
JSON_Parser_HandlerResult result;
|
|
SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
|
|
result = parser->booleanHandler(parser, parser->token == T_TRUE ? JSON_True : JSON_False);
|
|
SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
|
|
if (result != JSON_Parser_Continue)
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_CallStringHandler(JSON_Parser parser, int isObjectMember)
|
|
{
|
|
JSON_Parser_StringHandler handler = isObjectMember ? parser->objectMemberHandler : parser->stringHandler;
|
|
if (handler)
|
|
{
|
|
JSON_Parser_HandlerResult result;
|
|
JSON_Parser_NullTerminateToken(parser);
|
|
SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
|
|
result = handler(parser, (char*)parser->pTokenBytes, parser->tokenBytesUsed, parser->tokenAttributes);
|
|
SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
|
|
|
|
if (result != JSON_Parser_Continue)
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser,
|
|
(isObjectMember && result == JSON_Parser_TreatAsDuplicateObjectMember)
|
|
? JSON_Error_DuplicateObjectMember
|
|
: JSON_Error_AbortedByHandler);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_CallNumberHandler(JSON_Parser parser)
|
|
{
|
|
if (parser->numberHandler)
|
|
{
|
|
JSON_Parser_HandlerResult result;
|
|
JSON_Parser_NullTerminateToken(parser);
|
|
SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
|
|
result = parser->numberHandler(parser, (char*)parser->pTokenBytes,
|
|
parser->tokenBytesUsed, parser->tokenAttributes);
|
|
|
|
SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
|
|
|
|
if (result != JSON_Parser_Continue)
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser)
|
|
{
|
|
if (parser->specialNumberHandler)
|
|
{
|
|
JSON_Parser_HandlerResult result;
|
|
SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
|
|
result = parser->specialNumberHandler(parser, parser->token == T_NAN ? JSON_NaN :
|
|
(parser->token == T_INFINITY ? JSON_Infinity : JSON_NegativeInfinity));
|
|
SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
|
|
|
|
if (result != JSON_Parser_Continue)
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_HandleGrammarEvents(JSON_Parser parser, byte emit)
|
|
{
|
|
if (GET_FLAGS(emit, EMIT_ARRAY_ITEM))
|
|
{
|
|
if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->arrayItemHandler))
|
|
{
|
|
return JSON_Failure;
|
|
}
|
|
SET_FLAGS_OFF(byte, emit, EMIT_ARRAY_ITEM);
|
|
}
|
|
switch (emit)
|
|
{
|
|
case EMIT_NULL:
|
|
if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->nullHandler))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case EMIT_BOOLEAN:
|
|
if (!JSON_Parser_CallBooleanHandler(parser))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case EMIT_STRING:
|
|
if (!JSON_Parser_CallStringHandler(parser, 0/* isObjectMember */))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case EMIT_NUMBER:
|
|
if (!JSON_Parser_CallNumberHandler(parser))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case EMIT_SPECIAL_NUMBER:
|
|
if (!JSON_Parser_CallSpecialNumberHandler(parser))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case EMIT_START_OBJECT:
|
|
if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startObjectHandler) ||
|
|
!JSON_Parser_StartContainer(parser, 1/*isObject*/))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case EMIT_END_OBJECT:
|
|
JSON_Parser_EndContainer(parser, 1/*isObject*/);
|
|
if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endObjectHandler))
|
|
return JSON_Failure;
|
|
break;
|
|
case EMIT_OBJECT_MEMBER:
|
|
if (!JSON_Parser_AddMemberNameToList(parser) || /* will fail if member is duplicate */
|
|
!JSON_Parser_CallStringHandler(parser, 1 /* isObjectMember */))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case EMIT_START_ARRAY:
|
|
if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startArrayHandler) ||
|
|
!JSON_Parser_StartContainer(parser, 0/*isObject*/))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case EMIT_END_ARRAY:
|
|
JSON_Parser_EndContainer(parser, 0/*isObject*/);
|
|
if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endArrayHandler))
|
|
return JSON_Failure;
|
|
break;
|
|
}
|
|
|
|
if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
|
|
{
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_StoppedAfterEmbeddedDocument);
|
|
return JSON_Failure;
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_ProcessToken(JSON_Parser parser)
|
|
{
|
|
GrammarianOutput output;
|
|
output = Grammarian_ProcessToken(&parser->grammarianData, parser->token, &parser->memorySuite);
|
|
switch (GRAMMARIAN_RESULT_CODE(output))
|
|
{
|
|
case ACCEPTED_TOKEN:
|
|
if (!JSON_Parser_HandleGrammarEvents(parser, GRAMMARIAN_EVENT(output)))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case REJECTED_TOKEN:
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnexpectedToken);
|
|
return JSON_Failure;
|
|
|
|
case SYMBOL_STACK_FULL:
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
|
|
return JSON_Failure;
|
|
}
|
|
|
|
/* Reset the lexer to prepare for the next token. */
|
|
parser->lexerState = LEXING_WHITESPACE;
|
|
parser->lexerBits = 0;
|
|
parser->token = T_NONE;
|
|
parser->tokenAttributes = 0;
|
|
parser->tokenBytesUsed = 0;
|
|
return JSON_Success;
|
|
}
|
|
|
|
/* Lexer functions. */
|
|
|
|
static const byte expectedLiteralChars[] = { 'u', 'l', 'l', 0, 'r', 'u', 'e', 0, 'a', 'l', 's', 'e', 0, 'a', 'N', 0, 'n', 'f', 'i', 'n', 'i', 't', 'y', 0 };
|
|
|
|
#define NULL_LITERAL_EXPECTED_CHARS_START_INDEX 0
|
|
#define TRUE_LITERAL_EXPECTED_CHARS_START_INDEX 4
|
|
#define FALSE_LITERAL_EXPECTED_CHARS_START_INDEX 8
|
|
#define NAN_LITERAL_EXPECTED_CHARS_START_INDEX 13
|
|
#define INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX 16
|
|
|
|
/* Forward declaration. */
|
|
static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser);
|
|
static JSON_Status JSON_Parser_ProcessCodepoint(
|
|
JSON_Parser parser, Codepoint c, size_t encodedLength);
|
|
|
|
static JSON_Status JSON_Parser_HandleInvalidEncodingSequence(
|
|
JSON_Parser parser, size_t encodedLength)
|
|
{
|
|
if (parser->token == T_STRING && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID))
|
|
{
|
|
/* Since we're inside a string token, replacing the invalid sequence
|
|
with the Unicode replacement character as requested by the client
|
|
is a viable way to avoid a parse failure. Outside a string token,
|
|
such a replacement would simply trigger JSON_Error_UnknownToken
|
|
when we tried to process the replacement character, so it's less
|
|
confusing to stick with JSON_Error_InvalidEncodingSequence in that
|
|
case. */
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsReplacedCharacter);
|
|
return JSON_Parser_ProcessCodepoint(parser, REPLACEMENT_CHARACTER_CODEPOINT, encodedLength);
|
|
}
|
|
else if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
|
|
{
|
|
/* Since we're parsing the top-level value of an embedded
|
|
document, assume that the invalid encoding sequence we've
|
|
encountered does not actually belong to the document, and
|
|
finish parsing by pretending that we've encountered EOF
|
|
instead of an invalid sequence. If the content is valid,
|
|
this will fail with JSON_Error_StoppedAfterEmbeddedDocument;
|
|
otherwise, it will fail with an appropriate error. */
|
|
return (JSON_Status)(JSON_Parser_FlushLexer(parser) && JSON_Parser_FlushParser(parser));
|
|
}
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_InvalidEncodingSequence);
|
|
return JSON_Failure;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_HandleInvalidNumber(JSON_Parser parser,
|
|
Codepoint c, int codepointsSinceValidNumber, TokenAttributes attributesToRemove)
|
|
{
|
|
SET_FLAGS_OFF(TokenAttributes, parser->tokenAttributes, attributesToRemove);
|
|
if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
|
|
{
|
|
/* The invalid number is the top-level value of an embedded document,
|
|
and it has a prefix that can be interpreted as a valid number.
|
|
We want to backtrack so that we are at the end of that prefix,
|
|
and then process the valid token.
|
|
|
|
Note that backtracking requires us to make three assumptions, which
|
|
are always valid in the context of a number token:
|
|
|
|
1. The input encoding is not JSON_UnknownEncoding.
|
|
|
|
2 The codepoints we are backing up across are all in the range
|
|
U+0000 - U+007F, aka ASCII, so we can assume the number of
|
|
bytes comprising them based on the input encoding.
|
|
|
|
3. The codepoints we are backing up across do not include any
|
|
line breaks, so we can assume that the line number stays the
|
|
same and the column number can simply be decremented.
|
|
|
|
For example:
|
|
|
|
"01" => "0"
|
|
"123.!" => "123"
|
|
"123e!" => "123"
|
|
"123e+!" => "123"
|
|
"123e-!" => "123"
|
|
"1.2e!" => "1.2"
|
|
"1.2e+!" => "1.2"
|
|
"1.2e-!" => "1.2"
|
|
*/
|
|
parser->codepointLocationByte -= (size_t)codepointsSinceValidNumber
|
|
* (size_t)SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding);
|
|
parser->codepointLocationColumn -= (size_t)codepointsSinceValidNumber;
|
|
parser->tokenBytesUsed -= (size_t)codepointsSinceValidNumber
|
|
* (size_t)SHORTEST_ENCODING_SEQUENCE(parser->numberEncoding);
|
|
return JSON_Parser_ProcessToken(parser); /* always fails */
|
|
}
|
|
/* Allow JSON_Parser_FlushLexer() to fail. */
|
|
else if (c == EOF_CODEPOINT)
|
|
return JSON_Success;
|
|
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_InvalidNumber);
|
|
return JSON_Failure;
|
|
}
|
|
|
|
static void JSON_Parser_StartToken(JSON_Parser parser, Symbol token)
|
|
{
|
|
parser->token = token;
|
|
parser->tokenLocationByte = parser->codepointLocationByte;
|
|
parser->tokenLocationLine = parser->codepointLocationLine;
|
|
parser->tokenLocationColumn = parser->codepointLocationColumn;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_ProcessCodepoint(JSON_Parser parser, Codepoint c, size_t encodedLength)
|
|
{
|
|
Encoding tokenEncoding;
|
|
size_t maxTokenLength;
|
|
int tokenFinished = 0;
|
|
Codepoint codepointToRecord = EOF_CODEPOINT;
|
|
|
|
/* If the previous codepoint was U+000D (CARRIAGE RETURN), and the current
|
|
codepoint is U+000A (LINE FEED), then treat the 2 codepoints as a single
|
|
line break. */
|
|
if (GET_FLAGS(parser->state, PARSER_AFTER_CARRIAGE_RETURN))
|
|
{
|
|
if (c == LINE_FEED_CODEPOINT)
|
|
parser->codepointLocationLine--;
|
|
SET_FLAGS_OFF(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
|
|
}
|
|
|
|
reprocess:
|
|
|
|
switch (parser->lexerState)
|
|
{
|
|
case LEXING_WHITESPACE:
|
|
if (c == '{')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_LEFT_CURLY);
|
|
tokenFinished = 1;
|
|
}
|
|
else if (c == '}')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_RIGHT_CURLY);
|
|
tokenFinished = 1;
|
|
}
|
|
else if (c == '[')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_LEFT_SQUARE);
|
|
tokenFinished = 1;
|
|
}
|
|
else if (c == ']')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_RIGHT_SQUARE);
|
|
tokenFinished = 1;
|
|
}
|
|
else if (c == ':')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_COLON);
|
|
tokenFinished = 1;
|
|
}
|
|
else if (c == ',')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_COMMA);
|
|
tokenFinished = 1;
|
|
}
|
|
else if (c == 'n')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_NULL);
|
|
parser->lexerBits = NULL_LITERAL_EXPECTED_CHARS_START_INDEX;
|
|
parser->lexerState = LEXING_LITERAL;
|
|
}
|
|
else if (c == 't')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_TRUE);
|
|
parser->lexerBits = TRUE_LITERAL_EXPECTED_CHARS_START_INDEX;
|
|
parser->lexerState = LEXING_LITERAL;
|
|
}
|
|
else if (c == 'f')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_FALSE);
|
|
parser->lexerBits = FALSE_LITERAL_EXPECTED_CHARS_START_INDEX;
|
|
parser->lexerState = LEXING_LITERAL;
|
|
}
|
|
else if (c == '"')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_STRING);
|
|
parser->lexerState = LEXING_STRING;
|
|
}
|
|
else if (c == '-')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_NUMBER);
|
|
parser->tokenAttributes = JSON_IsNegative;
|
|
codepointToRecord = '-';
|
|
parser->lexerState = LEXING_NUMBER_AFTER_MINUS;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c == '0')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_NUMBER);
|
|
codepointToRecord = '0';
|
|
parser->lexerState = LEXING_NUMBER_AFTER_LEADING_ZERO;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c >= '1' && c <= '9')
|
|
{
|
|
JSON_Parser_StartToken(parser, T_NUMBER);
|
|
codepointToRecord = c;
|
|
parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c == ' ' || c == TAB_CODEPOINT || c == LINE_FEED_CODEPOINT ||
|
|
c == CARRIAGE_RETURN_CODEPOINT || c == EOF_CODEPOINT)
|
|
{
|
|
/* Ignore whitespace between tokens. */
|
|
}
|
|
else if (c == BOM_CODEPOINT && parser->codepointLocationByte == 0)
|
|
{
|
|
/* OK, we'll allow the BOM. */
|
|
if (GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) { }
|
|
else
|
|
{
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_BOMNotAllowed);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
else if (c == '/' && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS))
|
|
{
|
|
/* Comments are not real tokens, but we save the location
|
|
of the comment as the token location in case of an error. */
|
|
parser->tokenLocationByte = parser->codepointLocationByte;
|
|
parser->tokenLocationLine = parser->codepointLocationLine;
|
|
parser->tokenLocationColumn = parser->codepointLocationColumn;
|
|
parser->lexerState = LEXING_COMMENT_AFTER_SLASH;
|
|
}
|
|
else if (c == 'N' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
|
|
{
|
|
JSON_Parser_StartToken(parser, T_NAN);
|
|
parser->lexerBits = NAN_LITERAL_EXPECTED_CHARS_START_INDEX;
|
|
parser->lexerState = LEXING_LITERAL;
|
|
}
|
|
else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
|
|
{
|
|
JSON_Parser_StartToken(parser, T_INFINITY);
|
|
parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX;
|
|
parser->lexerState = LEXING_LITERAL;
|
|
}
|
|
else
|
|
{
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnknownToken);
|
|
return JSON_Failure;
|
|
}
|
|
goto advance;
|
|
|
|
case LEXING_LITERAL:
|
|
/* While lexing a literal we store an index into expectedLiteralChars
|
|
in lexerBits. */
|
|
if (expectedLiteralChars[parser->lexerBits])
|
|
{
|
|
/* The codepoint should match the next character in the literal. */
|
|
if (c != expectedLiteralChars[parser->lexerBits])
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
|
|
return JSON_Failure;
|
|
}
|
|
parser->lexerBits++;
|
|
|
|
/* If the literal is the top-level value of an embedded document,
|
|
process it as soon as we consume its last expected codepoint.
|
|
Normally we defer processing until the following codepoint
|
|
has been examined, so that we can treat sequences like "nullx"
|
|
as a single, unknown token rather than a null literal followed
|
|
by an unknown token. */
|
|
if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT) &&
|
|
!expectedLiteralChars[parser->lexerBits])
|
|
tokenFinished = 1;
|
|
}
|
|
else
|
|
{
|
|
/* The literal should be finished, so the codepoint should not be
|
|
a plausible JSON literal character, but rather EOF, whitespace,
|
|
or the first character of the next token. */
|
|
if ((c >= 'A' && c <= 'Z') ||
|
|
(c >= 'a' && c <= 'z') ||
|
|
(c >= '0' && c <= '9') ||
|
|
(c == '_'))
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
|
|
return JSON_Failure;
|
|
}
|
|
if (!JSON_Parser_ProcessToken(parser))
|
|
return JSON_Failure;
|
|
goto reprocess;
|
|
}
|
|
goto advance;
|
|
|
|
case LEXING_STRING:
|
|
/* Allow JSON_Parser_FlushLexer() to fail. */
|
|
if (c == EOF_CODEPOINT) { }
|
|
else if (c == '"')
|
|
tokenFinished = 1;
|
|
else if (c == '\\')
|
|
parser->lexerState = LEXING_STRING_ESCAPE;
|
|
else if (c < 0x20 && !GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS))
|
|
{
|
|
/* ASCII control characters (U+0000 - U+001F) are not allowed to
|
|
appear unescaped in string values unless specifically allowed. */
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnescapedControlCharacter);
|
|
return JSON_Failure;
|
|
}
|
|
else
|
|
{
|
|
codepointToRecord = c;
|
|
goto recordStringCodepointAndAdvance;
|
|
}
|
|
goto advance;
|
|
|
|
case LEXING_STRING_ESCAPE:
|
|
if (c == EOF_CODEPOINT)
|
|
{
|
|
/* Allow JSON_Parser_FlushLexer() to fail. */
|
|
}
|
|
else
|
|
{
|
|
if (c == 'u')
|
|
parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_1;
|
|
else
|
|
{
|
|
if (c == '"' || c == '\\' || c == '/')
|
|
codepointToRecord = c;
|
|
else if (c == 'b')
|
|
codepointToRecord = BACKSPACE_CODEPOINT;
|
|
else if (c == 't')
|
|
codepointToRecord = TAB_CODEPOINT;
|
|
else if (c == 'n')
|
|
codepointToRecord = LINE_FEED_CODEPOINT;
|
|
else if (c == 'f')
|
|
codepointToRecord = FORM_FEED_CODEPOINT;
|
|
else if (c == 'r')
|
|
codepointToRecord = CARRIAGE_RETURN_CODEPOINT;
|
|
else
|
|
{
|
|
/* The current codepoint location is the first character after
|
|
the backslash that started the escape sequence. The error
|
|
location should be the beginning of the escape sequence, 1
|
|
character earlier. */
|
|
JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_InvalidEscapeSequence, 1);
|
|
return JSON_Failure;
|
|
}
|
|
parser->lexerState = LEXING_STRING;
|
|
goto recordStringCodepointAndAdvance;
|
|
}
|
|
}
|
|
goto advance;
|
|
|
|
case LEXING_STRING_HEX_ESCAPE_BYTE_1:
|
|
case LEXING_STRING_HEX_ESCAPE_BYTE_2:
|
|
case LEXING_STRING_HEX_ESCAPE_BYTE_3:
|
|
case LEXING_STRING_HEX_ESCAPE_BYTE_4:
|
|
case LEXING_STRING_HEX_ESCAPE_BYTE_5:
|
|
case LEXING_STRING_HEX_ESCAPE_BYTE_6:
|
|
case LEXING_STRING_HEX_ESCAPE_BYTE_7:
|
|
case LEXING_STRING_HEX_ESCAPE_BYTE_8:
|
|
/* Allow JSON_Parser_FlushLexer() to fail. */
|
|
if (c != EOF_CODEPOINT)
|
|
{
|
|
/* While lexing a string hex escape sequence we store the bytes
|
|
of the escaped codepoint in the low 2 bytes of lexerBits. If
|
|
the escape sequence represents a leading surrogate, we shift
|
|
the leading surrogate into the high 2 bytes and lex a second
|
|
hex escape sequence (which should be a trailing surrogate). */
|
|
int byteNumber = (parser->lexerState - LEXING_STRING_HEX_ESCAPE_BYTE_1) & 0x3;
|
|
uint32_t nibble;
|
|
if (c >= '0' && c <= '9')
|
|
nibble = c - '0';
|
|
else if (c >= 'A' && c <= 'F')
|
|
nibble = c - 'A' + 10;
|
|
else if (c >= 'a' && c <= 'f')
|
|
nibble = c - 'a' + 10;
|
|
else
|
|
{
|
|
/* The current codepoint location is one of the 4 hex digit
|
|
character slots in the hex escape sequence. The error
|
|
location should be the beginning of the hex escape
|
|
sequence, between 2 and 5 bytes earlier. */
|
|
int codepointsAgo = 2 /* for "\u" */ + byteNumber;
|
|
JSON_Parser_SetErrorAtStringEscapeSequenceStart(
|
|
parser, JSON_Error_InvalidEscapeSequence, codepointsAgo);
|
|
return JSON_Failure;
|
|
}
|
|
/* Store the hex digit's bits in the appropriate byte of lexerBits. */
|
|
nibble <<= (3 - byteNumber) * 4 /* shift left by 12, 8, 4, 0 */ ;
|
|
parser->lexerBits |= nibble;
|
|
if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_4)
|
|
{
|
|
/* The escape sequence is complete. We need to check whether
|
|
it represents a leading surrogate (which implies that it
|
|
will be immediately followed by a hex-escaped trailing
|
|
surrogate), a trailing surrogate (which is invalid), or a
|
|
valid codepoint (which should simply be appended to the
|
|
string token value). */
|
|
if (IS_LEADING_SURROGATE(parser->lexerBits))
|
|
{
|
|
/* Shift the leading surrogate into the high 2 bytes of
|
|
lexerBits so that the trailing surrogate can be stored
|
|
in the low 2 bytes. */
|
|
parser->lexerBits <<= 16;
|
|
parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH;
|
|
}
|
|
else if (IS_TRAILING_SURROGATE(parser->lexerBits))
|
|
{
|
|
/* The current codepoint location is the last hex digit
|
|
of the hex escape sequence. The error location should
|
|
be the beginning of the hex escape sequence, 5
|
|
characters earlier. */
|
|
JSON_Parser_SetErrorAtStringEscapeSequenceStart(
|
|
parser, JSON_Error_UnpairedSurrogateEscapeSequence, 5);
|
|
return JSON_Failure;
|
|
}
|
|
else
|
|
{
|
|
/* The escape sequence represents a BMP codepoint. */
|
|
codepointToRecord = parser->lexerBits;
|
|
parser->lexerBits = 0;
|
|
parser->lexerState = LEXING_STRING;
|
|
goto recordStringCodepointAndAdvance;
|
|
}
|
|
}
|
|
else if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_8)
|
|
{
|
|
/* The second hex escape sequence is complete. We need to
|
|
check whether it represents a trailing surrogate as
|
|
expected. If so, the surrogate pair represents a single
|
|
non-BMP codepoint. */
|
|
if (!IS_TRAILING_SURROGATE(parser->lexerBits & 0xFFFF))
|
|
{
|
|
/* The current codepoint location is the last hex digit of
|
|
the second hex escape sequence. The error location
|
|
should be the beginning of the leading surrogate
|
|
hex escape sequence, 11 characters earlier. */
|
|
JSON_Parser_SetErrorAtStringEscapeSequenceStart(
|
|
parser, JSON_Error_UnpairedSurrogateEscapeSequence, 11);
|
|
return JSON_Failure;
|
|
}
|
|
/* The escape sequence represents a non-BMP codepoint. */
|
|
codepointToRecord = CODEPOINT_FROM_SURROGATES(parser->lexerBits);
|
|
parser->lexerBits = 0;
|
|
parser->lexerState = LEXING_STRING;
|
|
goto recordStringCodepointAndAdvance;
|
|
}
|
|
else
|
|
parser->lexerState++;
|
|
}
|
|
goto advance;
|
|
|
|
case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH:
|
|
if (c != EOF_CODEPOINT)
|
|
{
|
|
if (c != '\\')
|
|
{
|
|
/* The current codepoint location is the first character after
|
|
the leading surrogate hex escape sequence. The error
|
|
location should be the beginning of the leading surrogate
|
|
hex escape sequence, 6 characters earlier. */
|
|
JSON_Parser_SetErrorAtStringEscapeSequenceStart(
|
|
parser, JSON_Error_UnpairedSurrogateEscapeSequence, 6);
|
|
return JSON_Failure;
|
|
}
|
|
parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U;
|
|
}
|
|
goto advance;
|
|
|
|
case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U:
|
|
if (c != EOF_CODEPOINT)
|
|
{
|
|
if (c != 'u')
|
|
{
|
|
/* Distinguish between a totally bogus escape sequence
|
|
and a valid one that just isn't the hex escape kind
|
|
that we require for a trailing surrogate. The current
|
|
codepoint location is the first character after the
|
|
backslash that should have introduced the trailing
|
|
surrogate hex escape sequence. */
|
|
if (c == '"' || c == '\\' || c == '/' || c == 'b' ||
|
|
c == 't' || c == 'n' || c == 'f' || c == 'r')
|
|
{
|
|
/* The error location should be at that beginning of the
|
|
leading surrogate's hex escape sequence, 7 characters
|
|
earlier. */
|
|
JSON_Parser_SetErrorAtStringEscapeSequenceStart(
|
|
parser, JSON_Error_UnpairedSurrogateEscapeSequence, 7);
|
|
}
|
|
else
|
|
{
|
|
/* The error location should be at that backslash, 1
|
|
character earlier. */
|
|
JSON_Parser_SetErrorAtStringEscapeSequenceStart(
|
|
parser, JSON_Error_InvalidEscapeSequence, 1);
|
|
}
|
|
return JSON_Failure;
|
|
}
|
|
parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_5;
|
|
}
|
|
goto advance;
|
|
|
|
case LEXING_NUMBER_AFTER_MINUS:
|
|
if (c == EOF_CODEPOINT)
|
|
{
|
|
/* Allow JSON_Parser_FlushLexer() to fail. */
|
|
}
|
|
else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
|
|
{
|
|
parser->token = T_NEGATIVE_INFINITY; /* changing horses mid-stream, so to speak */
|
|
parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX;
|
|
parser->lexerState = LEXING_LITERAL;
|
|
}
|
|
else
|
|
{
|
|
if (c == '0')
|
|
{
|
|
codepointToRecord = '0';
|
|
parser->lexerState = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c >= '1' && c <= '9')
|
|
{
|
|
codepointToRecord = c;
|
|
parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else
|
|
{
|
|
/* We trigger an unknown token error rather than an invalid number
|
|
error so that "Foo" and "-Foo" trigger the same error. */
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
goto advance;
|
|
|
|
case LEXING_NUMBER_AFTER_LEADING_ZERO:
|
|
case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO:
|
|
if (c == '.')
|
|
{
|
|
codepointToRecord = '.';
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint);
|
|
parser->lexerState = LEXING_NUMBER_AFTER_DOT;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c == 'e' || c == 'E')
|
|
{
|
|
codepointToRecord = c;
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
|
|
parser->lexerState = LEXING_NUMBER_AFTER_E;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c >= '0' && c <= '9')
|
|
{
|
|
/* JSON does not allow the integer part of a number to have any
|
|
digits after a leading zero. */
|
|
if (!JSON_Parser_HandleInvalidNumber(parser, c, 0, 0))
|
|
return JSON_Failure;
|
|
}
|
|
else if ((c == 'x' || c == 'X') &&
|
|
parser->lexerState == LEXING_NUMBER_AFTER_LEADING_ZERO &&
|
|
GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS))
|
|
{
|
|
codepointToRecord = c;
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_IsHex);
|
|
parser->lexerState = LEXING_NUMBER_AFTER_X;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else
|
|
{
|
|
/* The number is finished. */
|
|
if (!JSON_Parser_ProcessToken(parser))
|
|
return JSON_Failure;
|
|
goto reprocess;
|
|
}
|
|
goto advance;
|
|
|
|
case LEXING_NUMBER_AFTER_X:
|
|
if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
|
|
{
|
|
codepointToRecord = c;
|
|
parser->lexerState = LEXING_NUMBER_HEX_DIGITS;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_IsHex))
|
|
return JSON_Failure;
|
|
goto advance;
|
|
|
|
case LEXING_NUMBER_HEX_DIGITS:
|
|
if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
|
|
{
|
|
codepointToRecord = c;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
/* The number is finished. */
|
|
if (!JSON_Parser_ProcessToken(parser))
|
|
return JSON_Failure;
|
|
goto reprocess;
|
|
|
|
case LEXING_NUMBER_DECIMAL_DIGITS:
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
codepointToRecord = c;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c == '.')
|
|
{
|
|
codepointToRecord = '.';
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint);
|
|
parser->lexerState = LEXING_NUMBER_AFTER_DOT;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c == 'e' || c == 'E')
|
|
{
|
|
codepointToRecord = c;
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
|
|
parser->lexerState = LEXING_NUMBER_AFTER_E;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
/* The number is finished. */
|
|
if (!JSON_Parser_ProcessToken(parser))
|
|
return JSON_Failure;
|
|
goto reprocess;
|
|
|
|
case LEXING_NUMBER_AFTER_DOT:
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
codepointToRecord = c;
|
|
parser->lexerState = LEXING_NUMBER_FRACTIONAL_DIGITS;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsDecimalPoint))
|
|
return JSON_Failure;
|
|
goto advance;
|
|
|
|
case LEXING_NUMBER_FRACTIONAL_DIGITS:
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
codepointToRecord = c;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c == 'e' || c == 'E')
|
|
{
|
|
codepointToRecord = c;
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
|
|
parser->lexerState = LEXING_NUMBER_AFTER_E;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
/* The number is finished. */
|
|
if (!JSON_Parser_ProcessToken(parser))
|
|
return JSON_Failure;
|
|
goto reprocess;
|
|
|
|
case LEXING_NUMBER_AFTER_E:
|
|
if (c == '+')
|
|
{
|
|
codepointToRecord = c;
|
|
parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c == '-')
|
|
{
|
|
codepointToRecord = c;
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNegativeExponent);
|
|
parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (c >= '0' && c <= '9')
|
|
{
|
|
codepointToRecord = c;
|
|
parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsExponent))
|
|
return JSON_Failure;
|
|
goto advance;
|
|
|
|
case LEXING_NUMBER_AFTER_EXPONENT_SIGN:
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
codepointToRecord = c;
|
|
parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
else if (!JSON_Parser_HandleInvalidNumber(parser, c, 2, JSON_ContainsExponent | JSON_ContainsNegativeExponent))
|
|
return JSON_Failure;
|
|
goto advance;
|
|
|
|
case LEXING_NUMBER_EXPONENT_DIGITS:
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
codepointToRecord = c;
|
|
goto recordNumberCodepointAndAdvance;
|
|
}
|
|
/* The number is finished. */
|
|
if (!JSON_Parser_ProcessToken(parser))
|
|
return JSON_Failure;
|
|
goto reprocess;
|
|
|
|
case LEXING_COMMENT_AFTER_SLASH:
|
|
if (c == '/')
|
|
parser->lexerState = LEXING_SINGLE_LINE_COMMENT;
|
|
else if (c == '*')
|
|
parser->lexerState = LEXING_MULTI_LINE_COMMENT;
|
|
else
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
|
|
return JSON_Failure;
|
|
}
|
|
goto advance;
|
|
|
|
case LEXING_SINGLE_LINE_COMMENT:
|
|
if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT || c == EOF_CODEPOINT)
|
|
parser->lexerState = LEXING_WHITESPACE;
|
|
goto advance;
|
|
|
|
case LEXING_MULTI_LINE_COMMENT:
|
|
if (c == '*')
|
|
parser->lexerState = LEXING_MULTI_LINE_COMMENT_AFTER_STAR;
|
|
goto advance;
|
|
|
|
case LEXING_MULTI_LINE_COMMENT_AFTER_STAR:
|
|
if (c == '/')
|
|
parser->lexerState = LEXING_WHITESPACE;
|
|
else if (c != '*')
|
|
parser->lexerState = LEXING_MULTI_LINE_COMMENT;
|
|
goto advance;
|
|
}
|
|
|
|
recordStringCodepointAndAdvance:
|
|
|
|
tokenEncoding = parser->stringEncoding;
|
|
maxTokenLength = parser->maxStringLength;
|
|
if (!codepointToRecord)
|
|
{
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNullCharacter | JSON_ContainsControlCharacter);
|
|
}
|
|
else if (codepointToRecord < FIRST_NON_CONTROL_CODEPOINT)
|
|
{
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsControlCharacter);
|
|
}
|
|
else if (codepointToRecord >= FIRST_NON_BMP_CODEPOINT)
|
|
{
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter | JSON_ContainsNonBMPCharacter);
|
|
}
|
|
else if (codepointToRecord >= FIRST_NON_ASCII_CODEPOINT)
|
|
{
|
|
SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter);
|
|
}
|
|
goto recordCodepointAndAdvance;
|
|
|
|
recordNumberCodepointAndAdvance:
|
|
|
|
tokenEncoding = parser->numberEncoding;
|
|
maxTokenLength = parser->maxNumberLength;
|
|
goto recordCodepointAndAdvance;
|
|
|
|
recordCodepointAndAdvance:
|
|
|
|
/* We always ensure that there are LONGEST_ENCODING_SEQUENCE bytes
|
|
available in the buffer for the next codepoint, so we don't have to
|
|
check whether there is room when we decode a new codepoint, and if
|
|
there isn't another codepoint, we have space already allocated for
|
|
the encoded null terminator.*/
|
|
parser->tokenBytesUsed += EncodeCodepoint(codepointToRecord, tokenEncoding, parser->pTokenBytes + parser->tokenBytesUsed);
|
|
if (parser->tokenBytesUsed > maxTokenLength)
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser, parser->token == T_NUMBER ? JSON_Error_TooLongNumber : JSON_Error_TooLongString);
|
|
return JSON_Failure;
|
|
}
|
|
if (parser->tokenBytesUsed > parser->tokenBytesLength - LONGEST_ENCODING_SEQUENCE)
|
|
{
|
|
byte* pBiggerBuffer = DoubleBuffer(&parser->memorySuite, parser->defaultTokenBytes, parser->pTokenBytes, parser->tokenBytesLength);
|
|
if (!pBiggerBuffer)
|
|
{
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
|
|
return JSON_Failure;
|
|
}
|
|
parser->pTokenBytes = pBiggerBuffer;
|
|
parser->tokenBytesLength *= 2;
|
|
}
|
|
goto advance;
|
|
|
|
advance:
|
|
|
|
/* The current codepoint has been accepted, so advance the codepoint
|
|
location counters accordingly. Note that the one time we don't
|
|
do this is when the codepoint is EOF, which doesn't actually
|
|
appear in the input stream. */
|
|
if (c == CARRIAGE_RETURN_CODEPOINT)
|
|
{
|
|
SET_FLAGS_ON(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
|
|
}
|
|
if (c != EOF_CODEPOINT)
|
|
{
|
|
parser->codepointLocationByte += encodedLength;
|
|
if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT)
|
|
{
|
|
/* The next character will begin a new line. */
|
|
parser->codepointLocationLine++;
|
|
parser->codepointLocationColumn = 0;
|
|
}
|
|
else
|
|
{
|
|
/* The next character will be on the same line. */
|
|
parser->codepointLocationColumn++;
|
|
}
|
|
}
|
|
|
|
if (tokenFinished && !JSON_Parser_ProcessToken(parser))
|
|
return JSON_Failure;
|
|
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser)
|
|
{
|
|
/* Push the EOF codepoint to the lexer so that it can finish the pending
|
|
token, if any. The EOF codepoint is never emitted by the decoder
|
|
itself, since it is outside the Unicode range and therefore cannot
|
|
be encoded in any of the possible input encodings. */
|
|
if (!JSON_Parser_ProcessCodepoint(parser, EOF_CODEPOINT, 0))
|
|
return JSON_Failure;
|
|
|
|
/* The lexer should be idle when parsing finishes. */
|
|
if (parser->lexerState != LEXING_WHITESPACE)
|
|
{
|
|
JSON_Parser_SetErrorAtToken(parser, JSON_Error_IncompleteToken);
|
|
return JSON_Failure;
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
/* Parser's decoder functions. */
|
|
|
|
static JSON_Status JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser)
|
|
{
|
|
if (parser->encodingDetectedHandler && parser->encodingDetectedHandler(parser) != JSON_Parser_Continue)
|
|
{
|
|
JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_AbortedByHandler);
|
|
return JSON_Failure;
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
/* Forward declaration. */
|
|
static JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length);
|
|
|
|
static JSON_Status JSON_Parser_ProcessUnknownByte(JSON_Parser parser, byte b)
|
|
{
|
|
/* When the input encoding is unknown, the first 4 bytes of input are
|
|
recorded in decoder.bits. */
|
|
byte bytes[LONGEST_ENCODING_SEQUENCE];
|
|
|
|
switch (parser->decoderData.state)
|
|
{
|
|
case DECODER_RESET:
|
|
parser->decoderData.state = DECODED_1_OF_4;
|
|
parser->decoderData.bits = (uint32_t)b << 24;
|
|
break;
|
|
|
|
case DECODED_1_OF_4:
|
|
parser->decoderData.state = DECODED_2_OF_4;
|
|
parser->decoderData.bits |= (uint32_t)b << 16;
|
|
break;
|
|
|
|
case DECODED_2_OF_4:
|
|
parser->decoderData.state = DECODED_3_OF_4;
|
|
parser->decoderData.bits |= (uint32_t)b << 8;
|
|
break;
|
|
|
|
case DECODED_3_OF_4:
|
|
bytes[0] = (byte)(parser->decoderData.bits >> 24);
|
|
bytes[1] = (byte)(parser->decoderData.bits >> 16);
|
|
bytes[2] = (byte)(parser->decoderData.bits >> 8);
|
|
bytes[3] = (byte)(b);
|
|
|
|
/* We try to match the following patterns in order, where .. is any
|
|
byte value and nz is any non-zero byte value:
|
|
EF BB BF .. => UTF-8 with BOM
|
|
FF FE 00 00 => UTF-32LE with BOM
|
|
FF FE nz 00 => UTF-16LE with BOM
|
|
00 00 FE FF -> UTF-32BE with BOM
|
|
FE FF .. .. => UTF-16BE with BOM
|
|
nz nz .. .. => UTF-8
|
|
nz 00 nz .. => UTF-16LE
|
|
nz 00 00 00 => UTF-32LE
|
|
00 nz .. .. => UTF-16BE
|
|
00 00 00 nz => UTF-32BE
|
|
.. .. .. .. => unknown encoding */
|
|
if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)
|
|
{
|
|
/* EF BB BF .. */
|
|
parser->inputEncoding = JSON_UTF8;
|
|
}
|
|
else if (bytes[0] == 0xFF && bytes[1] == 0xFE && bytes[3] == 0x00)
|
|
{
|
|
/* FF FE 00 00 or
|
|
FF FE nz 00 */
|
|
parser->inputEncoding = (bytes[2] == 0x00) ? JSON_UTF32LE : JSON_UTF16LE;
|
|
}
|
|
else if (bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xFE && bytes[3] == 0xFF)
|
|
{
|
|
/* 00 00 FE FF */
|
|
parser->inputEncoding = JSON_UTF32BE;
|
|
}
|
|
else if (bytes[0] == 0xFE && bytes[1] == 0xFF)
|
|
{
|
|
/* FE FF .. .. */
|
|
parser->inputEncoding = JSON_UTF16BE;
|
|
}
|
|
else if (bytes[0] != 0x00)
|
|
{
|
|
/* nz .. .. .. */
|
|
if (bytes[1] != 0x00)
|
|
{
|
|
/* nz nz .. .. */
|
|
parser->inputEncoding = JSON_UTF8;
|
|
}
|
|
else if (bytes[2] != 0x00)
|
|
{
|
|
/* nz 00 nz .. */
|
|
parser->inputEncoding = JSON_UTF16LE;
|
|
}
|
|
else if (bytes[3] == 0x00)
|
|
{
|
|
/* nz 00 00 00 */
|
|
parser->inputEncoding = JSON_UTF32LE;
|
|
}
|
|
else
|
|
{
|
|
/* nz 00 00 nz => error */
|
|
}
|
|
}
|
|
else if (bytes[1] != 0x00)
|
|
{
|
|
/* 00 nz .. .. */
|
|
parser->inputEncoding = JSON_UTF16BE;
|
|
}
|
|
else if (bytes[2] == 0x00 && bytes[3] != 0x00)
|
|
{
|
|
/* 00 00 00 nz */
|
|
parser->inputEncoding = JSON_UTF32BE;
|
|
}
|
|
else
|
|
{
|
|
/* 00 00 nz .. or
|
|
00 00 00 00 => error */
|
|
}
|
|
|
|
if (parser->inputEncoding == JSON_UnknownEncoding)
|
|
return JSON_Parser_HandleInvalidEncodingSequence(parser, 4);
|
|
|
|
if (!JSON_Parser_CallEncodingDetectedHandler(parser))
|
|
return JSON_Failure;
|
|
|
|
/* Reset the decoder before reprocessing the bytes. */
|
|
Decoder_Reset(&parser->decoderData);
|
|
return JSON_Parser_ProcessInputBytes(parser, bytes, 4);
|
|
}
|
|
|
|
/* We don't have 4 bytes yet. */
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length)
|
|
{
|
|
/* Note that if length is 0, pBytes is allowed to be NULL. */
|
|
size_t i = 0;
|
|
while (parser->inputEncoding == JSON_UnknownEncoding && i < length)
|
|
{
|
|
if (!JSON_Parser_ProcessUnknownByte(parser, pBytes[i]))
|
|
return JSON_Failure;
|
|
i++;
|
|
}
|
|
while (i < length)
|
|
{
|
|
DecoderOutput output = Decoder_ProcessByte(
|
|
&parser->decoderData, parser->inputEncoding, pBytes[i]);
|
|
DecoderResultCode result = DECODER_RESULT_CODE(output);
|
|
switch (result)
|
|
{
|
|
case SEQUENCE_PENDING:
|
|
i++;
|
|
break;
|
|
|
|
case SEQUENCE_COMPLETE:
|
|
if (!JSON_Parser_ProcessCodepoint(
|
|
parser, DECODER_CODEPOINT(output),
|
|
DECODER_SEQUENCE_LENGTH(output)))
|
|
return JSON_Failure;
|
|
i++;
|
|
break;
|
|
|
|
case SEQUENCE_INVALID_INCLUSIVE:
|
|
i++;
|
|
/* fallthrough */
|
|
case SEQUENCE_INVALID_EXCLUSIVE:
|
|
if (!JSON_Parser_HandleInvalidEncodingSequence(
|
|
parser, DECODER_SEQUENCE_LENGTH(output)))
|
|
return JSON_Failure;
|
|
break;
|
|
}
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Parser_FlushDecoder(JSON_Parser parser)
|
|
{
|
|
/* If the input was 1, 2, or 3 bytes long, and the input encoding was not
|
|
explicitly specified by the client, we can sometimes make a reasonable
|
|
guess. If the input was 1 or 3 bytes long, the only encoding that could
|
|
possibly be valid JSON is UF-8. If the input was 2 bytes long, we try
|
|
to match the following patterns in order, where .. is any byte value
|
|
and nz is any non-zero byte value:
|
|
FF FE => UTF-16LE with BOM
|
|
FE FF => UTF-16BE with BOM
|
|
nz nz => UTF-8
|
|
nz 00 => UTF-16LE
|
|
00 nz => UTF-16BE
|
|
.. .. => unknown encoding
|
|
*/
|
|
if (parser->inputEncoding == JSON_UnknownEncoding &&
|
|
parser->decoderData.state != DECODER_RESET)
|
|
{
|
|
byte bytes[3];
|
|
size_t length = 0;
|
|
bytes[0] = (byte)(parser->decoderData.bits >> 24);
|
|
bytes[1] = (byte)(parser->decoderData.bits >> 16);
|
|
bytes[2] = (byte)(parser->decoderData.bits >> 8);
|
|
|
|
switch (parser->decoderData.state)
|
|
{
|
|
case DECODED_1_OF_4:
|
|
parser->inputEncoding = JSON_UTF8;
|
|
length = 1;
|
|
break;
|
|
|
|
case DECODED_2_OF_4:
|
|
/* FF FE */
|
|
if (bytes[0] == 0xFF && bytes[1] == 0xFE)
|
|
parser->inputEncoding = JSON_UTF16LE;
|
|
/* FE FF */
|
|
else if (bytes[0] == 0xFE && bytes[1] == 0xFF)
|
|
parser->inputEncoding = JSON_UTF16BE;
|
|
else if (bytes[0] != 0x00)
|
|
{
|
|
/* nz nz or
|
|
nz 00 */
|
|
parser->inputEncoding = bytes[1] ? JSON_UTF8 : JSON_UTF16LE;
|
|
}
|
|
/* 00 nz */
|
|
else if (bytes[1] != 0x00)
|
|
parser->inputEncoding = JSON_UTF16BE;
|
|
/* 00 00 */
|
|
else
|
|
return JSON_Parser_HandleInvalidEncodingSequence(parser, 2);
|
|
length = 2;
|
|
break;
|
|
|
|
case DECODED_3_OF_4:
|
|
parser->inputEncoding = JSON_UTF8;
|
|
length = 3;
|
|
break;
|
|
}
|
|
|
|
if (!JSON_Parser_CallEncodingDetectedHandler(parser))
|
|
return JSON_Failure;
|
|
|
|
/* Reset the decoder before reprocessing the bytes. */
|
|
parser->decoderData.state = DECODER_RESET;
|
|
parser->decoderData.bits = 0;
|
|
if (!JSON_Parser_ProcessInputBytes(parser, bytes, length))
|
|
return JSON_Failure;
|
|
}
|
|
|
|
/* The decoder should be idle when parsing finishes. */
|
|
if (Decoder_SequencePending(&parser->decoderData))
|
|
return JSON_Parser_HandleInvalidEncodingSequence(
|
|
parser, DECODER_STATE_BYTES(parser->decoderData.state));
|
|
return JSON_Success;
|
|
}
|
|
|
|
/* Parser API functions. */
|
|
|
|
JSON_Parser JSON_CALL JSON_Parser_Create(const JSON_MemorySuite* pMemorySuite)
|
|
{
|
|
JSON_Parser parser;
|
|
JSON_MemorySuite memorySuite;
|
|
|
|
if (pMemorySuite)
|
|
{
|
|
memorySuite = *pMemorySuite;
|
|
|
|
/* The full memory suite must be specified. */
|
|
if (!memorySuite.realloc || !memorySuite.free)
|
|
return NULL;
|
|
}
|
|
else
|
|
memorySuite = defaultMemorySuite;
|
|
|
|
parser = (JSON_Parser)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Parser_Data));
|
|
|
|
if (!parser)
|
|
return NULL;
|
|
|
|
parser->memorySuite = memorySuite;
|
|
JSON_Parser_ResetData(parser, 0/* isInitialized */);
|
|
return parser;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_Free(JSON_Parser parser)
|
|
{
|
|
if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API))
|
|
return JSON_Failure;
|
|
|
|
SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API);
|
|
|
|
if (parser->pTokenBytes != parser->defaultTokenBytes)
|
|
parser->memorySuite.free(parser->memorySuite.userData, parser->pTokenBytes);
|
|
|
|
while (parser->pMemberNames)
|
|
JSON_Parser_PopMemberNameList(parser);
|
|
|
|
Grammarian_FreeAllocations(&parser->grammarianData, &parser->memorySuite);
|
|
parser->memorySuite.free(parser->memorySuite.userData, parser);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_Reset(JSON_Parser parser)
|
|
{
|
|
if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API))
|
|
return JSON_Failure;
|
|
SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API);
|
|
JSON_Parser_ResetData(parser, 1/* isInitialized */);
|
|
/* Note that JSON_Parser_ResetData() unset PARSER_IN_PROTECTED_API for us. */
|
|
return JSON_Success;
|
|
}
|
|
|
|
void* JSON_CALL JSON_Parser_GetUserData(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->userData : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetUserData(JSON_Parser parser, void* userData)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
parser->userData = userData;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Encoding JSON_CALL JSON_Parser_GetInputEncoding(JSON_Parser parser)
|
|
{
|
|
return parser ? (JSON_Encoding)parser->inputEncoding : JSON_UnknownEncoding;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetInputEncoding(JSON_Parser parser, JSON_Encoding encoding)
|
|
{
|
|
if ( !parser
|
|
|| encoding < JSON_UnknownEncoding
|
|
|| encoding > JSON_UTF32BE
|
|
|| GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
parser->inputEncoding = (Encoding)encoding;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Encoding JSON_CALL JSON_Parser_GetStringEncoding(JSON_Parser parser)
|
|
{
|
|
return parser ? (JSON_Encoding)parser->stringEncoding : JSON_UTF8;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetStringEncoding(JSON_Parser parser, JSON_Encoding encoding)
|
|
{
|
|
if (
|
|
!parser
|
|
|| encoding <= JSON_UnknownEncoding
|
|
|| encoding > JSON_UTF32BE
|
|
|| GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
parser->stringEncoding = (Encoding)encoding;
|
|
return JSON_Success;
|
|
}
|
|
|
|
size_t JSON_CALL JSON_Parser_GetMaxStringLength(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->maxStringLength : SIZE_MAX;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetMaxStringLength(JSON_Parser parser, size_t maxLength)
|
|
{
|
|
if ( !parser
|
|
|| GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
parser->maxStringLength = maxLength;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Encoding JSON_CALL JSON_Parser_GetNumberEncoding(JSON_Parser parser)
|
|
{
|
|
return parser ? (JSON_Encoding)parser->numberEncoding : JSON_UTF8;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetNumberEncoding(JSON_Parser parser, JSON_Encoding encoding)
|
|
{
|
|
if (!parser || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE || GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
parser->numberEncoding = (Encoding)encoding;
|
|
return JSON_Success;
|
|
}
|
|
|
|
size_t JSON_CALL JSON_Parser_GetMaxNumberLength(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->maxNumberLength : SIZE_MAX;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetMaxNumberLength(JSON_Parser parser, size_t maxLength)
|
|
{
|
|
if ( !parser
|
|
|| GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
parser->maxNumberLength = maxLength;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Parser_GetAllowBOM(JSON_Parser parser)
|
|
{
|
|
return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetAllowBOM(JSON_Parser parser, JSON_Boolean allowBOM)
|
|
{
|
|
if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_BOM, allowBOM);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Parser_GetAllowComments(JSON_Parser parser)
|
|
{
|
|
return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetAllowComments(JSON_Parser parser, JSON_Boolean allowComments)
|
|
{
|
|
if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_COMMENTS, allowComments);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser)
|
|
{
|
|
return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser, JSON_Boolean allowSpecialNumbers)
|
|
{
|
|
if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS, allowSpecialNumbers);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Parser_GetAllowHexNumbers(JSON_Parser parser)
|
|
{
|
|
return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetAllowHexNumbers(JSON_Parser parser, JSON_Boolean allowHexNumbers)
|
|
{
|
|
if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_HEX_NUMBERS, allowHexNumbers);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser)
|
|
{
|
|
return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser, JSON_Boolean allowUnescapedControlCharacters)
|
|
{
|
|
if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_CONTROL_CHARS, allowUnescapedControlCharacters);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser)
|
|
{
|
|
return (parser && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetReplaceInvalidEncodingSequences(
|
|
JSON_Parser parser, JSON_Boolean replaceInvalidEncodingSequences)
|
|
{
|
|
if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
|
|
return JSON_Failure;
|
|
SET_FLAGS(ParserFlags, parser->flags, PARSER_REPLACE_INVALID, replaceInvalidEncodingSequences);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Parser_GetTrackObjectMembers(JSON_Parser parser)
|
|
{
|
|
return (parser && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetTrackObjectMembers(JSON_Parser parser, JSON_Boolean trackObjectMembers)
|
|
{
|
|
if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
|
|
{
|
|
return JSON_Failure;
|
|
}
|
|
SET_FLAGS(ParserFlags, parser->flags, PARSER_TRACK_OBJECT_MEMBERS, trackObjectMembers);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser)
|
|
{
|
|
return (parser && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetStopAfterEmbeddedDocument(
|
|
JSON_Parser parser, JSON_Boolean stopAfterEmbeddedDocument)
|
|
{
|
|
if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
|
|
{
|
|
return JSON_Failure;
|
|
}
|
|
SET_FLAGS(ParserFlags, parser->flags, PARSER_EMBEDDED_DOCUMENT, stopAfterEmbeddedDocument);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Error JSON_CALL JSON_Parser_GetError(JSON_Parser parser)
|
|
{
|
|
return parser ? (JSON_Error)parser->error : JSON_Error_None;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_GetErrorLocation(
|
|
JSON_Parser parser, JSON_Location* pLocation)
|
|
{
|
|
if (!pLocation || !parser || parser->error == JSON_Error_None)
|
|
return JSON_Failure;
|
|
|
|
if (parser->errorOffset == ERROR_LOCATION_IS_TOKEN_START)
|
|
{
|
|
pLocation->byte = parser->tokenLocationByte;
|
|
pLocation->line = parser->tokenLocationLine;
|
|
pLocation->column = parser->tokenLocationColumn;
|
|
}
|
|
else
|
|
{
|
|
pLocation->byte = parser->codepointLocationByte - (SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding) * parser->errorOffset);
|
|
pLocation->line = parser->codepointLocationLine;
|
|
pLocation->column = parser->codepointLocationColumn - parser->errorOffset;
|
|
}
|
|
pLocation->depth = parser->depth;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_GetTokenLocation(
|
|
JSON_Parser parser, JSON_Location* pLocation)
|
|
{
|
|
if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER))
|
|
return JSON_Failure;
|
|
|
|
pLocation->byte = parser->tokenLocationByte;
|
|
pLocation->line = parser->tokenLocationLine;
|
|
pLocation->column = parser->tokenLocationColumn;
|
|
pLocation->depth = parser->depth;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_GetAfterTokenLocation(
|
|
JSON_Parser parser, JSON_Location* pLocation)
|
|
{
|
|
if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER))
|
|
return JSON_Failure;
|
|
|
|
pLocation->byte = parser->codepointLocationByte;
|
|
pLocation->line = parser->codepointLocationLine;
|
|
pLocation->column = parser->codepointLocationColumn;
|
|
pLocation->depth = parser->depth;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->encodingDetectedHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetEncodingDetectedHandler(
|
|
JSON_Parser parser, JSON_Parser_EncodingDetectedHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->encodingDetectedHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetNullHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->nullHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetNullHandler(
|
|
JSON_Parser parser, JSON_Parser_NullHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->nullHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_BooleanHandler JSON_CALL JSON_Parser_GetBooleanHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->booleanHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetBooleanHandler(
|
|
JSON_Parser parser, JSON_Parser_BooleanHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->booleanHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_StringHandler JSON_CALL JSON_Parser_GetStringHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->stringHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetStringHandler(
|
|
JSON_Parser parser, JSON_Parser_StringHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->stringHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_NumberHandler JSON_CALL JSON_Parser_GetNumberHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->numberHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetNumberHandler(
|
|
JSON_Parser parser, JSON_Parser_NumberHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->numberHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_SpecialNumberHandler JSON_CALL JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->specialNumberHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetSpecialNumberHandler(
|
|
JSON_Parser parser, JSON_Parser_SpecialNumberHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
parser->specialNumberHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_StartObjectHandler JSON_CALL JSON_Parser_GetStartObjectHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->startObjectHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetStartObjectHandler(
|
|
JSON_Parser parser, JSON_Parser_StartObjectHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->startObjectHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_EndObjectHandler JSON_CALL JSON_Parser_GetEndObjectHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->endObjectHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetEndObjectHandler(
|
|
JSON_Parser parser, JSON_Parser_EndObjectHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->endObjectHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_ObjectMemberHandler JSON_CALL JSON_Parser_GetObjectMemberHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->objectMemberHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetObjectMemberHandler(
|
|
JSON_Parser parser, JSON_Parser_ObjectMemberHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->objectMemberHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_StartArrayHandler JSON_CALL JSON_Parser_GetStartArrayHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->startArrayHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetStartArrayHandler(
|
|
JSON_Parser parser, JSON_Parser_StartArrayHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->startArrayHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_EndArrayHandler JSON_CALL JSON_Parser_GetEndArrayHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->endArrayHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetEndArrayHandler(
|
|
JSON_Parser parser, JSON_Parser_EndArrayHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->endArrayHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Parser_ArrayItemHandler JSON_CALL JSON_Parser_GetArrayItemHandler(JSON_Parser parser)
|
|
{
|
|
return parser ? parser->arrayItemHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_SetArrayItemHandler(
|
|
JSON_Parser parser, JSON_Parser_ArrayItemHandler handler)
|
|
{
|
|
if (!parser)
|
|
return JSON_Failure;
|
|
|
|
parser->arrayItemHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Parser_Parse(JSON_Parser parser, const char* pBytes, size_t length, JSON_Boolean isFinal)
|
|
{
|
|
JSON_Status status = JSON_Failure;
|
|
if (parser && (pBytes || !length) && !GET_FLAGS(parser->state, PARSER_FINISHED | PARSER_IN_PROTECTED_API))
|
|
{
|
|
int finishedParsing = 0;
|
|
SET_FLAGS_ON(ParserState, parser->state, PARSER_STARTED | PARSER_IN_PROTECTED_API);
|
|
if (JSON_Parser_ProcessInputBytes(parser, (const byte*)pBytes, length))
|
|
{
|
|
/* New input was parsed successfully. */
|
|
if (isFinal)
|
|
{
|
|
/* Make sure there is nothing pending in the decoder, lexer,
|
|
or parser. */
|
|
if (JSON_Parser_FlushDecoder(parser) &&
|
|
JSON_Parser_FlushLexer(parser) &&
|
|
JSON_Parser_FlushParser(parser))
|
|
status = JSON_Success;
|
|
|
|
finishedParsing = 1;
|
|
}
|
|
else
|
|
status = JSON_Success;
|
|
}
|
|
else
|
|
{
|
|
/* New input failed to parse. */
|
|
finishedParsing = 1;
|
|
}
|
|
if (finishedParsing)
|
|
{
|
|
SET_FLAGS_ON(ParserState, parser->state, PARSER_FINISHED);
|
|
}
|
|
SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_PROTECTED_API);
|
|
}
|
|
return status;
|
|
}
|
|
|
|
#endif /* JSON_NO_PARSER */
|
|
|
|
/******************** JSON Writer ********************/
|
|
|
|
#ifndef JSON_NO_WRITER
|
|
|
|
/* Combinable writer state flags. */
|
|
#define WRITER_RESET 0x0
|
|
#define WRITER_STARTED 0x1
|
|
#define WRITER_IN_PROTECTED_API 0x2
|
|
typedef byte WriterState;
|
|
|
|
/* Combinable writer settings flags. */
|
|
#define WRITER_DEFAULT_FLAGS 0x0
|
|
#define WRITER_USE_CRLF 0x1
|
|
#define WRITER_REPLACE_INVALID 0x2
|
|
#define WRITER_ESCAPE_NON_ASCII 0x4
|
|
typedef byte WriterFlags;
|
|
|
|
/* A writer instance. */
|
|
struct JSON_Writer_Data
|
|
{
|
|
JSON_MemorySuite memorySuite;
|
|
void* userData;
|
|
WriterState state;
|
|
WriterFlags flags;
|
|
Encoding outputEncoding;
|
|
Error error;
|
|
GrammarianData grammarianData;
|
|
JSON_Writer_OutputHandler outputHandler;
|
|
};
|
|
|
|
/* Writer internal functions. */
|
|
|
|
static void JSON_Writer_ResetData(JSON_Writer writer, int isInitialized)
|
|
{
|
|
writer->userData = NULL;
|
|
writer->flags = WRITER_DEFAULT_FLAGS;
|
|
writer->outputEncoding = JSON_UTF8;
|
|
writer->error = JSON_Error_None;
|
|
Grammarian_Reset(&writer->grammarianData, isInitialized);
|
|
writer->outputHandler = NULL;
|
|
writer->state = WRITER_RESET; /* do this last! */
|
|
}
|
|
|
|
static void JSON_Writer_SetError(JSON_Writer writer, Error error)
|
|
{
|
|
writer->error = error;
|
|
}
|
|
|
|
static JSON_Status JSON_Writer_ProcessToken(JSON_Writer writer, Symbol token)
|
|
{
|
|
GrammarianOutput output = Grammarian_ProcessToken(&writer->grammarianData, token, &writer->memorySuite);
|
|
switch (GRAMMARIAN_RESULT_CODE(output))
|
|
{
|
|
case REJECTED_TOKEN:
|
|
JSON_Writer_SetError(writer, JSON_Error_UnexpectedToken);
|
|
return JSON_Failure;
|
|
|
|
case SYMBOL_STACK_FULL:
|
|
JSON_Writer_SetError(writer, JSON_Error_OutOfMemory);
|
|
return JSON_Failure;
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Writer_OutputBytes(JSON_Writer writer, const byte* pBytes, size_t length)
|
|
{
|
|
if (writer->outputHandler && length)
|
|
{
|
|
if (writer->outputHandler(writer, (const char*)pBytes, length) != JSON_Writer_Continue)
|
|
{
|
|
JSON_Writer_SetError(writer, JSON_Error_AbortedByHandler);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
static Codepoint JSON_Writer_GetCodepointEscapeCharacter(JSON_Writer writer, Codepoint c)
|
|
{
|
|
switch (c)
|
|
{
|
|
case BACKSPACE_CODEPOINT:
|
|
return 'b';
|
|
|
|
case TAB_CODEPOINT:
|
|
return 't';
|
|
|
|
case LINE_FEED_CODEPOINT:
|
|
return 'n';
|
|
|
|
case FORM_FEED_CODEPOINT:
|
|
return 'f';
|
|
|
|
case CARRIAGE_RETURN_CODEPOINT:
|
|
return 'r';
|
|
|
|
case '"':
|
|
return '"';
|
|
|
|
case '/':
|
|
return '/';
|
|
|
|
case '\\':
|
|
return '\\';
|
|
|
|
case DELETE_CODEPOINT:
|
|
case LINE_SEPARATOR_CODEPOINT:
|
|
case PARAGRAPH_SEPARATOR_CODEPOINT:
|
|
return 'u';
|
|
|
|
default:
|
|
if (c < FIRST_NON_CONTROL_CODEPOINT || IS_NONCHARACTER(c) ||
|
|
(GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII) && c > FIRST_NON_ASCII_CODEPOINT))
|
|
return 'u';
|
|
break;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
typedef struct tag_WriteBufferData
|
|
{
|
|
size_t used;
|
|
byte bytes[256];
|
|
} WriteBufferData;
|
|
typedef WriteBufferData* WriteBuffer;
|
|
|
|
static void WriteBuffer_Reset(WriteBuffer buffer)
|
|
{
|
|
buffer->used = 0;
|
|
}
|
|
|
|
static JSON_Status WriteBuffer_Flush(WriteBuffer buffer, JSON_Writer writer)
|
|
{
|
|
JSON_Status status = JSON_Writer_OutputBytes(writer, buffer->bytes, buffer->used);
|
|
buffer->used = 0;
|
|
return status;
|
|
}
|
|
|
|
static JSON_Status WriteBuffer_WriteBytes(WriteBuffer buffer, JSON_Writer writer, const byte* pBytes, size_t length)
|
|
{
|
|
if (buffer->used + length > sizeof(buffer->bytes) &&
|
|
!WriteBuffer_Flush(buffer, writer))
|
|
return JSON_Failure;
|
|
|
|
memcpy(&buffer->bytes[buffer->used], pBytes, length);
|
|
buffer->used += length;
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status WriteBuffer_WriteCodepoint(WriteBuffer buffer, JSON_Writer writer, Codepoint c)
|
|
{
|
|
if (buffer->used + LONGEST_ENCODING_SEQUENCE > sizeof(buffer->bytes) &&
|
|
!WriteBuffer_Flush(buffer, writer))
|
|
return JSON_Failure;
|
|
|
|
buffer->used += EncodeCodepoint(c, writer->outputEncoding, &buffer->bytes[buffer->used]);
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer, JSON_Writer writer, Codepoint c)
|
|
{
|
|
if (c >= FIRST_NON_BMP_CODEPOINT)
|
|
{
|
|
/* Non-BMP codepoints must be hex-escaped by escaping the UTF-16
|
|
surrogate pair for the codepoint. We put the leading surrogate
|
|
in the low 16 bits of c so that it gets written first, then
|
|
the second pass through the loop will write out the trailing
|
|
surrogate. x*/
|
|
c = SURROGATES_FROM_CODEPOINT(c);
|
|
c = (c << 16) | (c >> 16);
|
|
}
|
|
do
|
|
{
|
|
static const byte hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
|
|
byte escapeSequence[6];
|
|
int i;
|
|
escapeSequence[0] = '\\';
|
|
escapeSequence[1] = 'u';
|
|
escapeSequence[2] = hexDigits[(c >> 12) & 0xF];
|
|
escapeSequence[3] = hexDigits[(c >> 8) & 0xF];
|
|
escapeSequence[4] = hexDigits[(c >> 4) & 0xF];
|
|
escapeSequence[5] = hexDigits[c & 0xF];
|
|
for (i = 0; i < sizeof(escapeSequence); i++)
|
|
{
|
|
if (!WriteBuffer_WriteCodepoint(buffer, writer, escapeSequence[i]))
|
|
return JSON_Failure;
|
|
}
|
|
c >>= 16;
|
|
} while (c);
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Writer_OutputString(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding)
|
|
{
|
|
static const byte quoteUTF[] = { 0, 0, 0, '"', 0, 0, 0 };
|
|
static const byte* const quoteEncodings[5] = { quoteUTF + 3, quoteUTF + 3, quoteUTF + 2, quoteUTF + 3, quoteUTF };
|
|
|
|
const byte* pQuoteEncoded = quoteEncodings[writer->outputEncoding - 1];
|
|
size_t minSequenceLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
|
|
DecoderData decoderData;
|
|
WriteBufferData bufferData;
|
|
size_t i = 0;
|
|
|
|
WriteBuffer_Reset(&bufferData);
|
|
|
|
/* Start quote. */
|
|
if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength))
|
|
return JSON_Failure;
|
|
|
|
/* String contents. */
|
|
Decoder_Reset(&decoderData);
|
|
while (i < length)
|
|
{
|
|
DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]);
|
|
DecoderResultCode result = DECODER_RESULT_CODE(output);
|
|
Codepoint c;
|
|
Codepoint escapeCharacter;
|
|
switch (result)
|
|
{
|
|
case SEQUENCE_PENDING:
|
|
i++;
|
|
break;
|
|
|
|
case SEQUENCE_COMPLETE:
|
|
c = DECODER_CODEPOINT(output);
|
|
escapeCharacter = JSON_Writer_GetCodepointEscapeCharacter(writer, c);
|
|
switch (escapeCharacter)
|
|
{
|
|
case 0:
|
|
/* Output the codepoint as a normal encoding sequence. */
|
|
if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case 'u':
|
|
/* Output the codepoint as 1 or 2 hex escape sequences. */
|
|
if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, c))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
default:
|
|
/* Output the codepoint as a simple escape sequence. */
|
|
if (!WriteBuffer_WriteCodepoint(&bufferData, writer, '\\') ||
|
|
!WriteBuffer_WriteCodepoint(&bufferData, writer, escapeCharacter))
|
|
return JSON_Failure;
|
|
break;
|
|
}
|
|
i++;
|
|
break;
|
|
|
|
case SEQUENCE_INVALID_INCLUSIVE:
|
|
i++;
|
|
/* fallthrough */
|
|
case SEQUENCE_INVALID_EXCLUSIVE:
|
|
if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID))
|
|
{
|
|
if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT))
|
|
return JSON_Failure;
|
|
}
|
|
else
|
|
{
|
|
/* Output whatever valid bytes we've accumulated before failing. */
|
|
if (WriteBuffer_Flush(&bufferData, writer))
|
|
JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
|
|
return JSON_Failure;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
if (Decoder_SequencePending(&decoderData))
|
|
{
|
|
if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID))
|
|
{
|
|
if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT))
|
|
return JSON_Failure;
|
|
}
|
|
else
|
|
{
|
|
/* Output whatever valid bytes we've accumulated before failing. */
|
|
if (WriteBuffer_Flush(&bufferData, writer))
|
|
JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
|
|
/* End quote. */
|
|
if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength) ||
|
|
!WriteBuffer_Flush(&bufferData, writer))
|
|
return JSON_Failure;
|
|
return JSON_Success;
|
|
}
|
|
|
|
static LexerState LexNumberCharacter(LexerState state, Codepoint c)
|
|
{
|
|
switch (state)
|
|
{
|
|
case LEXING_WHITESPACE:
|
|
if (c == '-')
|
|
state = LEXING_NUMBER_AFTER_MINUS;
|
|
else if (c == '0')
|
|
state = LEXING_NUMBER_AFTER_LEADING_ZERO;
|
|
else if (c >= '1' && c <= '9')
|
|
state = LEXING_NUMBER_DECIMAL_DIGITS;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
|
|
case LEXING_NUMBER_AFTER_MINUS:
|
|
if (c == '0')
|
|
state = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO;
|
|
else if (c >= '1' && c <= '9')
|
|
state = LEXING_NUMBER_DECIMAL_DIGITS;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
|
|
case LEXING_NUMBER_AFTER_LEADING_ZERO:
|
|
case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO:
|
|
if (c == '.')
|
|
state = LEXING_NUMBER_AFTER_DOT;
|
|
else if (c == 'e' || c == 'E')
|
|
state = LEXING_NUMBER_AFTER_E;
|
|
else if ((c == 'x' || c == 'X') && state == LEXING_NUMBER_AFTER_LEADING_ZERO)
|
|
state = LEXING_NUMBER_AFTER_X;
|
|
else if (c == EOF_CODEPOINT)
|
|
state = LEXING_WHITESPACE;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
|
|
case LEXING_NUMBER_AFTER_X:
|
|
if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
|
|
state = LEXING_NUMBER_HEX_DIGITS;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
|
|
case LEXING_NUMBER_HEX_DIGITS:
|
|
if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
|
|
{
|
|
/* Still LEXING_NUMBER_HEX_DIGITS. */
|
|
}
|
|
else if (c == EOF_CODEPOINT)
|
|
state = LEXING_WHITESPACE;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
|
|
case LEXING_NUMBER_DECIMAL_DIGITS:
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
/* Still LEXING_NUMBER_DECIMAL_DIGITS. */
|
|
}
|
|
else if (c == '.')
|
|
state = LEXING_NUMBER_AFTER_DOT;
|
|
else if (c == 'e' || c == 'E')
|
|
state = LEXING_NUMBER_AFTER_E;
|
|
else if (c == EOF_CODEPOINT)
|
|
state = LEXING_WHITESPACE;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
|
|
case LEXING_NUMBER_AFTER_DOT:
|
|
if (c >= '0' && c <= '9')
|
|
state = LEXING_NUMBER_FRACTIONAL_DIGITS;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
|
|
case LEXING_NUMBER_FRACTIONAL_DIGITS:
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
/* Still LEXING_NUMBER_FRACTIONAL_DIGITS. */
|
|
}
|
|
else if (c == 'e' || c == 'E')
|
|
state = LEXING_NUMBER_AFTER_E;
|
|
else if (c == EOF_CODEPOINT)
|
|
state = LEXING_WHITESPACE;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
|
|
case LEXING_NUMBER_AFTER_E:
|
|
if (c == '+' || c == '-')
|
|
state = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
|
|
else if (c >= '0' && c <= '9')
|
|
state = LEXING_NUMBER_EXPONENT_DIGITS;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
|
|
case LEXING_NUMBER_AFTER_EXPONENT_SIGN:
|
|
if (c >= '0' && c <= '9')
|
|
state = LEXING_NUMBER_EXPONENT_DIGITS;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
|
|
case LEXING_NUMBER_EXPONENT_DIGITS:
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
/* Still LEXING_NUMBER_EXPONENT_DIGITS. */
|
|
}
|
|
else if (c == EOF_CODEPOINT)
|
|
state = LEXING_WHITESPACE;
|
|
else
|
|
state = LEXER_ERROR;
|
|
break;
|
|
}
|
|
return state;
|
|
}
|
|
|
|
static JSON_Status JSON_Writer_OutputNumber(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding)
|
|
{
|
|
DecoderData decoderData;
|
|
WriteBufferData bufferData;
|
|
LexerState lexerState = LEXING_WHITESPACE;
|
|
size_t i;
|
|
Decoder_Reset(&decoderData);
|
|
WriteBuffer_Reset(&bufferData);
|
|
for (i = 0; i < length; i++)
|
|
{
|
|
DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]);
|
|
DecoderResultCode result = DECODER_RESULT_CODE(output);
|
|
Codepoint c;
|
|
switch (result)
|
|
{
|
|
case SEQUENCE_PENDING:
|
|
break;
|
|
|
|
case SEQUENCE_COMPLETE:
|
|
c = DECODER_CODEPOINT(output);
|
|
lexerState = LexNumberCharacter(lexerState, c);
|
|
if (lexerState == LEXER_ERROR)
|
|
{
|
|
/* Output whatever valid bytes we've accumulated before failing. */
|
|
if (WriteBuffer_Flush(&bufferData, writer))
|
|
JSON_Writer_SetError(writer, JSON_Error_InvalidNumber);
|
|
return JSON_Failure;
|
|
}
|
|
if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c))
|
|
return JSON_Failure;
|
|
break;
|
|
|
|
case SEQUENCE_INVALID_INCLUSIVE:
|
|
case SEQUENCE_INVALID_EXCLUSIVE:
|
|
/* Output whatever valid bytes we've accumulated before failing. */
|
|
if (WriteBuffer_Flush(&bufferData, writer))
|
|
JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
|
|
return JSON_Failure;
|
|
}
|
|
}
|
|
if (!WriteBuffer_Flush(&bufferData, writer))
|
|
return JSON_Failure;
|
|
if (Decoder_SequencePending(&decoderData))
|
|
{
|
|
JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
|
|
return JSON_Failure;
|
|
}
|
|
if (LexNumberCharacter(lexerState, EOF_CODEPOINT) == LEXER_ERROR)
|
|
{
|
|
JSON_Writer_SetError(writer, JSON_Error_InvalidNumber);
|
|
return JSON_Failure;
|
|
}
|
|
return JSON_Success;
|
|
}
|
|
|
|
#define SPACES_PER_CHUNK 8
|
|
static JSON_Status JSON_Writer_OutputSpaces(JSON_Writer writer, size_t numberOfSpaces)
|
|
{
|
|
static const byte spacesUTF8[SPACES_PER_CHUNK] = { ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' };
|
|
static const byte spacesUTF16[SPACES_PER_CHUNK * 2 + 1] = { 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0 };
|
|
static const byte spacesUTF32[SPACES_PER_CHUNK * 4 + 3] = { 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0 };
|
|
static const byte* const spacesEncodings[5] = { spacesUTF8, spacesUTF16 + 1, spacesUTF16, spacesUTF32 + 3, spacesUTF32 };
|
|
|
|
size_t encodedLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
|
|
const byte* encoded = spacesEncodings[writer->outputEncoding - 1];
|
|
while (numberOfSpaces > SPACES_PER_CHUNK)
|
|
{
|
|
if (!JSON_Writer_OutputBytes(writer, encoded, SPACES_PER_CHUNK * encodedLength))
|
|
return JSON_Failure;
|
|
numberOfSpaces -= SPACES_PER_CHUNK;
|
|
}
|
|
|
|
if (!JSON_Writer_OutputBytes(writer, encoded, numberOfSpaces * encodedLength))
|
|
return JSON_Failure;
|
|
return JSON_Success;
|
|
}
|
|
|
|
static JSON_Status JSON_Writer_WriteSimpleToken(JSON_Writer writer, Symbol token, const byte* const* encodings, size_t length)
|
|
{
|
|
JSON_Status status = JSON_Failure;
|
|
if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
|
|
{
|
|
size_t encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
|
|
SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
|
|
if (JSON_Writer_ProcessToken(writer, token) &&
|
|
JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength))
|
|
status = JSON_Success;
|
|
SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
|
|
}
|
|
return status;
|
|
}
|
|
|
|
/* Writer API functions. */
|
|
|
|
JSON_Writer JSON_CALL JSON_Writer_Create(const JSON_MemorySuite* pMemorySuite)
|
|
{
|
|
JSON_Writer writer;
|
|
JSON_MemorySuite memorySuite;
|
|
if (pMemorySuite)
|
|
{
|
|
memorySuite = *pMemorySuite;
|
|
/* The full memory suite must be specified. */
|
|
if (!memorySuite.realloc || !memorySuite.free)
|
|
return NULL;
|
|
}
|
|
else
|
|
memorySuite = defaultMemorySuite;
|
|
|
|
writer = (JSON_Writer)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Writer_Data));
|
|
|
|
if (!writer)
|
|
return NULL;
|
|
|
|
writer->memorySuite = memorySuite;
|
|
JSON_Writer_ResetData(writer, 0/* isInitialized */);
|
|
return writer;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_Free(JSON_Writer writer)
|
|
{
|
|
if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API))
|
|
return JSON_Failure;
|
|
|
|
SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API);
|
|
Grammarian_FreeAllocations(&writer->grammarianData, &writer->memorySuite);
|
|
writer->memorySuite.free(writer->memorySuite.userData, writer);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_Reset(JSON_Writer writer)
|
|
{
|
|
if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API))
|
|
return JSON_Failure;
|
|
|
|
SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API);
|
|
JSON_Writer_ResetData(writer, 1/* isInitialized */);
|
|
/* Note that JSON_Writer_ResetData() unset WRITER_IN_PROTECTED_API for us. */
|
|
return JSON_Success;
|
|
}
|
|
|
|
void* JSON_CALL JSON_Writer_GetUserData(JSON_Writer writer)
|
|
{
|
|
return writer ? writer->userData : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_SetUserData(JSON_Writer writer, void* userData)
|
|
{
|
|
if (!writer)
|
|
return JSON_Failure;
|
|
|
|
writer->userData = userData;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Encoding JSON_CALL JSON_Writer_GetOutputEncoding(JSON_Writer writer)
|
|
{
|
|
return writer ? (JSON_Encoding)writer->outputEncoding : JSON_UTF8;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_SetOutputEncoding(JSON_Writer writer, JSON_Encoding encoding)
|
|
{
|
|
if (!writer || GET_FLAGS(writer->state, WRITER_STARTED) || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE)
|
|
return JSON_Failure;
|
|
|
|
writer->outputEncoding = (Encoding)encoding;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Writer_GetUseCRLF(JSON_Writer writer)
|
|
{
|
|
return (writer && GET_FLAGS(writer->flags, WRITER_USE_CRLF)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_SetUseCRLF(JSON_Writer writer, JSON_Boolean useCRLF)
|
|
{
|
|
if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
|
|
return JSON_Failure;
|
|
|
|
SET_FLAGS(WriterFlags, writer->flags, WRITER_USE_CRLF, useCRLF);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer)
|
|
{
|
|
return (writer && GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer, JSON_Boolean replaceInvalidEncodingSequences)
|
|
{
|
|
if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
|
|
return JSON_Failure;
|
|
|
|
SET_FLAGS(WriterFlags, writer->flags, WRITER_REPLACE_INVALID, replaceInvalidEncodingSequences);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Boolean JSON_CALL JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer)
|
|
{
|
|
return (writer && GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII)) ? JSON_True : JSON_False;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer, JSON_Boolean escapeAllNonASCIICharacters)
|
|
{
|
|
if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
|
|
return JSON_Failure;
|
|
|
|
SET_FLAGS(WriterFlags, writer->flags, WRITER_ESCAPE_NON_ASCII, escapeAllNonASCIICharacters);
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Error JSON_CALL JSON_Writer_GetError(JSON_Writer writer)
|
|
{
|
|
return writer ? (JSON_Error)writer->error : JSON_Error_None;
|
|
}
|
|
|
|
JSON_Writer_OutputHandler JSON_CALL JSON_Writer_GetOutputHandler(JSON_Writer writer)
|
|
{
|
|
return writer ? writer->outputHandler : NULL;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_SetOutputHandler(JSON_Writer writer, JSON_Writer_OutputHandler handler)
|
|
{
|
|
if (!writer)
|
|
return JSON_Failure;
|
|
|
|
writer->outputHandler = handler;
|
|
return JSON_Success;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteNull(JSON_Writer writer)
|
|
{
|
|
static const byte nullUTF8[] = { 'n', 'u', 'l', 'l' };
|
|
static const byte nullUTF16[] = { 0, 'n', 0, 'u', 0, 'l', 0, 'l', 0 };
|
|
static const byte nullUTF32[] = { 0, 0, 0, 'n', 0, 0, 0, 'u', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0 };
|
|
static const byte* const nullEncodings[5] = { nullUTF8, nullUTF16 + 1, nullUTF16, nullUTF32 + 3, nullUTF32 };
|
|
|
|
return JSON_Writer_WriteSimpleToken(writer, T_NULL, nullEncodings, sizeof(nullUTF8));
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteBoolean(JSON_Writer writer, JSON_Boolean value)
|
|
{
|
|
static const byte trueUTF8[] = { 't', 'r', 'u', 'e' };
|
|
static const byte trueUTF16[] = { 0, 't', 0, 'r', 0, 'u', 0, 'e', 0 };
|
|
static const byte trueUTF32[] = { 0, 0, 0, 't', 0, 0, 0, 'r', 0, 0, 0, 'u', 0, 0, 0, 'e', 0, 0, 0 };
|
|
static const byte* const trueEncodings[5] = { trueUTF8, trueUTF16 + 1, trueUTF16, trueUTF32 + 3, trueUTF32 };
|
|
|
|
static const byte falseUTF8[] = { 'f', 'a', 'l', 's', 'e' };
|
|
static const byte falseUTF16[] = { 0, 'f', 0, 'a', 0, 'l', 0, 's', 0, 'e', 0 };
|
|
static const byte falseUTF32[] = { 0, 0, 0, 'f', 0, 0, 0, 'a', 0, 0, 0, 'l', 0, 0, 0, 's', 0, 0, 0, 'e', 0, 0, 0 };
|
|
static const byte* const falseEncodings[5] = { falseUTF8, falseUTF16 + 1, falseUTF16, falseUTF32 + 3, falseUTF32 };
|
|
|
|
Symbol token;
|
|
const byte* const* encodings;
|
|
size_t length;
|
|
if (value)
|
|
{
|
|
token = T_TRUE;
|
|
encodings = trueEncodings;
|
|
length = sizeof(trueUTF8);
|
|
}
|
|
else
|
|
{
|
|
token = T_FALSE;
|
|
encodings = falseEncodings;
|
|
length = sizeof(falseUTF8);
|
|
}
|
|
return JSON_Writer_WriteSimpleToken(writer, token, encodings, length);
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteString(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding)
|
|
{
|
|
JSON_Status status = JSON_Failure;
|
|
if (writer && (pValue || !length) && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE &&
|
|
!GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
|
|
{
|
|
SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
|
|
if (JSON_Writer_ProcessToken(writer, T_STRING))
|
|
status = JSON_Writer_OutputString(writer, (const byte*)pValue, length, (Encoding)encoding);
|
|
|
|
SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
|
|
}
|
|
return status;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteNumber(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding)
|
|
{
|
|
JSON_Status status = JSON_Failure;
|
|
if (writer && pValue && length && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE &&
|
|
!GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
|
|
{
|
|
SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
|
|
if (JSON_Writer_ProcessToken(writer, T_NUMBER))
|
|
status = JSON_Writer_OutputNumber(writer, (const byte*)pValue, length, (Encoding)encoding);
|
|
|
|
SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
|
|
}
|
|
return status;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteSpecialNumber(JSON_Writer writer, JSON_SpecialNumber value)
|
|
{
|
|
static const byte nanUTF8[] = { 'N', 'a', 'N' };
|
|
static const byte nanUTF16[] = { 0, 'N', 0, 'a', 0, 'N', 0 };
|
|
static const byte nanUTF32[] = { 0, 0, 0, 'N', 0, 0, 0, 'a', 0, 0, 0, 'N', 0, 0, 0 };
|
|
static const byte* const nanEncodings[5] = { nanUTF8, nanUTF16 + 1, nanUTF16, nanUTF32 + 3, nanUTF32 };
|
|
|
|
static const byte ninfUTF8[] = { '-', 'I', 'n', 'f', 'i', 'n', 'i', 't', 'y' };
|
|
static const byte ninfUTF16[] = { 0, '-', 0, 'I', 0, 'n', 0, 'f', 0, 'i', 0, 'n', 0, 'i', 0, 't', 0, 'y', 0 };
|
|
static const byte ninfUTF32[] = { 0, 0, 0, '-', 0, 0, 0, 'I', 0, 0, 0, 'n', 0, 0, 0, 'f', 0, 0, 0, 'i', 0, 0, 0, 'n', 0, 0, 0, 'i', 0, 0, 0, 't', 0, 0, 0, 'y', 0, 0, 0 };
|
|
static const byte* const infinityEncodings[5] = { ninfUTF8 + 1, ninfUTF16 + 3, ninfUTF16 + 2, ninfUTF32 + 7, ninfUTF32 + 4 };
|
|
static const byte* const negativeInfinityEncodings[5] = { ninfUTF8, ninfUTF16 + 1, ninfUTF16, ninfUTF32 + 3, ninfUTF32 };
|
|
|
|
Symbol token;
|
|
const byte* const* encodings;
|
|
size_t length;
|
|
if (value == JSON_Infinity)
|
|
{
|
|
token = T_INFINITY;
|
|
encodings = infinityEncodings;
|
|
length = sizeof(ninfUTF8) - 1/* - */;
|
|
}
|
|
else if (value == JSON_NegativeInfinity)
|
|
{
|
|
token = T_NEGATIVE_INFINITY;
|
|
encodings = negativeInfinityEncodings;
|
|
length = sizeof(ninfUTF8);
|
|
}
|
|
else
|
|
{
|
|
token = T_NAN;
|
|
encodings = nanEncodings;
|
|
length = sizeof(nanUTF8);
|
|
}
|
|
return JSON_Writer_WriteSimpleToken(writer, token, encodings, length);
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteStartObject(JSON_Writer writer)
|
|
{
|
|
static const byte utf[] = { 0, 0, 0, '{', 0, 0, 0 };
|
|
static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
|
|
|
|
return JSON_Writer_WriteSimpleToken(writer, T_LEFT_CURLY, encodings, 1);
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteEndObject(JSON_Writer writer)
|
|
{
|
|
static const byte utf[] = { 0, 0, 0, '}', 0, 0, 0 };
|
|
static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
|
|
|
|
return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_CURLY, encodings, 1);
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteStartArray(JSON_Writer writer)
|
|
{
|
|
static const byte utf[] = { 0, 0, 0, '[', 0, 0, 0 };
|
|
static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
|
|
|
|
return JSON_Writer_WriteSimpleToken(writer, T_LEFT_SQUARE, encodings, 1);
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteEndArray(JSON_Writer writer)
|
|
{
|
|
static const byte utf[] = { 0, 0, 0, ']', 0, 0, 0 };
|
|
static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
|
|
|
|
return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_SQUARE, encodings, 1);
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteColon(JSON_Writer writer)
|
|
{
|
|
static const byte utf[] = { 0, 0, 0, ':', 0, 0, 0 };
|
|
static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
|
|
|
|
return JSON_Writer_WriteSimpleToken(writer, T_COLON, encodings, 1);
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteComma(JSON_Writer writer)
|
|
{
|
|
static const byte utf[] = { 0, 0, 0, ',', 0, 0, 0 };
|
|
static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
|
|
|
|
return JSON_Writer_WriteSimpleToken(writer, T_COMMA, encodings, 1);
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteSpace(JSON_Writer writer, size_t numberOfSpaces)
|
|
{
|
|
JSON_Status status = JSON_Failure;
|
|
if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
|
|
{
|
|
SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
|
|
status = JSON_Writer_OutputSpaces(writer, numberOfSpaces);
|
|
SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
|
|
}
|
|
return status;
|
|
}
|
|
|
|
JSON_Status JSON_CALL JSON_Writer_WriteNewLine(JSON_Writer writer)
|
|
{
|
|
static const byte lfUTF[] = { 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 };
|
|
static const byte* const lfEncodings[5] = { lfUTF + 3, lfUTF + 3, lfUTF + 2, lfUTF + 3, lfUTF };
|
|
|
|
static const byte crlfUTF8[] = { CARRIAGE_RETURN_CODEPOINT, LINE_FEED_CODEPOINT };
|
|
static const byte crlfUTF16[] = { 0, CARRIAGE_RETURN_CODEPOINT, 0, LINE_FEED_CODEPOINT, 0 };
|
|
static const byte crlfUTF32[] = { 0, 0, 0, CARRIAGE_RETURN_CODEPOINT, 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 };
|
|
static const byte* const crlfEncodings[5] = { crlfUTF8, crlfUTF16 + 1, crlfUTF16, crlfUTF32 + 3, crlfUTF32 };
|
|
|
|
JSON_Status status = JSON_Failure;
|
|
if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
|
|
{
|
|
const byte* const* encodings;
|
|
size_t length;
|
|
size_t encodedLength;
|
|
SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
|
|
if (GET_FLAGS(writer->flags, WRITER_USE_CRLF))
|
|
{
|
|
encodings = crlfEncodings;
|
|
length = 2;
|
|
}
|
|
else
|
|
{
|
|
encodings = lfEncodings;
|
|
length = 1;
|
|
}
|
|
encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
|
|
if (JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength))
|
|
status = JSON_Success;
|
|
SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
|
|
}
|
|
return status;
|
|
}
|
|
|
|
#endif /* JSON_NO_WRITER */
|
|
|
|
/******************** Miscellaneous API ********************/
|
|
|
|
const JSON_Version* JSON_CALL JSON_LibraryVersion(void)
|
|
{
|
|
static JSON_Version version = { JSON_MAJOR_VERSION, JSON_MINOR_VERSION, JSON_MICRO_VERSION };
|
|
return &version;
|
|
}
|
|
|
|
const char* JSON_CALL JSON_ErrorString(JSON_Error error)
|
|
{
|
|
/* This array must match the order and number of the JSON_Error enum. */
|
|
static const char* errorStrings[] =
|
|
{
|
|
/* JSON_Error_None */ "no error",
|
|
/* JSON_Error_OutOfMemory */ "could not allocate enough memory",
|
|
/* JSON_Error_AbortedByHandler */ "the operation was aborted by a handler",
|
|
/* JSON_Error_BOMNotAllowed */ "the input begins with a byte-order mark (BOM), which is not allowed by RFC 4627",
|
|
/* JSON_Error_InvalidEncodingSequence */ "the input contains a byte or sequence of bytes that is not valid for the input encoding",
|
|
/* JSON_Error_UnknownToken */ "the input contains an unknown token",
|
|
/* JSON_Error_UnexpectedToken */ "the input contains an unexpected token",
|
|
/* JSON_Error_IncompleteToken */ "the input ends in the middle of a token",
|
|
/* JSON_Error_MoreTokensExpected */ "the input ends when more tokens are expected",
|
|
/* JSON_Error_UnescapedControlCharacter */ "the input contains a string containing an unescaped control character (U+0000 - U+001F)",
|
|
/* JSON_Error_InvalidEscapeSequence */ "the input contains a string containing an invalid escape sequence",
|
|
/* JSON_Error_UnpairedSurrogateEscapeSequence */ "the input contains a string containing an unmatched UTF-16 surrogate codepoint",
|
|
/* JSON_Error_TooLongString */ "the input contains a string that is too long",
|
|
/* JSON_Error_InvalidNumber */ "the input contains an invalid number",
|
|
/* JSON_Error_TooLongNumber */ "the input contains a number that is too long",
|
|
/* JSON_Error_DuplicateObjectMember */ "the input contains an object with duplicate members",
|
|
/* JSON_Error_StoppedAfterEmbeddedDocument */ "the end of the embedded document was reached"
|
|
};
|
|
return ((unsigned int)error < (sizeof(errorStrings) / sizeof(errorStrings[0])))
|
|
? errorStrings[error]
|
|
: "";
|
|
}
|
|
|
|
static const uint32_t endianEncodings = (((uint32_t)JSON_UTF32BE) << 24) | (((uint32_t)JSON_UTF16BE) << 16) | (((uint32_t)JSON_UTF16LE) << 8) | ((uint32_t)JSON_UTF32LE);
|
|
|
|
JSON_Encoding JSON_CALL JSON_NativeUTF16Encoding(void)
|
|
{
|
|
return (JSON_Encoding)(((byte*)&endianEncodings)[1]);
|
|
}
|
|
|
|
JSON_Encoding JSON_CALL JSON_NativeUTF32Encoding(void)
|
|
{
|
|
return (JSON_Encoding)(((byte*)&endianEncodings)[0]);
|
|
}
|