Clarify how base/json deviates from the JSON spec
This commit does not change behavior, other than adding new tests. Also remove a couple of stale TODOs from the opening json_reader.h comment that proposed deviating further from the JSON spec. "git blame" says that these TODOs were added by commit e724599d3f
on 2008-07-29, more than 11 years ago. They're not going to be done. Bug: 1069271
Change-Id: I2e1221ca8054c7912fcdf2723d8e3e114a580898 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2139533 Commit-Queue: Nigel Tao <nigeltao@chromium.org> Reviewed-by: Robert Sesek <rsesek@chromium.org> Cr-Commit-Position: refs/heads/master@{#758065}
This commit is contained in:
@ -1,29 +1,39 @@
|
||||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// A JSON parser, converting from a base::StringPiece to a base::Value.
|
||||
//
|
||||
// A JSON parser. Converts strings of JSON into a Value object (see
|
||||
// base/values.h).
|
||||
// http://www.ietf.org/rfc/rfc4627.txt?number=4627
|
||||
// The JSON spec is:
|
||||
// https://tools.ietf.org/rfc/rfc8259.txt
|
||||
// which obsoletes the earlier RFCs 4627, 7158 and 7159.
|
||||
//
|
||||
// Known limitations/deviations from the RFC:
|
||||
// - Only knows how to parse ints within the range of a signed 32 bit int and
|
||||
// decimal numbers within a double.
|
||||
// - Assumes input is encoded as UTF8. The spec says we should allow UTF-16
|
||||
// (BE or LE) and UTF-32 (BE or LE) as well.
|
||||
// - We limit nesting to 100 levels to prevent stack overflow (this is allowed
|
||||
// by the RFC).
|
||||
// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") writes a data
|
||||
// stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
|
||||
// UTF-8 string for the JSONReader::JsonToValue() function may start with a
|
||||
// UTF-8 BOM (0xEF, 0xBB, 0xBF).
|
||||
// To avoid the function from mis-treating a UTF-8 BOM as an invalid
|
||||
// character, the function skips a Unicode BOM at the beginning of the
|
||||
// Unicode string (converted from the input UTF-8 string) before parsing it.
|
||||
// This RFC should be equivalent to the informal spec:
|
||||
// https://www.json.org/json-en.html
|
||||
//
|
||||
// TODO(tc): Add a parsing option to to relax object keys being wrapped in
|
||||
// double quotes
|
||||
// TODO(tc): Add an option to disable comment stripping
|
||||
// Implementation choices permitted by the RFC:
|
||||
// - Nesting is limited (to a configurable depth, 200 by default).
|
||||
// - Numbers are limited to those representable by a finite double. The
|
||||
// conversion from a JSON number (in the base::StringPiece input) to a
|
||||
// double-flavored base::Value may also be lossy.
|
||||
// - The input (which must be UTF-8) may begin with a BOM (Byte Order Mark).
|
||||
// - Duplicate object keys (strings) are silently allowed. Last key-value pair
|
||||
// wins. Previous pairs are discarded.
|
||||
//
|
||||
// Configurable (see the JSONParserOptions type) deviations from the RFC:
|
||||
// - Allow trailing commas: "[1,2,]".
|
||||
// - Replace invalid Unicode with U+FFFD REPLACEMENT CHARACTER.
|
||||
//
|
||||
// Non-configurable deviations from the RFC:
|
||||
// - Allow "// etc\n" and "/* etc */" C-style comments.
|
||||
// - Allow ASCII control characters, including literal (not escaped) NUL bytes
|
||||
// and new lines, within a JSON string.
|
||||
// - Allow "\\v" escapes within a JSON string, producing a vertical tab.
|
||||
// - Allow "\\x23" escapes within a JSON string. Subtly, the 2-digit hex value
|
||||
// is a Unicode code point, not a UTF-8 byte. For example, "\\xFF" in the
|
||||
// JSON source decodes to a base::Value whose string contains "\xC3\xBF", the
|
||||
// UTF-8 encoding of U+00FF LATIN SMALL LETTER Y WITH DIAERESIS. Converting
|
||||
// from UTF-8 to UTF-16, e.g. via UTF8ToWide, will recover a 16-bit 0x00FF.
|
||||
|
||||
#ifndef BASE_JSON_JSON_READER_H_
|
||||
#define BASE_JSON_JSON_READER_H_
|
||||
@ -44,7 +54,7 @@ class JSONParser;
|
||||
}
|
||||
|
||||
enum JSONParserOptions {
|
||||
// Parses the input strictly according to RFC 4627, except for where noted
|
||||
// Parses the input strictly according to RFC 8259, except for where noted
|
||||
// above.
|
||||
JSON_PARSE_RFC = 0,
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "base/path_service.h"
|
||||
#include "base/stl_util.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
#include "base/strings/stringprintf.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "base/values.h"
|
||||
#include "build/build_config.h"
|
||||
@ -91,7 +92,7 @@ TEST(JSONReaderTest, Ints) {
|
||||
}
|
||||
|
||||
TEST(JSONReaderTest, NonDecimalNumbers) {
|
||||
// According to RFC4627, oct, hex, and leading zeros are invalid JSON.
|
||||
// According to RFC 8259, oct, hex, and leading zeros are invalid JSON.
|
||||
EXPECT_FALSE(JSONReader::Read("043"));
|
||||
EXPECT_FALSE(JSONReader::Read("0x43"));
|
||||
EXPECT_FALSE(JSONReader::Read("00"));
|
||||
@ -113,61 +114,61 @@ TEST(JSONReaderTest, LargeIntPromotion) {
|
||||
// storage as doubles
|
||||
Optional<Value> root = JSONReader::Read("2147483648");
|
||||
ASSERT_TRUE(root);
|
||||
double double_val;
|
||||
EXPECT_TRUE(root->is_double());
|
||||
double_val = 0.0;
|
||||
EXPECT_TRUE(root->GetAsDouble(&double_val));
|
||||
EXPECT_DOUBLE_EQ(2147483648.0, double_val);
|
||||
EXPECT_DOUBLE_EQ(2147483648.0, root->GetDouble());
|
||||
root = JSONReader::Read("-2147483649");
|
||||
ASSERT_TRUE(root);
|
||||
EXPECT_TRUE(root->is_double());
|
||||
double_val = 0.0;
|
||||
EXPECT_TRUE(root->GetAsDouble(&double_val));
|
||||
EXPECT_DOUBLE_EQ(-2147483649.0, double_val);
|
||||
EXPECT_DOUBLE_EQ(-2147483649.0, root->GetDouble());
|
||||
}
|
||||
|
||||
// Checks that an integer literal too large for a 32-bit int is parsed as a
// double, and that the resulting double is only an approximation of the input.
TEST(JSONReaderTest, LargerIntIsLossy) {
|
||||
// Parse 2^63 - 1 as a JSON number (not a JSON string). That value is the
|
||||
// largest signed 64-bit integer (LONG_MAX only where long is 64 bits). The
|
||||
// parsed base::Value is either a (32-bit) int or a (64-bit) double; this
|
||||
// input overflows an int and a double can only approximate it: lossy.
|
||||
const char* etc807 = "9223372036854775807";
|
||||
const char* etc808 = "9223372036854775808.000000";
|
||||
Optional<Value> root = JSONReader::Read(etc807);
|
||||
ASSERT_TRUE(root);
|
||||
ASSERT_FALSE(root->is_int());
|
||||
ASSERT_TRUE(root->is_double());
|
||||
// We use StringPrintf instead of NumberToString, because the NumberToString
|
||||
// function does not let you specify the precision, and its default output,
|
||||
// "9.223372036854776e+18", isn't precise enough to see the lossiness.
|
||||
// The expected text ends in ...808, one more than the ...807 that was parsed.
EXPECT_EQ(std::string(etc808), StringPrintf("%f", root->GetDouble()));
|
||||
}
|
||||
|
||||
TEST(JSONReaderTest, Doubles) {
|
||||
Optional<Value> root = JSONReader::Read("43.1");
|
||||
ASSERT_TRUE(root);
|
||||
EXPECT_TRUE(root->is_double());
|
||||
double double_val = 0.0;
|
||||
EXPECT_TRUE(root->GetAsDouble(&double_val));
|
||||
EXPECT_DOUBLE_EQ(43.1, double_val);
|
||||
EXPECT_DOUBLE_EQ(43.1, root->GetDouble());
|
||||
|
||||
root = JSONReader::Read("4.3e-1");
|
||||
ASSERT_TRUE(root);
|
||||
EXPECT_TRUE(root->is_double());
|
||||
double_val = 0.0;
|
||||
EXPECT_TRUE(root->GetAsDouble(&double_val));
|
||||
EXPECT_DOUBLE_EQ(.43, double_val);
|
||||
EXPECT_DOUBLE_EQ(.43, root->GetDouble());
|
||||
|
||||
root = JSONReader::Read("2.1e0");
|
||||
ASSERT_TRUE(root);
|
||||
EXPECT_TRUE(root->is_double());
|
||||
double_val = 0.0;
|
||||
EXPECT_TRUE(root->GetAsDouble(&double_val));
|
||||
EXPECT_DOUBLE_EQ(2.1, double_val);
|
||||
EXPECT_DOUBLE_EQ(2.1, root->GetDouble());
|
||||
|
||||
root = JSONReader::Read("2.1e+0001");
|
||||
ASSERT_TRUE(root);
|
||||
EXPECT_TRUE(root->is_double());
|
||||
double_val = 0.0;
|
||||
EXPECT_TRUE(root->GetAsDouble(&double_val));
|
||||
EXPECT_DOUBLE_EQ(21.0, double_val);
|
||||
EXPECT_DOUBLE_EQ(21.0, root->GetDouble());
|
||||
|
||||
root = JSONReader::Read("0.01");
|
||||
ASSERT_TRUE(root);
|
||||
EXPECT_TRUE(root->is_double());
|
||||
double_val = 0.0;
|
||||
EXPECT_TRUE(root->GetAsDouble(&double_val));
|
||||
EXPECT_DOUBLE_EQ(0.01, double_val);
|
||||
EXPECT_DOUBLE_EQ(0.01, root->GetDouble());
|
||||
|
||||
root = JSONReader::Read("1.00");
|
||||
ASSERT_TRUE(root);
|
||||
EXPECT_TRUE(root->is_double());
|
||||
double_val = 0.0;
|
||||
EXPECT_TRUE(root->GetAsDouble(&double_val));
|
||||
EXPECT_DOUBLE_EQ(1.0, double_val);
|
||||
EXPECT_DOUBLE_EQ(1.0, root->GetDouble());
|
||||
|
||||
// This is syntactically valid, but out of range of a double.
|
||||
auto value_with_error =
|
||||
@ -191,7 +192,8 @@ TEST(JSONReaderTest, ExponentialNumbers) {
|
||||
EXPECT_FALSE(JSONReader::Read("1e1.0"));
|
||||
}
|
||||
|
||||
TEST(JSONReaderTest, InvalidNAN) {
|
||||
TEST(JSONReaderTest, InvalidInfNAN) {
|
||||
// The largest finite double is roughly 1.8e308.
|
||||
EXPECT_FALSE(JSONReader::Read("1e1000"));
|
||||
EXPECT_FALSE(JSONReader::Read("-1e1000"));
|
||||
EXPECT_FALSE(JSONReader::Read("NaN"));
|
||||
@ -200,6 +202,8 @@ TEST(JSONReaderTest, InvalidNAN) {
|
||||
}
|
||||
|
||||
TEST(JSONReaderTest, InvalidNumbers) {
|
||||
EXPECT_TRUE(JSONReader::Read("4.3"));
|
||||
EXPECT_FALSE(JSONReader::Read("4."));
|
||||
EXPECT_FALSE(JSONReader::Read("4.3.1"));
|
||||
EXPECT_FALSE(JSONReader::Read("4e3.1"));
|
||||
EXPECT_FALSE(JSONReader::Read("4.a"));
|
||||
@ -234,12 +238,12 @@ TEST(JSONReaderTest, BasicStringEscapes) {
|
||||
|
||||
TEST(JSONReaderTest, UnicodeEscapes) {
|
||||
// Test hex and unicode escapes including the null character.
|
||||
Optional<Value> root = JSONReader::Read("\"\\x41\\x00\\u1234\\u0000\"");
|
||||
Optional<Value> root = JSONReader::Read("\"\\x41\\xFF\\x00\\u1234\\u0000\"");
|
||||
ASSERT_TRUE(root);
|
||||
EXPECT_TRUE(root->is_string());
|
||||
std::string str_val;
|
||||
EXPECT_TRUE(root->GetAsString(&str_val));
|
||||
EXPECT_EQ(std::wstring(L"A\0\x1234\0", 4), UTF8ToWide(str_val));
|
||||
EXPECT_EQ(std::wstring(L"A\x00FF\0\x1234\0", 5), UTF8ToWide(str_val));
|
||||
|
||||
// The contents of a Unicode escape may only be four hex chars. Previously the
|
||||
// parser accepted things like "0x01" and "0X01".
|
||||
@ -435,6 +439,16 @@ TEST(JSONReaderTest, DictionaryKeysWithPeriods) {
|
||||
EXPECT_EQ(1, *integer_value);
|
||||
}
|
||||
|
||||
// Checks that an object repeating the key "x" still parses as a dictionary,
// and that looking up "x" yields the value from its last occurrence (2).
TEST(JSONReaderTest, DuplicateKeys) {
|
||||
Optional<Value> dict_val = JSONReader::Read("{\"x\":1,\"x\":2,\"y\":3}");
|
||||
ASSERT_TRUE(dict_val);
|
||||
ASSERT_TRUE(dict_val->is_dict());
|
||||
|
||||
// "x" appears twice in the input (1, then 2); the later value is expected.
auto integer_value = dict_val->FindIntKey("x");
|
||||
ASSERT_TRUE(integer_value);
|
||||
EXPECT_EQ(2, *integer_value);
|
||||
}
|
||||
|
||||
TEST(JSONReaderTest, InvalidDictionaries) {
|
||||
// No closing brace.
|
||||
EXPECT_FALSE(JSONReader::Read("{\"a\": true"));
|
||||
@ -723,6 +737,19 @@ TEST(JSONReaderTest, IllegalTrailingNull) {
|
||||
EXPECT_EQ(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, reader.error_code());
|
||||
}
|
||||
|
||||
// Checks that unescaped ASCII control characters (here NUL and new line)
// inside a JSON string are accepted and preserved in the parsed string value.
TEST(JSONReaderTest, ASCIIControlCodes) {
|
||||
// A literal NUL byte or a literal new line, in a JSON string, should be
|
||||
// rejected. RFC 8259 section 7 says "the characters that MUST be escaped
|
||||
// [include]... the control characters (U+0000 through U+001F)".
|
||||
//
|
||||
// Nonetheless, we accept them, for backwards compatibility.
|
||||
// The input is a char array rather than a string literal because it contains
// an embedded NUL and has no terminator; its length is passed explicitly via
// sizeof below.
const char json[] = {'"', 'a', '\0', 'b', '\n', 'c', '"'};
|
||||
Optional<Value> root = JSONReader::Read(std::string(json, sizeof(json)));
|
||||
ASSERT_TRUE(root);
|
||||
ASSERT_TRUE(root->is_string());
|
||||
// Five characters lie between the quotes: 'a', NUL, 'b', new line, 'c'.
EXPECT_EQ(5u, root->GetString().length());
|
||||
}
|
||||
|
||||
TEST(JSONReaderTest, MaxNesting) {
|
||||
std::string json(R"({"outer": { "inner": {"foo": true}}})");
|
||||
EXPECT_FALSE(JSONReader::Read(json, JSON_PARSE_RFC, 3));
|
||||
|
@ -52,7 +52,7 @@ BASE_EXPORT std::string GetQuotedJSONString(StringPiece16 str);
|
||||
// interpret it as UTF-16 and convert it to UTF-8.
|
||||
//
|
||||
// The output of this function takes the *appearance* of JSON but is not in
|
||||
// fact valid according to RFC 4627.
|
||||
// fact valid according to RFC 8259.
|
||||
BASE_EXPORT std::string EscapeBytesAsInvalidJSONString(StringPiece str,
|
||||
bool put_in_quotes);
|
||||
|
||||
|
Reference in New Issue
Block a user