0

Clarify how base/json deviates from the JSON spec

This commit does not change behavior, other than adding new tests.

Also remove a couple of stale TODOs from the opening json_reader.h
comment, that proposed deviating further from the JSON spec. "git blame"
says that these TODOs were added by commit e724599d3f on 2008-07-29,
more than 11 years ago. They're not going to be done.

Bug: 1069271
Change-Id: I2e1221ca8054c7912fcdf2723d8e3e114a580898
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2139533
Commit-Queue: Nigel Tao <nigeltao@chromium.org>
Reviewed-by: Robert Sesek <rsesek@chromium.org>
Cr-Commit-Position: refs/heads/master@{#758065}
This commit is contained in:
Nigel Tao
2020-04-09 23:18:44 +00:00
committed by Commit Bot
parent aee5b35c6f
commit 71c958dbc3
3 changed files with 88 additions and 51 deletions

@ -1,29 +1,39 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// A JSON parser, converting from a base::StringPiece to a base::Value.
//
// A JSON parser. Converts strings of JSON into a Value object (see
// base/values.h).
// http://www.ietf.org/rfc/rfc4627.txt?number=4627
// The JSON spec is:
// https://tools.ietf.org/rfc/rfc8259.txt
// which obsoletes the earlier RFCs 4627, 7158 and 7159.
//
// Known limitations/deviations from the RFC:
// - Only knows how to parse ints within the range of a signed 32 bit int and
// decimal numbers within a double.
// - Assumes input is encoded as UTF8. The spec says we should allow UTF-16
// (BE or LE) and UTF-32 (BE or LE) as well.
// - We limit nesting to 100 levels to prevent stack overflow (this is allowed
// by the RFC).
// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") writes a data
// stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
// UTF-8 string for the JSONReader::JsonToValue() function may start with a
// UTF-8 BOM (0xEF, 0xBB, 0xBF).
// To avoid the function from mis-treating a UTF-8 BOM as an invalid
// character, the function skips a Unicode BOM at the beginning of the
// Unicode string (converted from the input UTF-8 string) before parsing it.
// This RFC should be equivalent to the informal spec:
// https://www.json.org/json-en.html
//
// TODO(tc): Add a parsing option to to relax object keys being wrapped in
// double quotes
// TODO(tc): Add an option to disable comment stripping
// Implementation choices permitted by the RFC:
// - Nesting is limited (to a configurable depth, 200 by default).
// - Numbers are limited to those representable by a finite double. The
// conversion from a JSON number (in the base::StringPiece input) to a
// double-flavored base::Value may also be lossy.
// - The input (which must be UTF-8) may begin with a BOM (Byte Order Mark).
// - Duplicate object keys (strings) are silently allowed. Last key-value pair
// wins. Previous pairs are discarded.
//
// Configurable (see the JSONParserOptions type) deviations from the RFC:
// - Allow trailing commas: "[1,2,]".
// - Replace invalid Unicode with U+FFFD REPLACEMENT CHARACTER.
//
// Non-configurable deviations from the RFC:
// - Allow "// etc\n" and "/* etc */" C-style comments.
// - Allow ASCII control characters, including literal (not escaped) NUL bytes
// and new lines, within a JSON string.
// - Allow "\\v" escapes within a JSON string, producing a vertical tab.
// - Allow "\\x23" escapes within a JSON string. Subtly, the 2-digit hex value
// is a Unicode code point, not a UTF-8 byte. For example, "\\xFF" in the
// JSON source decodes to a base::Value whose string contains "\xC3\xBF", the
// UTF-8 encoding of U+00FF LATIN SMALL LETTER Y WITH DIAERESIS. Converting
// from UTF-8 to UTF-16, e.g. via UTF8ToWide, will recover a 16-bit 0x00FF.
#ifndef BASE_JSON_JSON_READER_H_
#define BASE_JSON_JSON_READER_H_
@ -44,7 +54,7 @@ class JSONParser;
}
enum JSONParserOptions {
// Parses the input strictly according to RFC 4627, except for where noted
// Parses the input strictly according to RFC 8259, except for where noted
// above.
JSON_PARSE_RFC = 0,

@ -14,6 +14,7 @@
#include "base/path_service.h"
#include "base/stl_util.h"
#include "base/strings/string_piece.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/values.h"
#include "build/build_config.h"
@ -91,7 +92,7 @@ TEST(JSONReaderTest, Ints) {
}
TEST(JSONReaderTest, NonDecimalNumbers) {
// According to RFC4627, oct, hex, and leading zeros are invalid JSON.
// According to RFC 8259, oct, hex, and leading zeros are invalid JSON.
EXPECT_FALSE(JSONReader::Read("043"));
EXPECT_FALSE(JSONReader::Read("0x43"));
EXPECT_FALSE(JSONReader::Read("00"));
@ -113,61 +114,61 @@ TEST(JSONReaderTest, LargeIntPromotion) {
// storage as doubles
Optional<Value> root = JSONReader::Read("2147483648");
ASSERT_TRUE(root);
double double_val;
EXPECT_TRUE(root->is_double());
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(2147483648.0, double_val);
EXPECT_DOUBLE_EQ(2147483648.0, root->GetDouble());
root = JSONReader::Read("-2147483649");
ASSERT_TRUE(root);
EXPECT_TRUE(root->is_double());
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(-2147483649.0, double_val);
EXPECT_DOUBLE_EQ(-2147483649.0, root->GetDouble());
}
TEST(JSONReaderTest, LargerIntIsLossy) {
  // Feed LONG_MAX to the parser as a JSON number (not a JSON string). A
  // parsed number becomes a base::Value holding either a (32-bit) int or a
  // (64-bit) double. LONG_MAX overflows an int, and a double can only
  // approximate it, so the parse is expected to lose precision.
  const char* json_long_max = "9223372036854775807";
  Optional<Value> parsed = JSONReader::Read(json_long_max);
  ASSERT_TRUE(parsed);
  ASSERT_FALSE(parsed->is_int());
  ASSERT_TRUE(parsed->is_double());

  // The double is rendered with StringPrintf rather than NumberToString:
  // NumberToString offers no way to choose the precision, and its default
  // output, "9.223372036854776e+18", is too coarse to expose the lossiness.
  const std::string approximated_text = "9223372036854775808.000000";
  EXPECT_EQ(approximated_text, StringPrintf("%f", parsed->GetDouble()));
}
TEST(JSONReaderTest, Doubles) {
Optional<Value> root = JSONReader::Read("43.1");
ASSERT_TRUE(root);
EXPECT_TRUE(root->is_double());
double double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(43.1, double_val);
EXPECT_DOUBLE_EQ(43.1, root->GetDouble());
root = JSONReader::Read("4.3e-1");
ASSERT_TRUE(root);
EXPECT_TRUE(root->is_double());
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(.43, double_val);
EXPECT_DOUBLE_EQ(.43, root->GetDouble());
root = JSONReader::Read("2.1e0");
ASSERT_TRUE(root);
EXPECT_TRUE(root->is_double());
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(2.1, double_val);
EXPECT_DOUBLE_EQ(2.1, root->GetDouble());
root = JSONReader::Read("2.1e+0001");
ASSERT_TRUE(root);
EXPECT_TRUE(root->is_double());
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(21.0, double_val);
EXPECT_DOUBLE_EQ(21.0, root->GetDouble());
root = JSONReader::Read("0.01");
ASSERT_TRUE(root);
EXPECT_TRUE(root->is_double());
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(0.01, double_val);
EXPECT_DOUBLE_EQ(0.01, root->GetDouble());
root = JSONReader::Read("1.00");
ASSERT_TRUE(root);
EXPECT_TRUE(root->is_double());
double_val = 0.0;
EXPECT_TRUE(root->GetAsDouble(&double_val));
EXPECT_DOUBLE_EQ(1.0, double_val);
EXPECT_DOUBLE_EQ(1.0, root->GetDouble());
// This is syntactically valid, but out of range of a double.
auto value_with_error =
@ -191,7 +192,8 @@ TEST(JSONReaderTest, ExponentialNumbers) {
EXPECT_FALSE(JSONReader::Read("1e1.0"));
}
TEST(JSONReaderTest, InvalidNAN) {
TEST(JSONReaderTest, InvalidInfNAN) {
// The largest finite double is roughly 1.8e308.
EXPECT_FALSE(JSONReader::Read("1e1000"));
EXPECT_FALSE(JSONReader::Read("-1e1000"));
EXPECT_FALSE(JSONReader::Read("NaN"));
@ -200,6 +202,8 @@ TEST(JSONReaderTest, InvalidNAN) {
}
TEST(JSONReaderTest, InvalidNumbers) {
EXPECT_TRUE(JSONReader::Read("4.3"));
EXPECT_FALSE(JSONReader::Read("4."));
EXPECT_FALSE(JSONReader::Read("4.3.1"));
EXPECT_FALSE(JSONReader::Read("4e3.1"));
EXPECT_FALSE(JSONReader::Read("4.a"));
@ -234,12 +238,12 @@ TEST(JSONReaderTest, BasicStringEscapes) {
TEST(JSONReaderTest, UnicodeEscapes) {
// Test hex and unicode escapes including the null character.
Optional<Value> root = JSONReader::Read("\"\\x41\\x00\\u1234\\u0000\"");
Optional<Value> root = JSONReader::Read("\"\\x41\\xFF\\x00\\u1234\\u0000\"");
ASSERT_TRUE(root);
EXPECT_TRUE(root->is_string());
std::string str_val;
EXPECT_TRUE(root->GetAsString(&str_val));
EXPECT_EQ(std::wstring(L"A\0\x1234\0", 4), UTF8ToWide(str_val));
EXPECT_EQ(std::wstring(L"A\x00FF\0\x1234\0", 5), UTF8ToWide(str_val));
// The contents of a Unicode escape may only be four hex chars. Previously the
// parser accepted things like "0x01" and "0X01".
@ -435,6 +439,16 @@ TEST(JSONReaderTest, DictionaryKeysWithPeriods) {
EXPECT_EQ(1, *integer_value);
}
TEST(JSONReaderTest, DuplicateKeys) {
  // The key "x" is given twice. The parse must still succeed, and looking up
  // "x" must yield the value from the later pair.
  Optional<Value> root = JSONReader::Read(R"({"x":1,"x":2,"y":3})");
  ASSERT_TRUE(root);
  ASSERT_TRUE(root->is_dict());

  const auto x_value = root->FindIntKey("x");
  ASSERT_TRUE(x_value);
  EXPECT_EQ(2, *x_value);
}
TEST(JSONReaderTest, InvalidDictionaries) {
// No closing brace.
EXPECT_FALSE(JSONReader::Read("{\"a\": true"));
@ -723,6 +737,19 @@ TEST(JSONReaderTest, IllegalTrailingNull) {
EXPECT_EQ(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, reader.error_code());
}
TEST(JSONReaderTest, ASCIIControlCodes) {
  // RFC 8259 section 7 says "the characters that MUST be escaped
  // [include]... the control characters (U+0000 through U+001F)", so a JSON
  // string holding a literal NUL byte or a literal new line should be
  // rejected.
  //
  // We accept them nonetheless, for backwards compatibility.
  //
  // The input is spelled as a char array (not a string literal) and copied
  // with an explicit size, so that the embedded NUL does not truncate it.
  const char kRawJson[] = {'"', 'a', '\0', 'b', '\n', 'c', '"'};
  const std::string input(kRawJson, sizeof(kRawJson));

  Optional<Value> parsed = JSONReader::Read(input);
  ASSERT_TRUE(parsed);
  ASSERT_TRUE(parsed->is_string());
  // Five bytes are expected between the quotes: 'a', NUL, 'b', '\n', 'c'.
  EXPECT_EQ(5u, parsed->GetString().length());
}
TEST(JSONReaderTest, MaxNesting) {
std::string json(R"({"outer": { "inner": {"foo": true}}})");
EXPECT_FALSE(JSONReader::Read(json, JSON_PARSE_RFC, 3));

@ -52,7 +52,7 @@ BASE_EXPORT std::string GetQuotedJSONString(StringPiece16 str);
// interpret it as UTF-16 and convert it to UTF-8.
//
// The output of this function takes the *appearance* of JSON but is not in
// fact valid according to RFC 4627.
// fact valid according to RFC 8259.
BASE_EXPORT std::string EscapeBytesAsInvalidJSONString(StringPiece str,
bool put_in_quotes);