From 5e597858974ec8977f9af5ae5f0fc939dd7211a2 Mon Sep 17 00:00:00 2001 From: SABITHSAHEB Date: Wed, 17 Jun 2026 21:20:39 +0530 Subject: [PATCH] validate utf-16 surrogate halves in decodeUnicodeCodePoint --- src/lib_json/json_reader.cpp | 16 ++++++++++++++++ src/test_lib_json/main.cpp | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index 164d41d6f..18e3f845e 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -678,6 +678,10 @@ bool Reader::decodeUnicodeCodePoint(Token& token, Location& current, if (*(current++) == '\\' && *(current++) == 'u') { unsigned int surrogatePair; if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { + if (surrogatePair < 0xDC00 || surrogatePair > 0xDFFF) + return addError("expecting a low surrogate (DC00-DFFF) to complete " + "the unicode surrogate pair", + token, current); unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); } else return false; @@ -685,6 +689,10 @@ bool Reader::decodeUnicodeCodePoint(Token& token, Location& current, return addError("expecting another \\u token to begin the second half of " "a unicode surrogate pair", token, current); + } else if (unicode >= 0xDC00 && unicode <= 0xDFFF) { + return addError("unexpected low surrogate (DC00-DFFF); a high surrogate " + "(D800-DBFF) must come first", + token, current); } return true; } @@ -1759,6 +1767,10 @@ bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current, if (*(current++) == '\\' && *(current++) == 'u') { unsigned int surrogatePair; if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { + if (surrogatePair < 0xDC00 || surrogatePair > 0xDFFF) + return addError("expecting a low surrogate (DC00-DFFF) to complete " + "the unicode surrogate pair", + token, current); unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); } else return false; @@ -1766,6 +1778,10 @@ bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current, return addError("expecting another \\u token to begin the second half of " "a unicode surrogate pair", token, current); + } else if (unicode >= 0xDC00 && unicode <= 0xDFFF) { + return addError("unexpected low surrogate (DC00-DFFF); a high surrogate " + "(D800-DBFF) must come first", + token, current); } return true; } diff --git a/src/test_lib_json/main.cpp b/src/test_lib_json/main.cpp index 90025b443..e2e4bbf1c 100644 --- a/src/test_lib_json/main.cpp +++ b/src/test_lib_json/main.cpp @@ -3322,6 +3322,24 @@ JSONTEST_FIXTURE_LOCAL(CharReaderTest, parseString) { "second half of a unicode surrogate pair\n" "See Line 1, Column 12 for detail.\n"); } + { + char const doc[] = R"([ "\uD801\u0041" ])"; + bool ok = reader->parse(doc, doc + std::strlen(doc), &root, &errs); + JSONTEST_ASSERT(!ok); + JSONTEST_ASSERT(errs == "* Line 1, Column 3\n" + " expecting a low surrogate (DC00-DFFF) to " + "complete the unicode surrogate pair\n" + "See Line 1, Column 16 for detail.\n"); + } + { + char const doc[] = R"([ "\uDC00" ])"; + bool ok = reader->parse(doc, doc + std::strlen(doc), &root, &errs); + JSONTEST_ASSERT(!ok); + JSONTEST_ASSERT(errs == "* Line 1, Column 3\n" + " unexpected low surrogate (DC00-DFFF); a high " + "surrogate (D800-DBFF) must come first\n" + "See Line 1, Column 10 for detail.\n"); + } { char const doc[] = R"([ "\ua3t@" ])"; bool ok = reader->parse(doc, doc + std::strlen(doc), &root, &errs);