From 38ab4d2db692529ea4192f491592ea4e07a144c2 Mon Sep 17 00:00:00 2001 From: Paula Fernandez Date: Tue, 26 May 2026 14:41:37 +0200 Subject: [PATCH 1/2] Fix: simplecpp ## fails to expand function-like macro when '(' is not adjacent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the ## operator concatenates two tokens to form a function-like macro name (e.g. PREFIX_ ## kind → PREFIX_SCALAR), simplecpp looked for the argument list '(...)' only at B->next. In PAR-style indirection patterns the '(' is separated from B by a comma or a variadic parameter token: #define PAR(a, ...) a __VA_ARGS__ #define PREFIX_SCALAR(T, N) T N #define DISPATCH(kind, ...) PAR(PREFIX_ ## kind, (__VA_ARGS__)) DISPATCH(SCALAR, int, x) // was: [unknownMacro] — now: int x Because '(' was not found, expansion was aborted and the macro was reported as unknownMacro, causing cppcheck to skip the entire translation unit. Fix: when B->next is not '(' and we are in the appendTokens context (expandResult==false), walk forward on the same line skipping ',' separators and resolving named parameter tokens via expandArg(). A literal '(' found this way is used as lpar and passed to appendTokens() as before; when a named parameter instead expands to a token list that starts with '(', the expanded tokens are appended to the call directly and appendTokens() is not invoked. The forwardScan flag ensures expandToken() is called on the result even when expandResult is false. The forward scan is restricted to expandResult==false to avoid unintended side-effects in the main expansion loop. --- simplecpp.cpp | 59 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 7f65309d..cd64ac35 100644 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -2381,16 +2381,63 @@ namespace simplecpp { output.deleteToken(A); TokenList tokens(files); tokens.push_back(new Token(strAB, tok->location)); - // for function like macros, push the (...) 
- if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') { - const MacroMap::const_iterator it = macros.find(strAB); - if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) { - const Token * const tok2 = appendTokens(tokens, loc, B->next, macros, expandedmacros, parametertokens); + // If strAB names a function-like macro, locate and append its argument list '(...)' + // from the remaining replacement tokens, then expand the whole call. + // 'forwardScan' is true when the argument list was consumed via the forward scan + // path below (not adjacent), so that expandToken() is called instead of + // the normal takeTokens() path even when expandResult is false. + bool forwardScan = false; + const MacroMap::const_iterator it = macros.find(strAB); + const bool isFunctionLikeMacro = (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()); + if (tokensB.empty() && isFunctionLikeMacro) { + // Fast path: '(' is the very next token after B on the same line. + const Token *lpar = (sameline(B, B->next) && B->next->op == '(') ? B->next : nullptr; + if (!lpar && !expandResult) { + // Forward-scan path: '(' is not immediately adjacent to B. + // This handles PAR-style indirection patterns such as: + // #define PAR(a, ...) a __VA_ARGS__ + // #define DISPATCH(kind, ...) PAR(PREFIX_ ## kind, (__VA_ARGS__)) + // where the '(' for PREFIX_kind belongs to the __VA_ARGS__ parameter + // and is separated from B by a comma in the replacement text. + // Only active in the appendTokens context (expandResult==false) to + // avoid unintended side-effects inside the main expansion loop. + const Token *scan = nextTok; + while (scan && sameline(B, scan)) { + if (scan->op == '(') { + // Found a literal '(' after skipping separators. + lpar = scan; + forwardScan = true; + break; + } + if (scan->op == ',') { + // Argument separator — skip and keep scanning. 
+ scan = scan->next; + continue; + } + if (scan->name) { + // Named token: expand it and check whether it starts with '(...)' + // (covers the case where __VA_ARGS__ expands to a parenthesised list). + TokenList expanded(files); + if (expandArg(expanded, scan, loc, macros, expandedmacros, parametertokens) && + expanded.cfront() && expanded.cfront()->op == '(') { + for (Token *t = expanded.front(); t; t = t->next) + t->location = loc; + tokens.takeTokens(expanded); + nextTok = scan->next; + forwardScan = true; + } + break; // stop at any name token, whether consumed or not + } + break; // any other operator — stop scan + } + } + if (lpar) { + const Token * const tok2 = appendTokens(tokens, loc, lpar, macros, expandedmacros, parametertokens); if (tok2) nextTok = tok2->next; } } - if (expandResult) + if (expandResult || forwardScan) expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens); else output.takeTokens(tokens); From ffb123e4835bbf9cb1bbce551b56e39fa5e74474 Mon Sep 17 00:00:00 2001 From: Paula Fernandez Date: Thu, 28 May 2026 10:31:56 +0200 Subject: [PATCH 2/2] test: add regression test for ## PAR-style indirection (hashhash_funclike_par_indirection) Covers the fix in expandHashHash(): when ## concatenation produces a function-like macro name but '(' is not immediately adjacent in the replacement text (hidden behind a comma/parameter), the forward scan must locate '(' and complete the expansion. #define PAR(a, ...) a __VA_ARGS__ #define PREFIX_SCALAR(T, N) T N #define DISPATCH(kind, ...) 
PAR(PREFIX_ ## kind, (__VA_ARGS__)) DISPATCH(SCALAR, int, x) // expected: int x --- test.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test.cpp b/test.cpp index cd6a4db5..0b7c1819 100644 --- a/test.cpp +++ b/test.cpp @@ -1846,6 +1846,27 @@ static void hashhash_universal_character() ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'A', Invalid ## usage when expanding 'A': Combining '\\u01' and '04' yields universal character '\\u0104'. This is undefined behavior according to C standard chapter 5.1.1.2, paragraph 4.\n", toString(outputList)); } +// Regression test: the result of ## names a function-like macro whose '(' does not directly follow it. +// In DISPATCH's replacement text the '(' comes after a ',' (PAR-style indirection), so it is not adjacent to the ## result. +// Previously the concatenated name was left unexpanded and reported as [unknownMacro]. +static void hashhash_funclike_par_indirection() +{ + // Single invocation: PREFIX_ ## kind yields PREFIX_SCALAR; its '(' follows a ',' in the replacement text + ASSERT_EQUALS("\n\n\nint x", + preprocess("#define PAR(a, ...) a __VA_ARGS__\n" + "#define PREFIX_SCALAR(T, N) T N\n" + "#define DISPATCH(kind, ...) PAR(PREFIX_ ## kind, (__VA_ARGS__))\n" + "DISPATCH(SCALAR, int, x)\n")); + + // Two DISPATCH invocations on one source line, selecting PREFIX_SCALAR and PREFIX_ARRAY respectively + ASSERT_EQUALS("\n\n\n\nint x float arr [ 10 ]", + preprocess("#define PAR(a, ...) a __VA_ARGS__\n" + "#define PREFIX_SCALAR(T, N) T N\n" + "#define PREFIX_ARRAY(T, N, S) T N[S]\n" + "#define DISPATCH(kind, ...) PAR(PREFIX_ ## kind, (__VA_ARGS__))\n" + "DISPATCH(SCALAR, int, x) DISPATCH(ARRAY, float, arr, 10)\n")); +} + static void has_include_1() { const char code[] = "#ifdef __has_include\n" @@ -3973,6 +3994,7 @@ static void runTests(int argc, char **argv, Input input) // character name is produced by token concatenation (6.10.3.3), // the behavior is undefined." 
TEST_CASE(hashhash_universal_character); + TEST_CASE(hashhash_funclike_par_indirection); // PAR-style ## indirection: '(' not adjacent to ## result // c++17 __has_include TEST_CASE(has_include_1);