From c4b6e37a55128e0cdc7ba1145bee62bb1d880f65 Mon Sep 17 00:00:00 2001 From: Tal500 Date: Wed, 1 Jan 2025 23:47:16 +0200 Subject: [PATCH 01/41] fix: use both absolute and relative header paths in header matching (#362) Co-authored-by: Tal Hadad --- .github/workflows/CI-unixish.yml | 17 ++++- .github/workflows/CI-windows.yml | 18 ++++- .gitignore | 3 + integration_test.py | 94 ++++++++++++++++++++++++ simplecpp.cpp | 121 +++++++++++++++++++++++++------ testutils.py | 57 +++++++++++++++ 6 files changed, 286 insertions(+), 24 deletions(-) create mode 100644 integration_test.py create mode 100644 testutils.py diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index 050e1d74..9a0e8d8e 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -30,7 +30,18 @@ jobs: run: | sudo apt-get update sudo apt-get install libc++-18-dev - + + - name: Install missing software on macos + if: contains(matrix.os, 'macos') + run: | + brew install python3 + + - name: Install missing Python packages + run: | + python3 -m pip config set global.break-system-packages true + python3 -m pip install pip --upgrade + python3 -m pip install pytest + - name: make simplecpp run: make -j$(nproc) @@ -41,6 +52,10 @@ jobs: run: | make -j$(nproc) selfcheck + - name: integration test + run: | + python3 -m pytest integration_test.py + - name: Run CMake run: | cmake -S . -B cmake.output diff --git a/.github/workflows/CI-windows.yml b/.github/workflows/CI-windows.yml index 1f78876d..50d5a84e 100644 --- a/.github/workflows/CI-windows.yml +++ b/.github/workflows/CI-windows.yml @@ -26,7 +26,18 @@ jobs: - name: Setup msbuild.exe uses: microsoft/setup-msbuild@v2 - + + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: '3.13' + check-latest: true + + - name: Install missing Python packages + run: | + python -m pip install pip --upgrade || exit /b !errorlevel! + python -m pip install pytest || exit /b !errorlevel! + - name: Run cmake if: matrix.os == 'windows-2019' run: | @@ -48,4 +59,9 @@ jobs: - name: Selfcheck run: | .\${{ matrix.config }}\simplecpp.exe simplecpp.cpp -e || exit /b !errorlevel! + + - name: integration test + run: | + set SIMPLECPP_EXE_PATH=.\${{ matrix.config }}\simplecpp.exe + python -m pytest integration_test.py || exit /b !errorlevel! diff --git a/.gitignore b/.gitignore index 183545f1..113cf360 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,6 @@ testrunner # CLion /.idea /cmake-build-* + +# python +__pycache__/ diff --git a/integration_test.py b/integration_test.py new file mode 100644 index 00000000..0b2b0b38 --- /dev/null +++ b/integration_test.py @@ -0,0 +1,94 @@ +## test with python -m pytest integration_test.py + +import os +import pytest +from testutils import simplecpp, format_include_path_arg, format_include + +def __test_relative_header_create_header(dir, with_pragma_once=True): + header_file = os.path.join(dir, 'test.h') + with open(header_file, 'wt') as f: + f.write(f""" + {"#pragma once" if with_pragma_once else ""} + #ifndef TEST_H_INCLUDED + #define TEST_H_INCLUDED + #else + #error header_was_already_included + #endif + """) + return header_file, "error: #error header_was_already_included" + +def __test_relative_header_create_source(dir, include1, include2, is_include1_sys=False, is_include2_sys=False, inv=False): + if inv: + return __test_relative_header_create_source(dir, include1=include2, include2=include1, is_include1_sys=is_include2_sys, is_include2_sys=is_include1_sys) + ## otherwise + + src_file = os.path.join(dir, 'test.c') + with open(src_file, 'wt') as f: + f.write(f""" + #undef TEST_H_INCLUDED + #include {format_include(include1, is_include1_sys)} + #include {format_include(include2, is_include2_sys)} + """) + return src_file + +@pytest.mark.parametrize("with_pragma_once", (False, True)) +@pytest.mark.parametrize("is_sys", (False, True)) +def test_relative_header_1(tmpdir, with_pragma_once, is_sys): + _, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) + + test_file = __test_relative_header_create_source(tmpdir, "test.h", "test.h", is_include1_sys=is_sys, is_include2_sys=is_sys) + + args = ([format_include_path_arg(tmpdir)] if is_sys else []) + [test_file] + + _, _, stderr = simplecpp(args, cwd=tmpdir) + + if with_pragma_once: + assert stderr == '' + else: + assert double_include_error in stderr + +@pytest.mark.parametrize("inv", (False, True)) +def test_relative_header_2(tmpdir, inv): + header_file, _ = __test_relative_header_create_header(tmpdir) + + test_file = __test_relative_header_create_source(tmpdir, "test.h", header_file, inv=inv) + + args = [test_file] + + _, _, stderr = simplecpp(args, cwd=tmpdir) + assert stderr == '' + +@pytest.mark.parametrize("is_sys", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +def test_relative_header_3(tmpdir, is_sys, inv): + test_subdir = os.path.join(tmpdir, "test_subdir") + os.mkdir(test_subdir) + header_file, _ = __test_relative_header_create_header(test_subdir) + + test_file = __test_relative_header_create_source(tmpdir, "test_subdir/test.h", header_file, is_include1_sys=is_sys, inv=inv) + + args = [test_file] + + _, _, stderr = simplecpp(args, cwd=tmpdir) + + if is_sys: + assert "missing header: Header not found" in stderr + else: + assert stderr == '' + +@pytest.mark.parametrize("use_short_path", (False, True)) +@pytest.mark.parametrize("is_sys", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +def test_relative_header_4(tmpdir, use_short_path, is_sys, inv): + test_subdir = os.path.join(tmpdir, "test_subdir") + os.mkdir(test_subdir) + header_file, _ = __test_relative_header_create_header(test_subdir) + if use_short_path: + header_file = "test_subdir/test.h" + + test_file = __test_relative_header_create_source(tmpdir, header_file, "test.h", is_include2_sys=is_sys, inv=inv) + + args = [format_include_path_arg(test_subdir), test_file] + + _, _, stderr = simplecpp(args, cwd=tmpdir) + assert stderr == '' diff --git a/simplecpp.cpp b/simplecpp.cpp index 3e9dda6c..20ae2528 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -43,6 +43,8 @@ #ifdef SIMPLECPP_WINDOWS #include #undef ERROR +#else +#include #endif #if __cplusplus >= 201103L @@ -147,6 +149,11 @@ static unsigned long long stringToULL(const std::string &s) return ret; } +static bool startsWith(const std::string &s, const std::string &p) +{ + return (s.size() >= p.size()) && std::equal(p.begin(), p.end(), s.begin()); +} + static bool endsWith(const std::string &s, const std::string &e) { return (s.size() >= e.size()) && std::equal(e.rbegin(), e.rend(), s.rbegin()); @@ -2680,6 +2687,46 @@ static bool isCpp17OrLater(const simplecpp::DUI &dui) return !std_ver.empty() && (std_ver >= "201703L"); } + +static std::string currentDirectoryOSCalc() { +#ifdef SIMPLECPP_WINDOWS + TCHAR NPath[MAX_PATH]; + GetCurrentDirectory(MAX_PATH, NPath); + return NPath; +#else + const std::size_t size = 1024; + char the_path[size]; + getcwd(the_path, size); + return the_path; +#endif +} + +static const std::string& currentDirectory() { + static const std::string curdir = simplecpp::simplifyPath(currentDirectoryOSCalc()); + return curdir; +} + +static std::string toAbsolutePath(const std::string& path) { + if (path.empty()) { + return path;// preserve error file path that is indicated by an empty string + } + if (!isAbsolutePath(path)) { + return currentDirectory() + "/" + path; + } + // otherwise + return path; +} + +static std::pair extractRelativePathFromAbsolute(const std::string& absolutepath) { + static const std::string prefix = currentDirectory() + "/"; + if (startsWith(absolutepath, prefix)) { + const std::size_t size = prefix.size(); + return std::make_pair(absolutepath.substr(size, absolutepath.size() - size), true); + } + // otherwise + return std::make_pair("", false); +} + static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader); static void simplifyHasInclude(simplecpp::TokenList &expr, const simplecpp::DUI &dui) { @@ -3098,9 +3145,12 @@ static std::string openHeader(std::ifstream &f, const std::string &path) static std::string getRelativeFileName(const std::string &sourcefile, const std::string &header) { + std::string path; if (sourcefile.find_first_of("\\/") != std::string::npos) - return simplecpp::simplifyPath(sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header); - return simplecpp::simplifyPath(header); + path = sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header; + else + path = header; + return simplecpp::simplifyPath(path); } static std::string openHeaderRelative(std::ifstream &f, const std::string &sourcefile, const std::string &header) @@ -3110,7 +3160,7 @@ static std::string openHeaderRelative(std::ifstream &f, const std::string &sourc static std::string getIncludePathFileName(const std::string &includePath, const std::string &header) { - std::string path = includePath; + std::string path = toAbsolutePath(includePath); if (!path.empty() && path[path.size()-1U]!='/' && path[path.size()-1U]!='\\') path += '/'; return path + header; @@ -3119,9 +3169,9 @@ static std::string getIncludePathFileName(const std::string &includePath, const static std::string openHeaderIncludePath(std::ifstream &f, const simplecpp::DUI &dui, const std::string &header) { for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - std::string simplePath = openHeader(f, getIncludePathFileName(*it, header)); - if (!simplePath.empty()) - return simplePath; + std::string path = openHeader(f, getIncludePathFileName(*it, header)); + if (!path.empty()) + return path; } return ""; } @@ -3131,49 +3181,76 @@ static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const if (isAbsolutePath(header)) return openHeader(f, header); - std::string ret; - if (systemheader) { - ret = openHeaderIncludePath(f, dui, header); - return ret; + // always return absolute path for systemheaders + return toAbsolutePath(openHeaderIncludePath(f, dui, header)); } + std::string ret; + ret = openHeaderRelative(f, sourcefile, header); if (ret.empty()) - return openHeaderIncludePath(f, dui, header); + return toAbsolutePath(openHeaderIncludePath(f, dui, header));// in a similar way to system headers return ret; } -static std::string getFileName(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) +static std::string findPathInMapBothRelativeAndAbsolute(const std::map &filedata, const std::string& path) { + // here there are two possibilities - either we match this from absolute path or from a relative one + if (filedata.find(path) != filedata.end()) {// try first to respect the exact match + return path; + } + // otherwise - try to use the normalize to the correct representation + if (isAbsolutePath(path)) { + const std::pair relativeExtractedResult = extractRelativePathFromAbsolute(path); + if (relativeExtractedResult.second) { + const std::string relativePath = relativeExtractedResult.first; + if (filedata.find(relativePath) != filedata.end()) { + return relativePath; + } + } + } else { + const std::string absolutePath = toAbsolutePath(path); + if (filedata.find(absolutePath) != filedata.end()) + return absolutePath; + } + // otherwise + return ""; +} + +static std::string getFileIdPath(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) { if (filedata.empty()) { return ""; } if (isAbsolutePath(header)) { - return (filedata.find(header) != filedata.end()) ? simplecpp::simplifyPath(header) : ""; + const std::string simplifiedHeaderPath = simplecpp::simplifyPath(header); + return (filedata.find(simplifiedHeaderPath) != filedata.end()) ? simplifiedHeaderPath : ""; } if (!systemheader) { - const std::string relativeFilename = getRelativeFileName(sourcefile, header); - if (filedata.find(relativeFilename) != filedata.end()) - return relativeFilename; + const std::string relativeOrAbsoluteFilename = getRelativeFileName(sourcefile, header);// unknown if absolute or relative, but always simplified + const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, relativeOrAbsoluteFilename); + if (!match.empty()) { + return match; + } } for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - std::string s = simplecpp::simplifyPath(getIncludePathFileName(*it, header)); - if (filedata.find(s) != filedata.end()) - return s; + const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, simplecpp::simplifyPath(getIncludePathFileName(*it, header))); + if (!match.empty()) { + return match; + } } if (systemheader && filedata.find(header) != filedata.end()) - return header; + return header;// system header that its file wasn't found in the included paths but alreasy in the filedata - return this as is return ""; } static bool hasFile(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) { - return !getFileName(filedata, sourcefile, header, dui, systemheader).empty(); + return !getFileIdPath(filedata, sourcefile, header, dui, systemheader).empty(); } std::map simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &filenames, const simplecpp::DUI &dui, simplecpp::OutputList *outputList) @@ -3529,7 +3606,7 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL const bool systemheader = (inctok->str()[0] == '<'); const std::string header(realFilename(inctok->str().substr(1U, inctok->str().size() - 2U))); - std::string header2 = getFileName(filedata, rawtok->location.file(), header, dui, systemheader); + std::string header2 = getFileIdPath(filedata, rawtok->location.file(), header, dui, systemheader); if (header2.empty()) { // try to load file.. std::ifstream f; diff --git a/testutils.py b/testutils.py new file mode 100644 index 00000000..55a2686d --- /dev/null +++ b/testutils.py @@ -0,0 +1,57 @@ +import os +import subprocess +import json + +def __run_subprocess(args, env=None, cwd=None, timeout=None): + p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, cwd=cwd) + + try: + stdout, stderr = p.communicate(timeout=timeout) + return_code = p.returncode + p = None + except subprocess.TimeoutExpired: + import psutil + # terminate all the child processes + child_procs = psutil.Process(p.pid).children(recursive=True) + if len(child_procs) > 0: + for child in child_procs: + child.terminate() + try: + # call with timeout since it might be stuck + p.communicate(timeout=5) + p = None + except subprocess.TimeoutExpired: + pass + raise + finally: + if p: + # sending the signal to the process groups causes the parent Python process to terminate as well + #os.killpg(os.getpgid(p.pid), signal.SIGTERM) # Send the signal to all the process groups + p.terminate() + stdout, stderr = p.communicate() + p = None + + stdout = stdout.decode(encoding='utf-8', errors='ignore').replace('\r\n', '\n') + stderr = stderr.decode(encoding='utf-8', errors='ignore').replace('\r\n', '\n') + + return return_code, stdout, stderr + +def simplecpp(args = [], cwd = None): + dir_path = os.path.dirname(os.path.realpath(__file__)) + if 'SIMPLECPP_EXE_PATH' in os.environ: + simplecpp_path = os.environ['SIMPLECPP_EXE_PATH'] + else: + simplecpp_path = os.path.join(dir_path, "simplecpp") + return __run_subprocess([simplecpp_path] + args, cwd = cwd) + +def quoted_string(s): + return json.dumps(str(s)) + +def format_include_path_arg(include_path): + return f"-I{str(include_path)}" + +def format_include(include, is_sys_header=False): + if is_sys_header: + return f"<{quoted_string(include)[1:-1]}>" + else: + return quoted_string(include) From aa3c4b6af92cbac5abf3c997f2e044db835e6cc6 Mon Sep 17 00:00:00 2001 From: olabetskyi <153490942+olabetskyi@users.noreply.github.com> Date: Mon, 10 Feb 2025 13:29:03 +0200 Subject: [PATCH 02/41] Fix #404: simplecpp::TokenList::constFold does not fold '( 0 ) && 10 < X' properly (#405) --- simplecpp.cpp | 98 +++++++++++++++++++++++++++++++++------------------ simplecpp.h | 2 ++ test.cpp | 26 ++++++++++++++ 3 files changed, 92 insertions(+), 34 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 20ae2528..576b0da7 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -954,7 +954,7 @@ void simplecpp::TokenList::constFold() constFoldQuestionOp(&tok); // If there is no '(' we are done with the constant folding - if (tok->op != '(') + if (!tok || tok->op != '(') break; if (!tok->next || !tok->next->next || tok->next->next->op != ')') @@ -1164,10 +1164,7 @@ void simplecpp::TokenList::constFoldMulDivRem(Token *tok) } else continue; - tok = tok->previous; - tok->setstr(toString(result)); - deleteToken(tok->next); - deleteToken(tok->next); + simpleSquash(tok, toString(result)); } } @@ -1187,10 +1184,7 @@ void simplecpp::TokenList::constFoldAddSub(Token *tok) else continue; - tok = tok->previous; - tok->setstr(toString(result)); - deleteToken(tok->next); - deleteToken(tok->next); + simpleSquash(tok, toString(result)); } } @@ -1210,10 +1204,7 @@ void simplecpp::TokenList::constFoldShift(Token *tok) else continue; - tok = tok->previous; - tok->setstr(toString(result)); - deleteToken(tok->next); - deleteToken(tok->next); + simpleSquash(tok, toString(result)); } } @@ -1247,10 +1238,7 @@ void simplecpp::TokenList::constFoldComparison(Token *tok) else continue; - tok = tok->previous; - tok->setstr(toString(result)); - deleteToken(tok->next); - deleteToken(tok->next); + simpleSquash(tok, toString(result)); } } @@ -1282,12 +1270,51 @@ void simplecpp::TokenList::constFoldBitwise(Token *tok) result = (stringToLL(tok->previous->str()) ^ stringToLL(tok->next->str())); else /*if (*op == '|')*/ result = (stringToLL(tok->previous->str()) | stringToLL(tok->next->str())); - tok = tok->previous; - tok->setstr(toString(result)); - deleteToken(tok->next); - deleteToken(tok->next); + simpleSquash(tok, toString(result)); + } + } +} + +void simplecpp::TokenList::simpleSquash(Token *&tok, const std::string & result) +{ + tok = tok->previous; + tok->setstr(result); + deleteToken(tok->next); + deleteToken(tok->next); +} + +void simplecpp::TokenList::squashTokens(Token *&tok, const std::set & breakPoints, bool forwardDirection, const std::string & result) +{ + const char * const brackets = forwardDirection ? "()" : ")("; + Token* Token::* const step = forwardDirection ? &Token::next : &Token::previous; + int skip = 0; + const Token * const tok1 = tok->*step; + while (tok1 && tok1->*step) { + if ((tok1->*step)->op == brackets[1]){ + if (skip) { + --skip; + deleteToken(tok1->*step); + } else + break; + } else if ((tok1->*step)->op == brackets[0]) { + ++skip; + deleteToken(tok1->*step); + } else if (skip) { + deleteToken(tok1->*step); + } else if (breakPoints.count((tok1->*step)->str()) != 0) { + break; + } else { + deleteToken(tok1->*step); } } + simpleSquash(tok, result); +} + +static simplecpp::Token * constFoldGetOperand(simplecpp::Token * tok, bool forwardDirection) +{ + simplecpp::Token* simplecpp::Token::* const step = forwardDirection ? &simplecpp::Token::next : &simplecpp::Token::previous; + const char bracket = forwardDirection ? ')' : '('; + return tok->*step && (tok->*step)->number && (!((tok->*step)->*step) || (((tok->*step)->*step)->op == bracket)) ? tok->*step : nullptr; } static const std::string AND("and"); @@ -1303,21 +1330,24 @@ void simplecpp::TokenList::constFoldLogicalOp(Token *tok) } if (tok->str() != "&&" && tok->str() != "||") continue; - if (!tok->previous || !tok->previous->number) - continue; - if (!tok->next || !tok->next->number) + const Token* const lhs = constFoldGetOperand(tok, false); + const Token* const rhs = constFoldGetOperand(tok, true); + if (!lhs) // if lhs is not a single number we don't need to fold continue; - int result; - if (tok->str() == "||") - result = (stringToLL(tok->previous->str()) || stringToLL(tok->next->str())); - else /*if (tok->str() == "&&")*/ - result = (stringToLL(tok->previous->str()) && stringToLL(tok->next->str())); - - tok = tok->previous; - tok->setstr(toString(result)); - deleteToken(tok->next); - deleteToken(tok->next); + std::set breakPoints; + breakPoints.insert(":"); + breakPoints.insert("?"); + if (tok->str() == "||"){ + if (stringToLL(lhs->str()) != 0LL || (rhs && stringToLL(rhs->str()) != 0LL)) + squashTokens(tok, breakPoints, stringToLL(lhs->str()) != 0LL, toString(1)); + } else /*if (tok->str() == "&&")*/ { + breakPoints.insert("||"); + if (stringToLL(lhs->str()) == 0LL || (rhs && stringToLL(rhs->str()) == 0LL)) + squashTokens(tok, breakPoints, stringToLL(lhs->str()) == 0LL, toString(0)); + else if (rhs && stringToLL(lhs->str()) && stringToLL(rhs->str())) + simpleSquash(tok, "1"); + } } } diff --git a/simplecpp.h b/simplecpp.h index f5c69593..0be48306 100755 --- a/simplecpp.h +++ b/simplecpp.h @@ -301,6 +301,8 @@ namespace simplecpp { void constFoldLogicalOp(Token *tok); void constFoldQuestionOp(Token **tok1); + void simpleSquash(Token *&tok, const std::string & result); + void squashTokens(Token *&tok, const std::set & breakPoints, bool forwardDirection, const std::string & result); std::string readUntil(Stream &stream, const Location &location, char start, char end, OutputList *outputList); void lineDirective(unsigned int fileIndex, unsigned int line, Location *location); diff --git a/test.cpp b/test.cpp index 3ff00b33..622c9b90 100644 --- a/test.cpp +++ b/test.cpp @@ -452,6 +452,15 @@ static void constFold() ASSERT_EQUALS("1", testConstFold("010==8")); ASSERT_EQUALS("exception", testConstFold("!1 ? 2 :")); ASSERT_EQUALS("exception", testConstFold("?2:3")); + ASSERT_EQUALS("0", testConstFold("( 0 ) && 10 < X")); + ASSERT_EQUALS("0", testConstFold("1+2*(3+4) && 7 - 7")); + ASSERT_EQUALS("1", testConstFold("( 1 ) || 10 < X")); + ASSERT_EQUALS("1", testConstFold("1+2*(3+4) || 8 - 7")); + ASSERT_EQUALS("X && 0", testConstFold("X && 0")); + ASSERT_EQUALS("X >= 0 || 0 < Y", testConstFold("X >= 0 || 0 < Y")); + ASSERT_EQUALS("X && 1 && Z", testConstFold("X && (1 || Y) && Z")); + ASSERT_EQUALS("0 || Y", testConstFold("0 && X || Y")); + ASSERT_EQUALS("X > 0 && Y", testConstFold("X > 0 && Y")); } #ifdef __CYGWIN__ @@ -1598,6 +1607,22 @@ static void ifA() ASSERT_EQUALS("\nX", preprocess(code, dui)); } +static void ifXorY() +{ + const char code[] = "#if Z > 0 || 0 < Y\n" + "X\n" + "#endif"; + ASSERT_EQUALS("", preprocess(code)); + + simplecpp::DUI dui; + dui.defines.push_back("Z=1"); + ASSERT_EQUALS("\nX", preprocess(code, dui)); + + dui.defines.clear(); + dui.defines.push_back("Y=15"); + ASSERT_EQUALS("\nX", preprocess(code, dui)); +} + static void ifCharLiteral() { const char code[] = "#if ('A'==0x41)\n" @@ -3104,6 +3129,7 @@ int main(int argc, char **argv) TEST_CASE(ifdef2); TEST_CASE(ifndef); TEST_CASE(ifA); + TEST_CASE(ifXorY); TEST_CASE(ifCharLiteral); TEST_CASE(ifDefined); TEST_CASE(ifDefinedNoPar); From cc5738cbbb7ac0a8d121ef5a6c67af08bbb9b936 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Mon, 10 Feb 2025 12:45:11 +0100 Subject: [PATCH 03/41] bumped minimum CMake version to 3.10 (#408) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 88c46b9e..c3fcf4ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 3.5) +cmake_minimum_required (VERSION 3.10) project (simplecpp LANGUAGES CXX) option(DISABLE_CPP03_SYNTAX_CHECK "Disable the C++03 syntax check." OFF) From 48a958fe25f0f0f39520b96858b5d6fa6b2c705c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=C3=A4ki?= Date: Wed, 12 Feb 2025 09:59:54 +0000 Subject: [PATCH 04/41] Fix #403 (Stack overflow in Macro::expand()) (#411) --- simplecpp.cpp | 3 ++- test.cpp | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 576b0da7..d8880355 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -2169,7 +2169,8 @@ namespace simplecpp { for (const Token *partok = parametertokens[argnr]->next; partok != parametertokens[argnr + 1U];) { const MacroMap::const_iterator it = macros.find(partok->str()); if (it != macros.end() && !partok->isExpandedFrom(&it->second) && (partok->str() == name() || expandedmacros.find(partok->str()) == expandedmacros.end())) { - const std::set expandedmacros2; // temporary amnesia to allow reexpansion of currently expanding macros during argument evaluation + std::set expandedmacros2(expandedmacros); // temporary amnesia to allow reexpansion of currently expanding macros during argument evaluation + expandedmacros2.erase(name()); partok = it->second.expand(output, loc, partok, macros, expandedmacros2); } else { output->push_back(newMacroToken(partok->str(), loc, isReplaced(expandedmacros), partok)); diff --git a/test.cpp b/test.cpp index 622c9b90..3e245516 100644 --- a/test.cpp +++ b/test.cpp @@ -859,6 +859,16 @@ static void define_define_22() // #400 inner macro not expanded after hash hash ASSERT_EQUALS("\n\n\n34", preprocess(code)); } +static void define_define_23() // #403 crash (infinite recursion) +{ + const char code[] = "#define C_(x, y) x ## y\n" + "#define C(x, y) C_(x, y)\n" + "#define X(func) C(Y, C(func, Z))\n" + "#define die X(die)\n" + "die(void);\n"; + ASSERT_EQUALS("\n\n\n\nYdieZ ( void ) ;", preprocess(code)); +} + static void define_va_args_1() { const char code[] = "#define A(fmt...) dostuff(fmt)\n" @@ -3055,6 +3065,7 @@ int main(int argc, char **argv) TEST_CASE(define_define_20); // 384 arg contains comma TEST_CASE(define_define_21); TEST_CASE(define_define_22); // #400 + TEST_CASE(define_define_23); // #403 - crash, infinite recursion TEST_CASE(define_va_args_1); TEST_CASE(define_va_args_2); TEST_CASE(define_va_args_3); From 9b0c842f683edef45f758241011f50da93fe02ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=C3=A4ki?= Date: Wed, 12 Feb 2025 12:49:07 +0000 Subject: [PATCH 05/41] Fix #409 (fuzzing crash in simplecpp::Macro::expandToken()) (#412) --- simplecpp.cpp | 2 +- test.cpp | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index d8880355..b8dd063d 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -2125,7 +2125,7 @@ namespace simplecpp { if (expandArg(&temp, defToken, parametertokens)) macroName = temp.cback()->str(); if (expandArg(&temp, defToken->next->next->next, parametertokens)) - macroName += temp.cback()->str(); + macroName += temp.cback() ? temp.cback()->str() : ""; else macroName += defToken->next->next->next->str(); lastToken = defToken->next->next->next; diff --git a/test.cpp b/test.cpp index 3e245516..82b9e1c7 100644 --- a/test.cpp +++ b/test.cpp @@ -1717,6 +1717,17 @@ static void ifDefinedHashHash() ASSERT_EQUALS("file0,4,#error,#error FOO is enabled\n", toString(outputList)); } +static void ifDefinedHashHash2() +{ + // #409 + // do not crash when expanding P() (as ## rhs is "null") + // note: gcc outputs "defined E" + const char code[] = "#define P(p)defined E##p\n" + "P()\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("\n0", preprocess(code, &outputList)); +} + static void ifLogical() { const char code[] = "#if defined(A) || defined(B)\n" @@ -3149,6 +3160,7 @@ int main(int argc, char **argv) TEST_CASE(ifDefinedInvalid1); TEST_CASE(ifDefinedInvalid2); TEST_CASE(ifDefinedHashHash); + TEST_CASE(ifDefinedHashHash2); TEST_CASE(ifLogical); TEST_CASE(ifSizeof); TEST_CASE(elif); From 9ce981ccb54928f148a48ad150da9e1b7520b748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=C3=A4ki?= Date: Wed, 12 Feb 2025 14:37:25 +0000 Subject: [PATCH 06/41] Revert "fix: use both absolute and relative header paths in header matching (#362)" (#415) --- .github/workflows/CI-unixish.yml | 17 +---- .github/workflows/CI-windows.yml | 18 +---- .gitignore | 3 - integration_test.py | 94 ------------------------ simplecpp.cpp | 121 ++++++------------------------- testutils.py | 57 --------------- 6 files changed, 24 insertions(+), 286 deletions(-) delete mode 100644 integration_test.py delete mode 100644 testutils.py diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index 9a0e8d8e..050e1d74 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -30,18 +30,7 @@ jobs: run: | sudo apt-get update sudo apt-get install libc++-18-dev - - - name: Install missing software on macos - if: contains(matrix.os, 'macos') - run: | - brew install python3 - - - name: Install missing Python packages - run: | - python3 -m pip config set global.break-system-packages true - python3 -m pip install pip --upgrade - python3 -m pip install pytest - + - name: make simplecpp run: make -j$(nproc) @@ -52,10 +41,6 @@ jobs: run: | make -j$(nproc) selfcheck - - name: integration test - run: | - python3 -m pytest integration_test.py - - name: Run CMake run: | cmake -S . -B cmake.output diff --git a/.github/workflows/CI-windows.yml b/.github/workflows/CI-windows.yml index 50d5a84e..1f78876d 100644 --- a/.github/workflows/CI-windows.yml +++ b/.github/workflows/CI-windows.yml @@ -26,18 +26,7 @@ jobs: - name: Setup msbuild.exe uses: microsoft/setup-msbuild@v2 - - - name: Set up Python 3.13 - uses: actions/setup-python@v5 - with: - python-version: '3.13' - check-latest: true - - - name: Install missing Python packages - run: | - python -m pip install pip --upgrade || exit /b !errorlevel! - python -m pip install pytest || exit /b !errorlevel! - + - name: Run cmake if: matrix.os == 'windows-2019' run: | @@ -59,9 +48,4 @@ jobs: - name: Selfcheck run: | .\${{ matrix.config }}\simplecpp.exe simplecpp.cpp -e || exit /b !errorlevel! - - - name: integration test - run: | - set SIMPLECPP_EXE_PATH=.\${{ matrix.config }}\simplecpp.exe - python -m pytest integration_test.py || exit /b !errorlevel! diff --git a/.gitignore b/.gitignore index 113cf360..183545f1 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,3 @@ testrunner # CLion /.idea /cmake-build-* - -# python -__pycache__/ diff --git a/integration_test.py b/integration_test.py deleted file mode 100644 index 0b2b0b38..00000000 --- a/integration_test.py +++ /dev/null @@ -1,94 +0,0 @@ -## test with python -m pytest integration_test.py - -import os -import pytest -from testutils import simplecpp, format_include_path_arg, format_include - -def __test_relative_header_create_header(dir, with_pragma_once=True): - header_file = os.path.join(dir, 'test.h') - with open(header_file, 'wt') as f: - f.write(f""" - {"#pragma once" if with_pragma_once else ""} - #ifndef TEST_H_INCLUDED - #define TEST_H_INCLUDED - #else - #error header_was_already_included - #endif - """) - return header_file, "error: #error header_was_already_included" - -def __test_relative_header_create_source(dir, include1, include2, is_include1_sys=False, is_include2_sys=False, inv=False): - if inv: - return __test_relative_header_create_source(dir, include1=include2, include2=include1, is_include1_sys=is_include2_sys, is_include2_sys=is_include1_sys) - ## otherwise - - src_file = os.path.join(dir, 'test.c') - with open(src_file, 'wt') as f: - f.write(f""" - #undef TEST_H_INCLUDED - #include {format_include(include1, is_include1_sys)} - #include {format_include(include2, is_include2_sys)} - """) - return src_file - -@pytest.mark.parametrize("with_pragma_once", (False, True)) -@pytest.mark.parametrize("is_sys", (False, True)) -def test_relative_header_1(tmpdir, with_pragma_once, is_sys): - _, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) - - test_file = __test_relative_header_create_source(tmpdir, "test.h", "test.h", is_include1_sys=is_sys, is_include2_sys=is_sys) - - args = ([format_include_path_arg(tmpdir)] if is_sys else []) + [test_file] - - _, _, stderr = simplecpp(args, cwd=tmpdir) - - if with_pragma_once: - assert stderr == '' - else: - assert double_include_error in stderr - -@pytest.mark.parametrize("inv", (False, True)) -def test_relative_header_2(tmpdir, inv): - header_file, _ = __test_relative_header_create_header(tmpdir) - - test_file = __test_relative_header_create_source(tmpdir, "test.h", header_file, inv=inv) - - args = [test_file] - - _, _, stderr = simplecpp(args, cwd=tmpdir) - assert stderr == '' - -@pytest.mark.parametrize("is_sys", (False, True)) -@pytest.mark.parametrize("inv", (False, True)) -def test_relative_header_3(tmpdir, is_sys, inv): - test_subdir = os.path.join(tmpdir, "test_subdir") - os.mkdir(test_subdir) - header_file, _ = __test_relative_header_create_header(test_subdir) - - test_file = __test_relative_header_create_source(tmpdir, "test_subdir/test.h", header_file, is_include1_sys=is_sys, inv=inv) - - args = [test_file] - - _, _, stderr = simplecpp(args, cwd=tmpdir) - - if is_sys: - assert "missing header: Header not found" in stderr - else: - assert stderr == '' - -@pytest.mark.parametrize("use_short_path", (False, True)) -@pytest.mark.parametrize("is_sys", (False, True)) -@pytest.mark.parametrize("inv", (False, True)) -def test_relative_header_4(tmpdir, use_short_path, is_sys, inv): - test_subdir = os.path.join(tmpdir, "test_subdir") - os.mkdir(test_subdir) - header_file, _ = __test_relative_header_create_header(test_subdir) - if use_short_path: - header_file = "test_subdir/test.h" - - test_file = __test_relative_header_create_source(tmpdir, header_file, "test.h", is_include2_sys=is_sys, inv=inv) - - args = [format_include_path_arg(test_subdir), test_file] - - _, _, stderr = simplecpp(args, cwd=tmpdir) - assert stderr == '' diff --git a/simplecpp.cpp b/simplecpp.cpp index b8dd063d..1ae47f5e 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -43,8 +43,6 @@ #ifdef SIMPLECPP_WINDOWS #include #undef ERROR -#else -#include #endif #if __cplusplus >= 201103L @@ -149,11 +147,6 @@ static unsigned long long stringToULL(const std::string &s) return ret; } -static bool startsWith(const std::string &s, const std::string &p) -{ - return (s.size() >= p.size()) && std::equal(p.begin(), p.end(), s.begin()); -} - static bool endsWith(const std::string &s, const std::string &e) { return (s.size() >= e.size()) && std::equal(e.rbegin(), e.rend(), s.rbegin()); @@ -2718,46 +2711,6 @@ static bool isCpp17OrLater(const simplecpp::DUI &dui) return !std_ver.empty() && (std_ver >= "201703L"); } - -static std::string currentDirectoryOSCalc() { -#ifdef SIMPLECPP_WINDOWS - TCHAR NPath[MAX_PATH]; - GetCurrentDirectory(MAX_PATH, NPath); - return NPath; -#else - const std::size_t size = 1024; - char the_path[size]; - getcwd(the_path, size); - return the_path; -#endif -} - -static const std::string& currentDirectory() { - static const std::string curdir = simplecpp::simplifyPath(currentDirectoryOSCalc()); - return curdir; -} - -static std::string toAbsolutePath(const std::string& path) { - if (path.empty()) { - return path;// preserve error file path that is indicated by an empty string - } - if (!isAbsolutePath(path)) { - return currentDirectory() + "/" + path; - } - // otherwise - return path; -} - -static std::pair extractRelativePathFromAbsolute(const std::string& absolutepath) { - static const std::string prefix = currentDirectory() + "/"; - if (startsWith(absolutepath, prefix)) { - const std::size_t size = prefix.size(); - return std::make_pair(absolutepath.substr(size, absolutepath.size() - size), true); - } - // otherwise - return std::make_pair("", false); -} - static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader); static void simplifyHasInclude(simplecpp::TokenList &expr, const simplecpp::DUI &dui) { @@ -3176,12 +3129,9 @@ static std::string openHeader(std::ifstream &f, const std::string &path) static std::string getRelativeFileName(const std::string &sourcefile, const std::string &header) { - std::string path; if (sourcefile.find_first_of("\\/") != std::string::npos) - path = sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header; - else - path = header; - return simplecpp::simplifyPath(path); + return simplecpp::simplifyPath(sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header); + return simplecpp::simplifyPath(header); } static std::string openHeaderRelative(std::ifstream &f, const std::string &sourcefile, const std::string &header) @@ -3191,7 +3141,7 @@ static std::string openHeaderRelative(std::ifstream &f, const std::string &sourc static std::string getIncludePathFileName(const std::string &includePath, const std::string &header) { - std::string path = toAbsolutePath(includePath); + std::string path = includePath; if (!path.empty() && path[path.size()-1U]!='/' && path[path.size()-1U]!='\\') path += '/'; return path + header; @@ -3200,9 +3150,9 @@ static std::string getIncludePathFileName(const std::string &includePath, const static std::string openHeaderIncludePath(std::ifstream &f, const simplecpp::DUI &dui, const std::string &header) { for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - std::string path = openHeader(f, getIncludePathFileName(*it, header)); - if (!path.empty()) - return path; + std::string simplePath = openHeader(f, getIncludePathFileName(*it, header)); + if (!simplePath.empty()) + return simplePath; } return ""; } @@ -3212,76 +3162,49 @@ static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const if (isAbsolutePath(header)) return openHeader(f, header); + std::string ret; + if (systemheader) { - // always return absolute path for systemheaders - return toAbsolutePath(openHeaderIncludePath(f, dui, header)); + ret = openHeaderIncludePath(f, dui, header); + return ret; } - std::string ret; - ret = openHeaderRelative(f, sourcefile, header); if (ret.empty()) - return toAbsolutePath(openHeaderIncludePath(f, dui, header));// in a similar way to system headers + return openHeaderIncludePath(f, dui, header); return ret; } -static std::string findPathInMapBothRelativeAndAbsolute(const std::map &filedata, const std::string& path) { - // here there are two possibilities - either we match this from absolute path or from a relative one - if (filedata.find(path) != filedata.end()) {// try first to respect the exact match - return path; - } - // otherwise - try to use the normalize to the correct representation - if (isAbsolutePath(path)) { - const std::pair relativeExtractedResult = extractRelativePathFromAbsolute(path); - if (relativeExtractedResult.second) { - const std::string relativePath = relativeExtractedResult.first; - if (filedata.find(relativePath) != filedata.end()) { - return relativePath; - } - } - } else { - const std::string absolutePath = toAbsolutePath(path); - if (filedata.find(absolutePath) != filedata.end()) - return absolutePath; - } - // otherwise - return ""; -} - -static std::string getFileIdPath(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) +static std::string getFileName(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) { if (filedata.empty()) { return ""; } if (isAbsolutePath(header)) { - const std::string simplifiedHeaderPath = simplecpp::simplifyPath(header); - return (filedata.find(simplifiedHeaderPath) != filedata.end()) ? simplifiedHeaderPath : ""; + return (filedata.find(header) != filedata.end()) ? simplecpp::simplifyPath(header) : ""; } if (!systemheader) { - const std::string relativeOrAbsoluteFilename = getRelativeFileName(sourcefile, header);// unknown if absolute or relative, but always simplified - const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, relativeOrAbsoluteFilename); - if (!match.empty()) { - return match; - } + const std::string relativeFilename = getRelativeFileName(sourcefile, header); + if (filedata.find(relativeFilename) != filedata.end()) + return relativeFilename; } for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, simplecpp::simplifyPath(getIncludePathFileName(*it, header))); - if (!match.empty()) { - return match; - } + std::string s = simplecpp::simplifyPath(getIncludePathFileName(*it, header)); + if (filedata.find(s) != filedata.end()) + return s; } if (systemheader && filedata.find(header) != filedata.end()) - return header;// system header that its file wasn't found in the included paths but alreasy in the filedata - return this as is + return header; return ""; } static bool hasFile(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) { - return !getFileIdPath(filedata, sourcefile, header, dui, systemheader).empty(); + return !getFileName(filedata, sourcefile, header, dui, systemheader).empty(); } std::map simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &filenames, const simplecpp::DUI &dui, simplecpp::OutputList *outputList) @@ -3637,7 +3560,7 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL const bool systemheader = (inctok->str()[0] == '<'); const std::string header(realFilename(inctok->str().substr(1U, inctok->str().size() - 2U))); - std::string header2 = getFileIdPath(filedata, rawtok->location.file(), header, dui, systemheader); + std::string header2 = getFileName(filedata, rawtok->location.file(), header, dui, systemheader); if (header2.empty()) { // try to load file.. std::ifstream f; diff --git a/testutils.py b/testutils.py deleted file mode 100644 index 55a2686d..00000000 --- a/testutils.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -import subprocess -import json - -def __run_subprocess(args, env=None, cwd=None, timeout=None): - p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, cwd=cwd) - - try: - stdout, stderr = p.communicate(timeout=timeout) - return_code = p.returncode - p = None - except subprocess.TimeoutExpired: - import psutil - # terminate all the child processes - child_procs = psutil.Process(p.pid).children(recursive=True) - if len(child_procs) > 0: - for child in child_procs: - child.terminate() - try: - # call with timeout since it might be stuck - p.communicate(timeout=5) - p = None - except subprocess.TimeoutExpired: - pass - raise - finally: - if p: - # sending the signal to the process groups causes the parent Python process to terminate as well - #os.killpg(os.getpgid(p.pid), signal.SIGTERM) # Send the signal to all the process groups - p.terminate() - stdout, stderr = p.communicate() - p = None - - stdout = stdout.decode(encoding='utf-8', errors='ignore').replace('\r\n', '\n') - stderr = stderr.decode(encoding='utf-8', errors='ignore').replace('\r\n', '\n') - - return return_code, stdout, stderr - -def simplecpp(args = [], cwd = None): - dir_path = os.path.dirname(os.path.realpath(__file__)) - if 'SIMPLECPP_EXE_PATH' in os.environ: - simplecpp_path = os.environ['SIMPLECPP_EXE_PATH'] - else: - simplecpp_path = os.path.join(dir_path, "simplecpp") - return __run_subprocess([simplecpp_path] + args, cwd = cwd) - -def quoted_string(s): - return json.dumps(str(s)) - -def format_include_path_arg(include_path): - return f"-I{str(include_path)}" - -def format_include(include, is_sys_header=False): - if is_sys_header: - return f"<{quoted_string(include)[1:-1]}>" - else: - return quoted_string(include) From 09a816319ca6ccc89f45b4e883db07276d19c295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=C3=A4ki?= Date: Wed, 12 Feb 2025 19:54:55 +0100 Subject: [PATCH 07/41] Revert "Fix #404: simplecpp::TokenList::constFold does not fold '( 0 ) && 10 < X' properly (#405)" (#416) --- simplecpp.cpp | 98 ++++++++++++++++++--------------------------------- simplecpp.h | 2 -- test.cpp | 26 -------------- 3 files changed, 34 insertions(+), 92 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 1ae47f5e..2316c42b 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -947,7 +947,7 @@ void simplecpp::TokenList::constFold() constFoldQuestionOp(&tok); // If there is no '(' we are done with the constant folding - if (!tok || tok->op != '(') + if (tok->op != '(') break; if (!tok->next || !tok->next->next || tok->next->next->op != ')') @@ -1157,7 +1157,10 @@ void simplecpp::TokenList::constFoldMulDivRem(Token *tok) } else continue; - simpleSquash(tok, toString(result)); + tok = tok->previous; + tok->setstr(toString(result)); + deleteToken(tok->next); + deleteToken(tok->next); } } @@ -1177,7 +1180,10 @@ void simplecpp::TokenList::constFoldAddSub(Token *tok) else continue; - simpleSquash(tok, toString(result)); + tok = tok->previous; + tok->setstr(toString(result)); + deleteToken(tok->next); + deleteToken(tok->next); } } @@ -1197,7 +1203,10 @@ void simplecpp::TokenList::constFoldShift(Token *tok) else continue; - simpleSquash(tok, toString(result)); + tok = tok->previous; + tok->setstr(toString(result)); + deleteToken(tok->next); + deleteToken(tok->next); } } @@ -1231,7 +1240,10 @@ void simplecpp::TokenList::constFoldComparison(Token *tok) else continue; - simpleSquash(tok, toString(result)); + tok = tok->previous; + tok->setstr(toString(result)); + deleteToken(tok->next); + deleteToken(tok->next); } } @@ -1263,51 +1275,12 @@ void simplecpp::TokenList::constFoldBitwise(Token *tok) result = (stringToLL(tok->previous->str()) ^ stringToLL(tok->next->str())); else /*if (*op == '|')*/ result = (stringToLL(tok->previous->str()) | stringToLL(tok->next->str())); - simpleSquash(tok, toString(result)); - } - } -} - -void simplecpp::TokenList::simpleSquash(Token *&tok, const std::string & result) -{ - tok = tok->previous; - tok->setstr(result); - deleteToken(tok->next); - deleteToken(tok->next); -} - -void simplecpp::TokenList::squashTokens(Token *&tok, const std::set & breakPoints, bool forwardDirection, const std::string & result) -{ - const char * const brackets = forwardDirection ? "()" : ")("; - Token* Token::* const step = forwardDirection ? &Token::next : &Token::previous; - int skip = 0; - const Token * const tok1 = tok->*step; - while (tok1 && tok1->*step) { - if ((tok1->*step)->op == brackets[1]){ - if (skip) { - --skip; - deleteToken(tok1->*step); - } else - break; - } else if ((tok1->*step)->op == brackets[0]) { - ++skip; - deleteToken(tok1->*step); - } else if (skip) { - deleteToken(tok1->*step); - } else if (breakPoints.count((tok1->*step)->str()) != 0) { - break; - } else { - deleteToken(tok1->*step); + tok = tok->previous; + tok->setstr(toString(result)); + deleteToken(tok->next); + deleteToken(tok->next); } } - simpleSquash(tok, result); -} - -static simplecpp::Token * constFoldGetOperand(simplecpp::Token * tok, bool forwardDirection) -{ - simplecpp::Token* simplecpp::Token::* const step = forwardDirection ? &simplecpp::Token::next : &simplecpp::Token::previous; - const char bracket = forwardDirection ? ')' : '('; - return tok->*step && (tok->*step)->number && (!((tok->*step)->*step) || (((tok->*step)->*step)->op == bracket)) ? tok->*step : nullptr; } static const std::string AND("and"); @@ -1323,24 +1296,21 @@ void simplecpp::TokenList::constFoldLogicalOp(Token *tok) } if (tok->str() != "&&" && tok->str() != "||") continue; - const Token* const lhs = constFoldGetOperand(tok, false); - const Token* const rhs = constFoldGetOperand(tok, true); - if (!lhs) // if lhs is not a single number we don't need to fold + if (!tok->previous || !tok->previous->number) + continue; + if (!tok->next || !tok->next->number) continue; - std::set breakPoints; - breakPoints.insert(":"); - breakPoints.insert("?"); - if (tok->str() == "||"){ - if (stringToLL(lhs->str()) != 0LL || (rhs && stringToLL(rhs->str()) != 0LL)) - squashTokens(tok, breakPoints, stringToLL(lhs->str()) != 0LL, toString(1)); - } else /*if (tok->str() == "&&")*/ { - breakPoints.insert("||"); - if (stringToLL(lhs->str()) == 0LL || (rhs && stringToLL(rhs->str()) == 0LL)) - squashTokens(tok, breakPoints, stringToLL(lhs->str()) == 0LL, toString(0)); - else if (rhs && stringToLL(lhs->str()) && stringToLL(rhs->str())) - simpleSquash(tok, "1"); - } + int result; + if (tok->str() == "||") + result = (stringToLL(tok->previous->str()) || stringToLL(tok->next->str())); + else /*if (tok->str() == "&&")*/ + result = (stringToLL(tok->previous->str()) && stringToLL(tok->next->str())); + + tok = tok->previous; + tok->setstr(toString(result)); + deleteToken(tok->next); + deleteToken(tok->next); } } diff --git a/simplecpp.h b/simplecpp.h index 0be48306..f5c69593 100755 --- a/simplecpp.h +++ b/simplecpp.h @@ -301,8 +301,6 @@ namespace simplecpp { void constFoldLogicalOp(Token *tok); void constFoldQuestionOp(Token **tok1); - void simpleSquash(Token *&tok, const std::string & result); - void squashTokens(Token *&tok, const std::set & breakPoints, bool forwardDirection, const std::string & result); std::string readUntil(Stream &stream, const Location &location, char start, char end, OutputList *outputList); void lineDirective(unsigned int fileIndex, unsigned int line, Location *location); diff --git a/test.cpp b/test.cpp index 82b9e1c7..187b7ec6 100644 --- a/test.cpp +++ b/test.cpp @@ -452,15 +452,6 @@ static void constFold() ASSERT_EQUALS("1", testConstFold("010==8")); ASSERT_EQUALS("exception", testConstFold("!1 ? 2 :")); ASSERT_EQUALS("exception", testConstFold("?2:3")); - ASSERT_EQUALS("0", testConstFold("( 0 ) && 10 < X")); - ASSERT_EQUALS("0", testConstFold("1+2*(3+4) && 7 - 7")); - ASSERT_EQUALS("1", testConstFold("( 1 ) || 10 < X")); - ASSERT_EQUALS("1", testConstFold("1+2*(3+4) || 8 - 7")); - ASSERT_EQUALS("X && 0", testConstFold("X && 0")); - ASSERT_EQUALS("X >= 0 || 0 < Y", testConstFold("X >= 0 || 0 < Y")); - ASSERT_EQUALS("X && 1 && Z", testConstFold("X && (1 || Y) && Z")); - ASSERT_EQUALS("0 || Y", testConstFold("0 && X || Y")); - ASSERT_EQUALS("X > 0 && Y", testConstFold("X > 0 && Y")); } #ifdef __CYGWIN__ @@ -1617,22 +1608,6 @@ static void ifA() ASSERT_EQUALS("\nX", preprocess(code, dui)); } -static void ifXorY() -{ - const char code[] = "#if Z > 0 || 0 < Y\n" - "X\n" - "#endif"; - ASSERT_EQUALS("", preprocess(code)); - - simplecpp::DUI dui; - dui.defines.push_back("Z=1"); - ASSERT_EQUALS("\nX", preprocess(code, dui)); - - dui.defines.clear(); - dui.defines.push_back("Y=15"); - ASSERT_EQUALS("\nX", preprocess(code, dui)); -} - static void ifCharLiteral() { const char code[] = "#if ('A'==0x41)\n" @@ -3151,7 +3126,6 @@ int main(int argc, char **argv) TEST_CASE(ifdef2); TEST_CASE(ifndef); TEST_CASE(ifA); - TEST_CASE(ifXorY); TEST_CASE(ifCharLiteral); TEST_CASE(ifDefined); TEST_CASE(ifDefinedNoPar); From 78f0f7cef4d84ffea31af2b810f6778b24f4ea91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Tue, 18 Mar 2025 14:51:24 +0100 Subject: [PATCH 08/41] clang-tidy.yml: updated to Clang 20 (#388) --- .github/workflows/clang-tidy.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index 22602075..9a4f43c9 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -21,19 +21,19 @@ jobs: run: | wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh - sudo ./llvm.sh 19 - sudo apt-get install clang-tidy-19 + sudo ./llvm.sh 20 + sudo apt-get install clang-tidy-20 - name: Verify clang-tidy configuration run: | - clang-tidy-19 --verify-config + clang-tidy-20 --verify-config - name: Prepare CMake run: | cmake -S . -B cmake.output -G "Unix Makefiles" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DDISABLE_CPP03_SYNTAX_CHECK=ON env: - CXX: clang-19 + CXX: clang-20 - name: Clang-Tidy run: | - run-clang-tidy-19 -q -j $(nproc) -p=cmake.output + run-clang-tidy-20 -q -j $(nproc) -p=cmake.output From 0c8867436fca46820b00cd36cfceebf6f48b56ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Sun, 13 Apr 2025 14:50:09 +0200 Subject: [PATCH 09/41] CI-unixish.yml: removed `ubuntu-20.04` (#425) --- .github/workflows/CI-unixish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index 050e1d74..46f3dc02 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: compiler: [clang++, g++] - os: [ubuntu-20.04, ubuntu-22.04, ubuntu-24.04, macos-13, macos-14] + os: [ubuntu-22.04, ubuntu-24.04, macos-13, macos-14] fail-fast: false runs-on: ${{ matrix.os }} From 6adb70f125439a97af6d0136e78de482c3799f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Tue, 6 May 2025 10:49:50 +0200 Subject: [PATCH 10/41] CI-windows.yml: removed `windows-2019` and added `windows-2025` (#426) --- .github/workflows/CI-windows.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.github/workflows/CI-windows.yml b/.github/workflows/CI-windows.yml index 1f78876d..37ebf4fc 100644 --- a/.github/workflows/CI-windows.yml +++ b/.github/workflows/CI-windows.yml @@ -15,7 +15,7 @@ jobs: build: strategy: matrix: - os: [windows-2019, windows-2022] + os: [windows-2022, windows-2025] config: [Release, Debug] fail-fast: false @@ -27,13 +27,7 @@ jobs: - name: Setup msbuild.exe uses: microsoft/setup-msbuild@v2 - - name: Run cmake - if: matrix.os == 'windows-2019' - run: | - cmake -G "Visual Studio 16 2019" -A x64 . || exit /b !errorlevel! - - - name: Run cmake - if: matrix.os == 'windows-2022' + - name: Run CMake run: | cmake -G "Visual Studio 17 2022" -A x64 . || exit /b !errorlevel! From 62afdd0b754dbbeba1d2c34cae172786da1e201b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Tue, 6 May 2025 17:17:14 +0200 Subject: [PATCH 11/41] CI-unixish.yml: added `macos-15` (#407) --- .github/workflows/CI-unixish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index 46f3dc02..7e962a47 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: compiler: [clang++, g++] - os: [ubuntu-22.04, ubuntu-24.04, macos-13, macos-14] + os: [ubuntu-22.04, ubuntu-24.04, macos-13, macos-14, macos-15] fail-fast: false runs-on: ${{ matrix.os }} From 07757314899f0a59d4bdb480015f21789a1db9be Mon Sep 17 00:00:00 2001 From: Tal500 Date: Fri, 9 May 2025 16:40:01 +0300 Subject: [PATCH 12/41] Fix relative paths, again (#418) Co-authored-by: Tal Hadad --- .github/workflows/CI-unixish.yml | 16 +++- .github/workflows/CI-windows.yml | 18 ++++- .gitignore | 3 + integration_test.py | 94 ++++++++++++++++++++++ simplecpp.cpp | 134 ++++++++++++++++++++++++------- testutils.py | 57 +++++++++++++ 6 files changed, 292 insertions(+), 30 deletions(-) create mode 100644 integration_test.py create mode 100644 testutils.py diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index 7e962a47..c84fc052 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -30,7 +30,17 @@ jobs: run: | sudo apt-get update sudo apt-get install libc++-18-dev - + + - name: Install missing software on macos + if: contains(matrix.os, 'macos') + run: | + brew install python3 + + - name: Install missing Python packages + run: | + python3 -m pip config set global.break-system-packages true + python3 -m pip install pytest + - name: make simplecpp run: make -j$(nproc) @@ -41,6 +51,10 @@ jobs: run: | make -j$(nproc) selfcheck + - name: integration test + run: | + python3 -m pytest integration_test.py + - name: Run CMake run: | cmake -S . -B cmake.output diff --git a/.github/workflows/CI-windows.yml b/.github/workflows/CI-windows.yml index 37ebf4fc..3e017182 100644 --- a/.github/workflows/CI-windows.yml +++ b/.github/workflows/CI-windows.yml @@ -26,7 +26,18 @@ jobs: - name: Setup msbuild.exe uses: microsoft/setup-msbuild@v2 - + + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: '3.13' + check-latest: true + + - name: Install missing Python packages + run: | + python -m pip install pip --upgrade || exit /b !errorlevel! + python -m pip install pytest || exit /b !errorlevel! + - name: Run CMake run: | cmake -G "Visual Studio 17 2022" -A x64 . || exit /b !errorlevel! @@ -42,4 +53,9 @@ jobs: - name: Selfcheck run: | .\${{ matrix.config }}\simplecpp.exe simplecpp.cpp -e || exit /b !errorlevel! + + - name: integration test + run: | + set SIMPLECPP_EXE_PATH=.\${{ matrix.config }}\simplecpp.exe + python -m pytest integration_test.py || exit /b !errorlevel! diff --git a/.gitignore b/.gitignore index 183545f1..113cf360 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,6 @@ testrunner # CLion /.idea /cmake-build-* + +# python +__pycache__/ diff --git a/integration_test.py b/integration_test.py new file mode 100644 index 00000000..0b2b0b38 --- /dev/null +++ b/integration_test.py @@ -0,0 +1,94 @@ +## test with python -m pytest integration_test.py + +import os +import pytest +from testutils import simplecpp, format_include_path_arg, format_include + +def __test_relative_header_create_header(dir, with_pragma_once=True): + header_file = os.path.join(dir, 'test.h') + with open(header_file, 'wt') as f: + f.write(f""" + {"#pragma once" if with_pragma_once else ""} + #ifndef TEST_H_INCLUDED + #define TEST_H_INCLUDED + #else + #error header_was_already_included + #endif + """) + return header_file, "error: #error header_was_already_included" + +def __test_relative_header_create_source(dir, include1, include2, is_include1_sys=False, is_include2_sys=False, inv=False): + if inv: + return __test_relative_header_create_source(dir, include1=include2, include2=include1, is_include1_sys=is_include2_sys, is_include2_sys=is_include1_sys) + ## otherwise + + src_file = os.path.join(dir, 'test.c') + with open(src_file, 'wt') as f: + f.write(f""" + #undef TEST_H_INCLUDED + #include {format_include(include1, is_include1_sys)} + #include {format_include(include2, is_include2_sys)} + """) + return src_file + +@pytest.mark.parametrize("with_pragma_once", (False, True)) +@pytest.mark.parametrize("is_sys", (False, True)) +def test_relative_header_1(tmpdir, with_pragma_once, is_sys): + _, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) + + test_file = __test_relative_header_create_source(tmpdir, "test.h", "test.h", is_include1_sys=is_sys, is_include2_sys=is_sys) + + args = ([format_include_path_arg(tmpdir)] if is_sys else []) + [test_file] + + _, _, stderr = simplecpp(args, cwd=tmpdir) + + if with_pragma_once: + assert stderr == '' + else: + assert double_include_error in stderr + +@pytest.mark.parametrize("inv", (False, True)) +def test_relative_header_2(tmpdir, inv): + header_file, _ = __test_relative_header_create_header(tmpdir) + + test_file = __test_relative_header_create_source(tmpdir, "test.h", header_file, inv=inv) + + args = [test_file] + + _, _, stderr = simplecpp(args, cwd=tmpdir) + assert stderr == '' + +@pytest.mark.parametrize("is_sys", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +def test_relative_header_3(tmpdir, is_sys, inv): + test_subdir = os.path.join(tmpdir, "test_subdir") + os.mkdir(test_subdir) + header_file, _ = __test_relative_header_create_header(test_subdir) + + test_file = __test_relative_header_create_source(tmpdir, "test_subdir/test.h", header_file, is_include1_sys=is_sys, inv=inv) + + args = [test_file] + + _, _, stderr = simplecpp(args, cwd=tmpdir) + + if is_sys: + assert "missing header: Header not found" in stderr + else: + assert stderr == '' + +@pytest.mark.parametrize("use_short_path", (False, True)) +@pytest.mark.parametrize("is_sys", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +def test_relative_header_4(tmpdir, use_short_path, is_sys, inv): + test_subdir = os.path.join(tmpdir, "test_subdir") + os.mkdir(test_subdir) + header_file, _ = __test_relative_header_create_header(test_subdir) + if use_short_path: + header_file = "test_subdir/test.h" + + test_file = __test_relative_header_create_source(tmpdir, header_file, "test.h", is_include2_sys=is_sys, inv=inv) + + args = [format_include_path_arg(test_subdir), test_file] + + _, _, stderr = simplecpp(args, cwd=tmpdir) + assert stderr == '' diff --git a/simplecpp.cpp b/simplecpp.cpp index 2316c42b..25c4124a 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -40,6 +40,12 @@ #include #include +#ifdef _WIN32 +#include +#else +#include +#endif + #ifdef SIMPLECPP_WINDOWS #include #undef ERROR @@ -147,6 +153,12 @@ static unsigned long long stringToULL(const std::string &s) return ret; } +// TODO: added an undercore since this conflicts with a function of the same name in utils.h from Cppcheck source when building Cppcheck with MSBuild +static bool startsWith_(const std::string &s, const std::string &p) +{ + return (s.size() >= p.size()) && std::equal(p.begin(), p.end(), s.begin()); +} + static bool endsWith(const std::string &s, const std::string &e) { return (s.size() >= e.size()) && std::equal(e.rbegin(), e.rend(), s.rbegin()); @@ -2334,17 +2346,12 @@ namespace simplecpp { namespace simplecpp { #ifdef __CYGWIN__ - bool startsWith(const std::string &str, const std::string &s) - { - return (str.size() >= s.size() && str.compare(0, s.size(), s) == 0); - } - std::string convertCygwinToWindowsPath(const std::string &cygwinPath) { std::string windowsPath; std::string::size_type pos = 0; - if (cygwinPath.size() >= 11 && startsWith(cygwinPath, "/cygdrive/")) { + if (cygwinPath.size() >= 11 && startsWith_(cygwinPath, "/cygdrive/")) { const unsigned char driveLetter = cygwinPath[10]; if (std::isalpha(driveLetter)) { if (cygwinPath.size() == 11) { @@ -2681,6 +2688,47 @@ static bool isCpp17OrLater(const simplecpp::DUI &dui) return !std_ver.empty() && (std_ver >= "201703L"); } + +static std::string currentDirectoryOSCalc() { + const std::size_t size = 4096; + char currentPath[size]; + +#ifndef _WIN32 + if (getcwd(currentPath, size) != nullptr) +#else + if (_getcwd(currentPath, size) != nullptr) +#endif + return std::string(currentPath); + + return ""; +} + +static const std::string& currentDirectory() { + static const std::string curdir = simplecpp::simplifyPath(currentDirectoryOSCalc()); + return curdir; +} + +static std::string toAbsolutePath(const std::string& path) { + if (path.empty()) { + return path;// preserve error file path that is indicated by an empty string + } + if (!isAbsolutePath(path)) { + return simplecpp::simplifyPath(currentDirectory() + "/" + path); + } + // otherwise + return simplecpp::simplifyPath(path); +} + +static std::pair extractRelativePathFromAbsolute(const std::string& absolutepath) { + static const std::string prefix = currentDirectory() + "/"; + if (startsWith_(absolutepath, prefix)) { + const std::size_t size = prefix.size(); + return std::make_pair(absolutepath.substr(size, absolutepath.size() - size), true); + } + // otherwise + return std::make_pair("", false); +} + static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader); static void simplifyHasInclude(simplecpp::TokenList &expr, const simplecpp::DUI &dui) { @@ -3099,9 +3147,12 @@ static std::string openHeader(std::ifstream &f, const std::string &path) static std::string getRelativeFileName(const std::string &sourcefile, const std::string &header) { + std::string path; if (sourcefile.find_first_of("\\/") != std::string::npos) - return simplecpp::simplifyPath(sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header); - return simplecpp::simplifyPath(header); + path = sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header; + else + path = header; + return simplecpp::simplifyPath(path); } static std::string openHeaderRelative(std::ifstream &f, const std::string &sourcefile, const std::string &header) @@ -3111,7 +3162,7 @@ static std::string openHeaderRelative(std::ifstream &f, const std::string &sourc static std::string getIncludePathFileName(const std::string &includePath, const std::string &header) { - std::string path = includePath; + std::string path = toAbsolutePath(includePath); if (!path.empty() && path[path.size()-1U]!='/' && path[path.size()-1U]!='\\') path += '/'; return path + header; @@ -3120,9 +3171,9 @@ static std::string getIncludePathFileName(const std::string &includePath, const static std::string openHeaderIncludePath(std::ifstream &f, const simplecpp::DUI &dui, const std::string &header) { for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - std::string simplePath = openHeader(f, getIncludePathFileName(*it, header)); - if (!simplePath.empty()) - return simplePath; + std::string path = openHeader(f, getIncludePathFileName(*it, header)); + if (!path.empty()) + return path; } return ""; } @@ -3132,49 +3183,76 @@ static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const if (isAbsolutePath(header)) return openHeader(f, header); - std::string ret; - if (systemheader) { - ret = openHeaderIncludePath(f, dui, header); - return ret; + // always return absolute path for systemheaders + return toAbsolutePath(openHeaderIncludePath(f, dui, header)); } + std::string ret; + ret = openHeaderRelative(f, sourcefile, header); if (ret.empty()) - return openHeaderIncludePath(f, dui, header); + return toAbsolutePath(openHeaderIncludePath(f, dui, header));// in a similar way to system headers return ret; } -static std::string getFileName(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) +static std::string findPathInMapBothRelativeAndAbsolute(const std::map &filedata, const std::string& path) { + // here there are two possibilities - either we match this from absolute path or from a relative one + if (filedata.find(path) != filedata.end()) {// try first to respect the exact match + return path; + } + // otherwise - try to use the normalize to the correct representation + if (isAbsolutePath(path)) { + const std::pair relativeExtractedResult = extractRelativePathFromAbsolute(path); + if (relativeExtractedResult.second) { + const std::string relativePath = relativeExtractedResult.first; + if (filedata.find(relativePath) != filedata.end()) { + return relativePath; + } + } + } else { + const std::string absolutePath = toAbsolutePath(path); + if (filedata.find(absolutePath) != filedata.end()) + return absolutePath; + } + // otherwise + return ""; +} + +static std::string getFileIdPath(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) { if (filedata.empty()) { return ""; } if (isAbsolutePath(header)) { - return (filedata.find(header) != filedata.end()) ? simplecpp::simplifyPath(header) : ""; + const std::string simplifiedHeaderPath = simplecpp::simplifyPath(header); + return (filedata.find(simplifiedHeaderPath) != filedata.end()) ? simplifiedHeaderPath : ""; } if (!systemheader) { - const std::string relativeFilename = getRelativeFileName(sourcefile, header); - if (filedata.find(relativeFilename) != filedata.end()) - return relativeFilename; + const std::string relativeOrAbsoluteFilename = getRelativeFileName(sourcefile, header);// unknown if absolute or relative, but always simplified + const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, relativeOrAbsoluteFilename); + if (!match.empty()) { + return match; + } } for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - std::string s = simplecpp::simplifyPath(getIncludePathFileName(*it, header)); - if (filedata.find(s) != filedata.end()) - return s; + const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, simplecpp::simplifyPath(getIncludePathFileName(*it, header))); + if (!match.empty()) { + return match; + } } if (systemheader && filedata.find(header) != filedata.end()) - return header; + return header;// system header that its file wasn't found in the included paths but alreasy in the filedata - return this as is return ""; } static bool hasFile(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) { - return !getFileName(filedata, sourcefile, header, dui, systemheader).empty(); + return !getFileIdPath(filedata, sourcefile, header, dui, systemheader).empty(); } std::map simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &filenames, const simplecpp::DUI &dui, simplecpp::OutputList *outputList) @@ -3530,7 +3608,7 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL const bool systemheader = (inctok->str()[0] == '<'); const std::string header(realFilename(inctok->str().substr(1U, inctok->str().size() - 2U))); - std::string header2 = getFileName(filedata, rawtok->location.file(), header, dui, systemheader); + std::string header2 = getFileIdPath(filedata, rawtok->location.file(), header, dui, systemheader); if (header2.empty()) { // try to load file.. std::ifstream f; diff --git a/testutils.py b/testutils.py new file mode 100644 index 00000000..55a2686d --- /dev/null +++ b/testutils.py @@ -0,0 +1,57 @@ +import os +import subprocess +import json + +def __run_subprocess(args, env=None, cwd=None, timeout=None): + p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, cwd=cwd) + + try: + stdout, stderr = p.communicate(timeout=timeout) + return_code = p.returncode + p = None + except subprocess.TimeoutExpired: + import psutil + # terminate all the child processes + child_procs = psutil.Process(p.pid).children(recursive=True) + if len(child_procs) > 0: + for child in child_procs: + child.terminate() + try: + # call with timeout since it might be stuck + p.communicate(timeout=5) + p = None + except subprocess.TimeoutExpired: + pass + raise + finally: + if p: + # sending the signal to the process groups causes the parent Python process to terminate as well + #os.killpg(os.getpgid(p.pid), signal.SIGTERM) # Send the signal to all the process groups + p.terminate() + stdout, stderr = p.communicate() + p = None + + stdout = stdout.decode(encoding='utf-8', errors='ignore').replace('\r\n', '\n') + stderr = stderr.decode(encoding='utf-8', errors='ignore').replace('\r\n', '\n') + + return return_code, stdout, stderr + +def simplecpp(args = [], cwd = None): + dir_path = os.path.dirname(os.path.realpath(__file__)) + if 'SIMPLECPP_EXE_PATH' in os.environ: + simplecpp_path = os.environ['SIMPLECPP_EXE_PATH'] + else: + simplecpp_path = os.path.join(dir_path, "simplecpp") + return __run_subprocess([simplecpp_path] + args, cwd = cwd) + +def quoted_string(s): + return json.dumps(str(s)) + +def format_include_path_arg(include_path): + return f"-I{str(include_path)}" + +def format_include(include, is_sys_header=False): + if is_sys_header: + return f"<{quoted_string(include)[1:-1]}>" + else: + return quoted_string(include) From 2bbb496fe2bdc558c05fb51ce3de0b54023f9007 Mon Sep 17 00:00:00 2001 From: Tal500 Date: Fri, 16 May 2025 14:18:04 +0300 Subject: [PATCH 13/41] Preserve relativeness of included paths (w.r.t. current directory) (#428) Co-authored-by: Tal Hadad --- integration_test.py | 24 ++++++++++++++----- simplecpp.cpp | 56 +++++++++++++++++++++++++++------------------ 2 files changed, 52 insertions(+), 28 deletions(-) diff --git a/integration_test.py b/integration_test.py index 0b2b0b38..4fe0129b 100644 --- a/integration_test.py +++ b/integration_test.py @@ -1,6 +1,7 @@ ## test with python -m pytest integration_test.py import os +import pathlib import pytest from testutils import simplecpp, format_include_path_arg, format_include @@ -14,6 +15,7 @@ def __test_relative_header_create_header(dir, with_pragma_once=True): #else #error header_was_already_included #endif + const int dummy = 1; """) return header_file, "error: #error header_was_already_included" @@ -48,33 +50,43 @@ def test_relative_header_1(tmpdir, with_pragma_once, is_sys): assert double_include_error in stderr @pytest.mark.parametrize("inv", (False, True)) -def test_relative_header_2(tmpdir, inv): +@pytest.mark.parametrize("source_relative", (False, True)) +def test_relative_header_2(tmpdir, inv, source_relative): header_file, _ = __test_relative_header_create_header(tmpdir) test_file = __test_relative_header_create_source(tmpdir, "test.h", header_file, inv=inv) - args = [test_file] + args = ["test.c" if source_relative else test_file] - _, _, stderr = simplecpp(args, cwd=tmpdir) + _, stdout, stderr = simplecpp(args, cwd=tmpdir) assert stderr == '' + if source_relative and not inv: + assert '#line 8 "test.h"' in stdout + else: + assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout @pytest.mark.parametrize("is_sys", (False, True)) @pytest.mark.parametrize("inv", (False, True)) -def test_relative_header_3(tmpdir, is_sys, inv): +@pytest.mark.parametrize("source_relative", (False, True)) +def test_relative_header_3(tmpdir, is_sys, inv, source_relative): test_subdir = os.path.join(tmpdir, "test_subdir") os.mkdir(test_subdir) header_file, _ = __test_relative_header_create_header(test_subdir) test_file = __test_relative_header_create_source(tmpdir, "test_subdir/test.h", header_file, is_include1_sys=is_sys, inv=inv) - args = [test_file] + args = ["test.c" if source_relative else test_file] - _, _, stderr = simplecpp(args, cwd=tmpdir) + _, stdout, stderr = simplecpp(args, cwd=tmpdir) if is_sys: assert "missing header: Header not found" in stderr else: assert stderr == '' + if source_relative and not inv: + assert '#line 8 "test_subdir/test.h"' in stdout + else: + assert f'#line 8 "{pathlib.PurePath(test_subdir).as_posix()}/test.h"' in stdout @pytest.mark.parametrize("use_short_path", (False, True)) @pytest.mark.parametrize("is_sys", (False, True)) diff --git a/simplecpp.cpp b/simplecpp.cpp index 25c4124a..d1fa91bf 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -3145,11 +3145,11 @@ static std::string openHeader(std::ifstream &f, const std::string &path) return ""; } -static std::string getRelativeFileName(const std::string &sourcefile, const std::string &header) +static std::string getRelativeFileName(const std::string &baseFile, const std::string &header) { std::string path; - if (sourcefile.find_first_of("\\/") != std::string::npos) - path = sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header; + if (baseFile.find_first_of("\\/") != std::string::npos) + path = baseFile.substr(0, baseFile.find_last_of("\\/") + 1U) + header; else path = header; return simplecpp::simplifyPath(path); @@ -3160,12 +3160,22 @@ static std::string openHeaderRelative(std::ifstream &f, const std::string &sourc return openHeader(f, getRelativeFileName(sourcefile, header)); } +// returns the simplified header path: +// * If the header path is absolute, returns it in absolute path +// * Otherwise, returns it in relative path with respect to the current directory static std::string getIncludePathFileName(const std::string &includePath, const std::string &header) { - std::string path = toAbsolutePath(includePath); - if (!path.empty() && path[path.size()-1U]!='/' && path[path.size()-1U]!='\\') - path += '/'; - return path + header; + std::string simplifiedHeader = simplecpp::simplifyPath(header); + + if (isAbsolutePath(simplifiedHeader)) { + return simplifiedHeader; + } + + std::string basePath = toAbsolutePath(includePath); + if (!basePath.empty() && basePath[basePath.size()-1U]!='/' && basePath[basePath.size()-1U]!='\\') + basePath += '/'; + const std::string absolutesimplifiedHeaderPath = basePath + simplifiedHeader; + return extractRelativePathFromAbsolute(absolutesimplifiedHeaderPath).first; } static std::string openHeaderIncludePath(std::ifstream &f, const simplecpp::DUI &dui, const std::string &header) @@ -3183,17 +3193,16 @@ static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const if (isAbsolutePath(header)) return openHeader(f, header); - if (systemheader) { - // always return absolute path for systemheaders - return toAbsolutePath(openHeaderIncludePath(f, dui, header)); + // prefer first to search the header relatively to source file if found, when not a system header + if (!systemheader) { + std::string relativeHeader = openHeaderRelative(f, sourcefile, header); + if (!relativeHeader.empty()) { + return relativeHeader; + } } - std::string ret; - - ret = openHeaderRelative(f, sourcefile, header); - if (ret.empty()) - return toAbsolutePath(openHeaderIncludePath(f, dui, header));// in a similar way to system headers - return ret; + // search the header on the include paths (provided by the flags "-I...") + return openHeaderIncludePath(f, dui, header); } static std::string findPathInMapBothRelativeAndAbsolute(const std::map &filedata, const std::string& path) { @@ -3212,8 +3221,9 @@ static std::string findPathInMapBothRelativeAndAbsolute(const std::map::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, simplecpp::simplifyPath(getIncludePathFileName(*it, header))); + const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, getIncludePathFileName(*it, header)); if (!match.empty()) { return match; } } - if (systemheader && filedata.find(header) != filedata.end()) - return header;// system header that its file wasn't found in the included paths but alreasy in the filedata - return this as is - return ""; } From 76df97f64b2906d01587b379511c2d9836568c87 Mon Sep 17 00:00:00 2001 From: Tal500 Date: Fri, 30 May 2025 17:44:43 +0300 Subject: [PATCH 14/41] fix: parent relative paths, and rework on the whole path extraction mechanics (#429) --- .github/workflows/CI-unixish.yml | 2 +- .github/workflows/CI-windows.yml | 2 +- integration_test.py | 103 ++++++++++++++++++++++++++----- simplecpp.cpp | 98 ++++++++++++++++++++--------- 4 files changed, 159 insertions(+), 46 deletions(-) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index c84fc052..f5a78ea5 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -53,7 +53,7 @@ jobs: - name: integration test run: | - python3 -m pytest integration_test.py + python3 -m pytest integration_test.py -vv - name: Run CMake run: | diff --git a/.github/workflows/CI-windows.yml b/.github/workflows/CI-windows.yml index 3e017182..971f3827 100644 --- a/.github/workflows/CI-windows.yml +++ b/.github/workflows/CI-windows.yml @@ -57,5 +57,5 @@ jobs: - name: integration test run: | set SIMPLECPP_EXE_PATH=.\${{ matrix.config }}\simplecpp.exe - python -m pytest integration_test.py || exit /b !errorlevel! + python -m pytest integration_test.py -vv || exit /b !errorlevel! diff --git a/integration_test.py b/integration_test.py index 4fe0129b..122ce9aa 100644 --- a/integration_test.py +++ b/integration_test.py @@ -35,40 +35,48 @@ def __test_relative_header_create_source(dir, include1, include2, is_include1_sy @pytest.mark.parametrize("with_pragma_once", (False, True)) @pytest.mark.parametrize("is_sys", (False, True)) -def test_relative_header_1(tmpdir, with_pragma_once, is_sys): +def test_relative_header_1(record_property, tmpdir, with_pragma_once, is_sys): _, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) test_file = __test_relative_header_create_source(tmpdir, "test.h", "test.h", is_include1_sys=is_sys, is_include2_sys=is_sys) args = ([format_include_path_arg(tmpdir)] if is_sys else []) + [test_file] - _, _, stderr = simplecpp(args, cwd=tmpdir) + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) if with_pragma_once: assert stderr == '' else: assert double_include_error in stderr +@pytest.mark.parametrize("with_pragma_once", (False, True)) @pytest.mark.parametrize("inv", (False, True)) @pytest.mark.parametrize("source_relative", (False, True)) -def test_relative_header_2(tmpdir, inv, source_relative): - header_file, _ = __test_relative_header_create_header(tmpdir) +def test_relative_header_2(record_property, tmpdir, with_pragma_once, inv, source_relative): + header_file, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) test_file = __test_relative_header_create_source(tmpdir, "test.h", header_file, inv=inv) args = ["test.c" if source_relative else test_file] _, stdout, stderr = simplecpp(args, cwd=tmpdir) - assert stderr == '' - if source_relative and not inv: - assert '#line 8 "test.h"' in stdout + record_property("stdout", stdout) + record_property("stderr", stderr) + if with_pragma_once: + assert stderr == '' + if inv: + assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout + else: + assert '#line 8 "test.h"' in stdout else: - assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout + assert double_include_error in stderr @pytest.mark.parametrize("is_sys", (False, True)) @pytest.mark.parametrize("inv", (False, True)) @pytest.mark.parametrize("source_relative", (False, True)) -def test_relative_header_3(tmpdir, is_sys, inv, source_relative): +def test_relative_header_3(record_property, tmpdir, is_sys, inv, source_relative): test_subdir = os.path.join(tmpdir, "test_subdir") os.mkdir(test_subdir) header_file, _ = __test_relative_header_create_header(test_subdir) @@ -78,20 +86,23 @@ def test_relative_header_3(tmpdir, is_sys, inv, source_relative): args = ["test.c" if source_relative else test_file] _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) if is_sys: assert "missing header: Header not found" in stderr else: assert stderr == '' - if source_relative and not inv: - assert '#line 8 "test_subdir/test.h"' in stdout - else: + if inv: assert f'#line 8 "{pathlib.PurePath(test_subdir).as_posix()}/test.h"' in stdout + else: + assert '#line 8 "test_subdir/test.h"' in stdout @pytest.mark.parametrize("use_short_path", (False, True)) +@pytest.mark.parametrize("relative_include_dir", (False, True)) @pytest.mark.parametrize("is_sys", (False, True)) @pytest.mark.parametrize("inv", (False, True)) -def test_relative_header_4(tmpdir, use_short_path, is_sys, inv): +def test_relative_header_4(record_property, tmpdir, use_short_path, relative_include_dir, is_sys, inv): test_subdir = os.path.join(tmpdir, "test_subdir") os.mkdir(test_subdir) header_file, _ = __test_relative_header_create_header(test_subdir) @@ -100,7 +111,69 @@ def test_relative_header_4(tmpdir, use_short_path, is_sys, inv): test_file = __test_relative_header_create_source(tmpdir, header_file, "test.h", is_include2_sys=is_sys, inv=inv) - args = [format_include_path_arg(test_subdir), test_file] + args = [format_include_path_arg("test_subdir" if relative_include_dir else test_subdir), test_file] - _, _, stderr = simplecpp(args, cwd=tmpdir) + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) assert stderr == '' + if (use_short_path and not inv) or (relative_include_dir and inv): + assert '#line 8 "test_subdir/test.h"' in stdout + else: + assert f'#line 8 "{pathlib.PurePath(test_subdir).as_posix()}/test.h"' in stdout + +@pytest.mark.parametrize("with_pragma_once", (False, True)) +@pytest.mark.parametrize("relative_include_dir", (False, True)) +@pytest.mark.parametrize("is_sys", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +def test_relative_header_5(record_property, tmpdir, with_pragma_once, relative_include_dir, is_sys, inv): # test relative paths with .. + ## in this test, the subdir role is the opposite then the previous - it contains the test.c file, while the parent tmpdir contains the header file + header_file, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) + if is_sys: + header_file_second_path = "test.h" + else: + header_file_second_path = "../test.h" + + test_subdir = os.path.join(tmpdir, "test_subdir") + os.mkdir(test_subdir) + test_file = __test_relative_header_create_source(test_subdir, header_file, header_file_second_path, is_include2_sys=is_sys, inv=inv) + + args = ([format_include_path_arg(".." if relative_include_dir else tmpdir)] if is_sys else []) + ["test.c"] + + _, stdout, stderr = simplecpp(args, cwd=test_subdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + if with_pragma_once: + assert stderr == '' + if (relative_include_dir or not is_sys) and inv: + assert '#line 8 "../test.h"' in stdout + else: + assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout + else: + assert double_include_error in stderr + +@pytest.mark.parametrize("with_pragma_once", (False, True)) +@pytest.mark.parametrize("relative_include_dir", (False, True)) +@pytest.mark.parametrize("is_sys", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +def test_relative_header_6(record_property, tmpdir, with_pragma_once, relative_include_dir, is_sys, inv): # test relative paths with .. that is resolved only by an include dir + ## in this test, both the header and the source file are at the same dir, but there is a dummy inclusion dir as a subdir + header_file, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) + + test_subdir = os.path.join(tmpdir, "test_subdir") + os.mkdir(test_subdir) + test_file = __test_relative_header_create_source(tmpdir, header_file, "../test.h", is_include2_sys=is_sys, inv=inv) + + args = [format_include_path_arg("test_subdir" if relative_include_dir else test_subdir), "test.c"] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + if with_pragma_once: + assert stderr == '' + if relative_include_dir and inv: + assert '#line 8 "test.h"' in stdout + else: + assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout + else: + assert double_include_error in stderr diff --git a/simplecpp.cpp b/simplecpp.cpp index d1fa91bf..9ff66d8b 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -2719,14 +2719,42 @@ static std::string toAbsolutePath(const std::string& path) { return simplecpp::simplifyPath(path); } -static std::pair extractRelativePathFromAbsolute(const std::string& absolutepath) { - static const std::string prefix = currentDirectory() + "/"; - if (startsWith_(absolutepath, prefix)) { - const std::size_t size = prefix.size(); - return std::make_pair(absolutepath.substr(size, absolutepath.size() - size), true); +static std::string dirPath(const std::string& path, bool withTrailingSlash=true) { + const std::size_t lastSlash = path.find_last_of("\\/"); + if (lastSlash == std::string::npos) { + return ""; } - // otherwise - return std::make_pair("", false); + return path.substr(0, lastSlash + (withTrailingSlash ? 1U : 0U)); +} + +static std::string omitPathTrailingSlash(const std::string& path) { + if (endsWith(path, "/")) { + return path.substr(0, path.size() - 1U); + } + return path; +} + +static std::string extractRelativePathFromAbsolute(const std::string& absoluteSimplifiedPath, const std::string& prefixSimplifiedAbsoluteDir = currentDirectory()) { + const std::string normalizedAbsolutePath = omitPathTrailingSlash(absoluteSimplifiedPath); + std::string currentPrefix = omitPathTrailingSlash(prefixSimplifiedAbsoluteDir); + std::string leadingParenting; + while (!startsWith_(normalizedAbsolutePath, currentPrefix)) { + leadingParenting = "../" + leadingParenting; + currentPrefix = dirPath(currentPrefix, false); + } + const std::size_t size = currentPrefix.size(); + std::string relativeFromMeetingPath = normalizedAbsolutePath.substr(size, normalizedAbsolutePath.size() - size); + if (currentPrefix.empty() && !(startsWith_(absoluteSimplifiedPath, "/") && startsWith_(prefixSimplifiedAbsoluteDir, "/"))) { + // In the case that there is no common prefix path, + // and at not both of the paths start with `/` (can happen only in Windows paths on distinct partitions), + // return the absolute simplified path as is because no relative path can match. + return absoluteSimplifiedPath; + } + if (startsWith_(relativeFromMeetingPath, "/")) { + // omit the leading slash + relativeFromMeetingPath = relativeFromMeetingPath.substr(1, relativeFromMeetingPath.size()); + } + return leadingParenting + relativeFromMeetingPath; } static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader); @@ -3147,12 +3175,17 @@ static std::string openHeader(std::ifstream &f, const std::string &path) static std::string getRelativeFileName(const std::string &baseFile, const std::string &header) { - std::string path; - if (baseFile.find_first_of("\\/") != std::string::npos) - path = baseFile.substr(0, baseFile.find_last_of("\\/") + 1U) + header; - else - path = header; - return simplecpp::simplifyPath(path); + const std::string baseFileSimplified = simplecpp::simplifyPath(baseFile); + const std::string baseFileAbsolute = isAbsolutePath(baseFileSimplified) ? + baseFileSimplified : + simplecpp::simplifyPath(currentDirectory() + "/" + baseFileSimplified); + + const std::string headerSimplified = simplecpp::simplifyPath(header); + const std::string path = isAbsolutePath(headerSimplified) ? + headerSimplified : + simplecpp::simplifyPath(dirPath(baseFileAbsolute) + headerSimplified); + + return extractRelativePathFromAbsolute(path); } static std::string openHeaderRelative(std::ifstream &f, const std::string &sourcefile, const std::string &header) @@ -3174,8 +3207,9 @@ static std::string getIncludePathFileName(const std::string &includePath, const std::string basePath = toAbsolutePath(includePath); if (!basePath.empty() && basePath[basePath.size()-1U]!='/' && basePath[basePath.size()-1U]!='\\') basePath += '/'; - const std::string absolutesimplifiedHeaderPath = basePath + simplifiedHeader; - return extractRelativePathFromAbsolute(absolutesimplifiedHeaderPath).first; + const std::string absoluteSimplifiedHeaderPath = simplecpp::simplifyPath(basePath + simplifiedHeader); + // preserve absoluteness/relativieness of the including dir + return isAbsolutePath(includePath) ? absoluteSimplifiedHeaderPath : extractRelativePathFromAbsolute(absoluteSimplifiedHeaderPath); } static std::string openHeaderIncludePath(std::ifstream &f, const simplecpp::DUI &dui, const std::string &header) @@ -3210,22 +3244,18 @@ static std::string findPathInMapBothRelativeAndAbsolute(const std::map relativeExtractedResult = extractRelativePathFromAbsolute(path); - if (relativeExtractedResult.second) { - const std::string relativePath = relativeExtractedResult.first; - if (filedata.find(relativePath) != filedata.end()) { - return relativePath; - } - } + alternativePath = extractRelativePathFromAbsolute(simplecpp::simplifyPath(path)); } else { - const std::string absolutePath = toAbsolutePath(path); - if (filedata.find(absolutePath) != filedata.end()) { - return absolutePath; - } + alternativePath = toAbsolutePath(path); + } + + if (filedata.find(alternativePath) != filedata.end()) { + return alternativePath; } - // otherwise return ""; } @@ -3267,6 +3297,16 @@ static bool hasFile(const std::map &filedat return !getFileIdPath(filedata, sourcefile, header, dui, systemheader).empty(); } +static void safeInsertTokenListToMap(std::map &filedata, const std::string &header2, simplecpp::TokenList *tokens, const std::string &header, const std::string &sourcefile, bool systemheader, const char* contextDesc) +{ + const bool inserted = filedata.insert(std::make_pair(header2, tokens)).second; + if (!inserted) { + std::cerr << "error in " << contextDesc << " - attempt to add a tokenized file to the file map, but this file is already in the map! Details:" << + "header: " << header << " header2: " << header2 << " source: " << sourcefile << " systemheader: " << systemheader << std::endl; + std::abort(); + } +} + std::map simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &filenames, const simplecpp::DUI &dui, simplecpp::OutputList *outputList) { #ifdef SIMPLECPP_WINDOWS @@ -3343,7 +3383,7 @@ std::map simplecpp::load(const simplecpp::To TokenList *tokens = new TokenList(header2, filenames, outputList); if (dui.removeComments) tokens->removeComments(); - ret[header2] = tokens; + safeInsertTokenListToMap(ret, header2, tokens, header, rawtok->location.file(), systemheader, "simplecpp::load"); if (tokens->front()) filelist.push_back(tokens->front()); } @@ -3630,7 +3670,7 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL TokenList * const tokens = new TokenList(header2, files, outputList); if (dui.removeComments) tokens->removeComments(); - filedata[header2] = tokens; + safeInsertTokenListToMap(filedata, header2, tokens, header, rawtok->location.file(), systemheader, "simplecpp::preprocess"); } } if (header2.empty()) { From 9ba39709e34a8ccaee5c7afa0d87dd478eaba959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Thu, 12 Jun 2025 11:23:30 +0200 Subject: [PATCH 15/41] aligned GCC warnings with Cppcheck (#434) --- CMakeLists.txt | 19 ++++++++++++++++++- test.cpp | 6 +++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c3fcf4ba..672e63bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,24 @@ function(add_compile_options_safe FLAG) endfunction() if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") - add_compile_options(-Wall -Wextra -pedantic -Wcast-qual -Wfloat-equal -Wmissing-declarations -Wmissing-format-attribute -Wredundant-decls -Wshadow -Wundef -Wold-style-cast -Wno-multichar) + add_compile_options(-pedantic) + add_compile_options(-Wall) + add_compile_options(-Wextra) + add_compile_options(-Wcast-qual) # Cast for removing type qualifiers + add_compile_options(-Wfloat-equal) # Floating values used in equality comparisons + add_compile_options(-Wmissing-declarations) # If a global function is defined without a previous declaration + add_compile_options(-Wmissing-format-attribute) # + add_compile_options(-Wno-long-long) + add_compile_options(-Wpacked) # + add_compile_options(-Wredundant-decls) # if anything is declared more than once in the same scope + add_compile_options(-Wundef) + add_compile_options(-Wno-missing-braces) + add_compile_options(-Wno-sign-compare) + add_compile_options(-Wno-multichar) + add_compile_options(-Woverloaded-virtual) # when a function declaration hides virtual functions from a base class + + add_compile_options(-Wsuggest-attribute=noreturn) + add_compile_options_safe(-Wuseless-cast) elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") add_compile_definitions(_CRT_SECURE_NO_WARNINGS) elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") diff --git a/test.cpp b/test.cpp index 187b7ec6..86e6ddc8 100644 --- a/test.cpp +++ b/test.cpp @@ -279,9 +279,9 @@ static void characterLiteral() #ifdef __GNUC__ // BEGIN Implementation-specific results - ASSERT_EQUALS(static_cast('AB'), simplecpp::characterLiteralToLL("'AB'")); - ASSERT_EQUALS(static_cast('ABC'), simplecpp::characterLiteralToLL("'ABC'")); - ASSERT_EQUALS(static_cast('ABCD'), simplecpp::characterLiteralToLL("'ABCD'")); + ASSERT_EQUALS('AB', simplecpp::characterLiteralToLL("'AB'")); + ASSERT_EQUALS('ABC', simplecpp::characterLiteralToLL("'ABC'")); + ASSERT_EQUALS('ABCD', simplecpp::characterLiteralToLL("'ABCD'")); ASSERT_EQUALS('\134t', simplecpp::characterLiteralToLL("'\\134t'")); // cppcheck ticket #7452 // END Implementation-specific results #endif From 6cc4f53f4c7ff2adebec67ef21795260355478c7 Mon Sep 17 00:00:00 2001 From: glankk Date: Thu, 12 Jun 2025 17:23:14 +0200 Subject: [PATCH 16/41] Add encoding to open() in run-tests.py (#440) --- run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run-tests.py b/run-tests.py index 2f28bf0f..5017f4c1 100644 --- a/run-tests.py +++ b/run-tests.py @@ -16,7 +16,7 @@ def cleanup(out): commands = [] for f in sorted(glob.glob(os.path.expanduser('testsuite/clang-preprocessor-tests/*.c*'))): - for line in open(f, 'rt'): + for line in open(f, 'rt', encoding='utf-8'): if line.startswith('// RUN: %clang_cc1 '): cmd = '' for arg in line[19:].split(): From eb18d11f0fc2aa70330dc5592629dc06ed0213f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Fri, 13 Jun 2025 22:26:46 +0200 Subject: [PATCH 17/41] main.cpp: added option `-f` to fail on output error (#436) --- main.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/main.cpp b/main.cpp index f05cf793..424ef6fa 100644 --- a/main.cpp +++ b/main.cpp @@ -18,6 +18,7 @@ int main(int argc, char **argv) bool error = false; const char *filename = nullptr; bool use_istream = false; + bool fail_on_error = false; // Settings.. simplecpp::DUI dui; @@ -70,6 +71,10 @@ int main(int argc, char **argv) error_only = true; found = true; break; + case 'f': + fail_on_error = true; + found = true; + break; } if (!found) { std::cout << "error: option '" << arg << "' is unknown." << std::endl; @@ -172,5 +177,8 @@ int main(int argc, char **argv) } } + if (fail_on_error && !outputList.empty()) + return 1; + return 0; } From a46cb125243f4d6e743eceeba37a485ad131d420 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Fri, 13 Jun 2025 22:33:42 +0200 Subject: [PATCH 18/41] selfcheck.sh: actually fail if we encountered unexpected errors (#437) --- selfcheck.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/selfcheck.sh b/selfcheck.sh index 3518c654..a43ef8c5 100755 --- a/selfcheck.sh +++ b/selfcheck.sh @@ -1,6 +1,11 @@ #!/bin/sh -output=$(./simplecpp simplecpp.cpp -e 2>&1) +output=$(./simplecpp simplecpp.cpp -e -f 2>&1) ec=$? -echo "$output" | grep -v 'Header not found: <' -exit $ec \ No newline at end of file +errors=$(echo "$output" | grep -v 'Header not found: <') +if [ $ec -ne 0 ]; then + # only fail if got errors which do not refer to missing system includes + if [ ! -z "$errors" ]; then + exit $ec + fi +fi \ No newline at end of file From 8e5c5f49ca66c053c9ab5bd699b7517b5b796536 Mon Sep 17 00:00:00 2001 From: glankk Date: Mon, 16 Jun 2025 13:59:17 +0200 Subject: [PATCH 19/41] Fix #381 (When there are header files with the same name, the correct file cannot be found) (#443) --- integration_test.py | 111 ++++++++++++++++++++++++++++++++++++++++++++ simplecpp.cpp | 7 +++ test.cpp | 2 + 3 files changed, 120 insertions(+) diff --git a/integration_test.py b/integration_test.py index 122ce9aa..ccef84eb 100644 --- a/integration_test.py +++ b/integration_test.py @@ -2,6 +2,7 @@ import os import pathlib +import platform import pytest from testutils import simplecpp, format_include_path_arg, format_include @@ -177,3 +178,113 @@ def test_relative_header_6(record_property, tmpdir, with_pragma_once, relative_i assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout else: assert double_include_error in stderr + +def test_same_name_header(record_property, tmpdir): + include_a = os.path.join(tmpdir, "include_a") + include_b = os.path.join(tmpdir, "include_b") + + test_file = os.path.join(tmpdir, "test.c") + header_a = os.path.join(include_a, "header_a.h") + header_b = os.path.join(include_b, "header_b.h") + same_name_a = os.path.join(include_a, "same_name.h") + same_name_b = os.path.join(include_b, "same_name.h") + + os.mkdir(include_a) + os.mkdir(include_b) + + with open(test_file, "wt") as f: + f.write(""" + #include + #include + TEST + """) + + with open(header_a, "wt") as f: + f.write(""" + #include "same_name.h" + """) + + with open(header_b, "wt") as f: + f.write(""" + #include "same_name.h" + """) + + with open(same_name_a, "wt") as f: + f.write(""" + #define TEST E + """) + + with open(same_name_b, "wt") as f: + f.write(""" + #define TEST OK + """) + + args = [ + format_include_path_arg(include_a), + format_include_path_arg(include_b), + test_file + ] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert "OK" in stdout + assert stderr == "" + +def test_pragma_once_matching(record_property, tmpdir): + if platform.system() == "win32": + names_to_test = [ + '"once.h"', + '"Once.h"', + '', + '', + '"../test_dir/once.h"', + '"../test_dir/Once.h"', + '"../Test_Dir/once.h"', + '"../Test_Dir/Once.h"', + '"test_subdir/../once.h"', + '"test_subdir/../Once.h"', + '"Test_Subdir/../once.h"', + '"Test_Subdir/../Once.h"', + ] + else: + names_to_test = [ + '"once.h"', + '', + '"../test_dir/once.h"', + '"test_subdir/../once.h"', + ] + + test_dir = os.path.join(tmpdir, "test_dir") + test_subdir = os.path.join(test_dir, "test_subdir") + + test_file = os.path.join(test_dir, "test.c") + once_header = os.path.join(test_dir, "once.h") + + os.mkdir(test_dir) + os.mkdir(test_subdir) + + with open(test_file, "wt") as f: + for n in names_to_test: + f.write(f""" + #include {n} + """); + + with open(once_header, "wt") as f: + f.write(f""" + #pragma once + ONCE + """); + + args = [ + format_include_path_arg(test_dir), + test_file + ] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert stdout.count("ONCE") == 1 + assert stderr == "" diff --git a/simplecpp.cpp b/simplecpp.cpp index 9ff66d8b..e4f98bb6 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -3278,6 +3278,13 @@ static std::string getFileIdPath(const std::map Date: Fri, 20 Jun 2025 21:33:36 +0200 Subject: [PATCH 20/41] Fix #446 (change the rules of relativeness preserving to depend on the source file including it for relative path includes) (#445) --- integration_test.py | 28 ++++++++++++++++------------ simplecpp.cpp | 12 ++++++------ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/integration_test.py b/integration_test.py index ccef84eb..27528e16 100644 --- a/integration_test.py +++ b/integration_test.py @@ -67,7 +67,7 @@ def test_relative_header_2(record_property, tmpdir, with_pragma_once, inv, sourc record_property("stderr", stderr) if with_pragma_once: assert stderr == '' - if inv: + if inv or not source_relative: assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout else: assert '#line 8 "test.h"' in stdout @@ -94,16 +94,17 @@ def test_relative_header_3(record_property, tmpdir, is_sys, inv, source_relative assert "missing header: Header not found" in stderr else: assert stderr == '' - if inv: - assert f'#line 8 "{pathlib.PurePath(test_subdir).as_posix()}/test.h"' in stdout - else: + if source_relative and not inv: assert '#line 8 "test_subdir/test.h"' in stdout + else: + assert f'#line 8 "{pathlib.PurePath(test_subdir).as_posix()}/test.h"' in stdout @pytest.mark.parametrize("use_short_path", (False, True)) @pytest.mark.parametrize("relative_include_dir", (False, True)) @pytest.mark.parametrize("is_sys", (False, True)) @pytest.mark.parametrize("inv", (False, True)) -def test_relative_header_4(record_property, tmpdir, use_short_path, relative_include_dir, is_sys, inv): +@pytest.mark.parametrize("source_relative", (False, True)) +def test_relative_header_4(record_property, tmpdir, use_short_path, relative_include_dir, is_sys, inv, source_relative): test_subdir = os.path.join(tmpdir, "test_subdir") os.mkdir(test_subdir) header_file, _ = __test_relative_header_create_header(test_subdir) @@ -112,13 +113,14 @@ def test_relative_header_4(record_property, tmpdir, use_short_path, relative_inc test_file = __test_relative_header_create_source(tmpdir, header_file, "test.h", is_include2_sys=is_sys, inv=inv) - args = [format_include_path_arg("test_subdir" if relative_include_dir else test_subdir), test_file] + args = [format_include_path_arg("test_subdir" if relative_include_dir else test_subdir), "test.c" if source_relative else test_file] _, stdout, stderr = simplecpp(args, cwd=tmpdir) record_property("stdout", stdout) record_property("stderr", stderr) + assert stderr == '' - if (use_short_path and not inv) or (relative_include_dir and inv): + if (source_relative and use_short_path and not inv) or (relative_include_dir and inv): assert '#line 8 "test_subdir/test.h"' in stdout else: assert f'#line 8 "{pathlib.PurePath(test_subdir).as_posix()}/test.h"' in stdout @@ -127,7 +129,8 @@ def test_relative_header_4(record_property, tmpdir, use_short_path, relative_inc @pytest.mark.parametrize("relative_include_dir", (False, True)) @pytest.mark.parametrize("is_sys", (False, True)) @pytest.mark.parametrize("inv", (False, True)) -def test_relative_header_5(record_property, tmpdir, with_pragma_once, relative_include_dir, is_sys, inv): # test relative paths with .. +@pytest.mark.parametrize("source_relative", (False, True)) +def test_relative_header_5(record_property, tmpdir, with_pragma_once, relative_include_dir, is_sys, inv, source_relative): # test relative paths with .. ## in this test, the subdir role is the opposite then the previous - it contains the test.c file, while the parent tmpdir contains the header file header_file, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) if is_sys: @@ -139,14 +142,14 @@ def test_relative_header_5(record_property, tmpdir, with_pragma_once, relative_i os.mkdir(test_subdir) test_file = __test_relative_header_create_source(test_subdir, header_file, header_file_second_path, is_include2_sys=is_sys, inv=inv) - args = ([format_include_path_arg(".." if relative_include_dir else tmpdir)] if is_sys else []) + ["test.c"] + args = ([format_include_path_arg(".." if relative_include_dir else tmpdir)] if is_sys else []) + ["test.c" if source_relative else test_file] _, stdout, stderr = simplecpp(args, cwd=test_subdir) record_property("stdout", stdout) record_property("stderr", stderr) if with_pragma_once: assert stderr == '' - if (relative_include_dir or not is_sys) and inv: + if (relative_include_dir if is_sys else source_relative) and inv: assert '#line 8 "../test.h"' in stdout else: assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout @@ -157,7 +160,8 @@ def test_relative_header_5(record_property, tmpdir, with_pragma_once, relative_i @pytest.mark.parametrize("relative_include_dir", (False, True)) @pytest.mark.parametrize("is_sys", (False, True)) @pytest.mark.parametrize("inv", (False, True)) -def test_relative_header_6(record_property, tmpdir, with_pragma_once, relative_include_dir, is_sys, inv): # test relative paths with .. that is resolved only by an include dir +@pytest.mark.parametrize("source_relative", (False, True)) +def test_relative_header_6(record_property, tmpdir, with_pragma_once, relative_include_dir, is_sys, inv, source_relative): # test relative paths with .. that is resolved only by an include dir ## in this test, both the header and the source file are at the same dir, but there is a dummy inclusion dir as a subdir header_file, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) @@ -165,7 +169,7 @@ def test_relative_header_6(record_property, tmpdir, with_pragma_once, relative_i os.mkdir(test_subdir) test_file = __test_relative_header_create_source(tmpdir, header_file, "../test.h", is_include2_sys=is_sys, inv=inv) - args = [format_include_path_arg("test_subdir" if relative_include_dir else test_subdir), "test.c"] + args = [format_include_path_arg("test_subdir" if relative_include_dir else test_subdir), "test.c" if source_relative else test_file] _, stdout, stderr = simplecpp(args, cwd=tmpdir) record_property("stdout", stdout) diff --git a/simplecpp.cpp b/simplecpp.cpp index e4f98bb6..599ffdfe 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -3173,7 +3173,7 @@ static std::string openHeader(std::ifstream &f, const std::string &path) return ""; } -static std::string getRelativeFileName(const std::string &baseFile, const std::string &header) +static std::string getRelativeFileName(const std::string &baseFile, const std::string &header, bool returnAbsolutePath) { const std::string baseFileSimplified = simplecpp::simplifyPath(baseFile); const std::string baseFileAbsolute = isAbsolutePath(baseFileSimplified) ? @@ -3185,12 +3185,12 @@ static std::string getRelativeFileName(const std::string &baseFile, const std::s headerSimplified : simplecpp::simplifyPath(dirPath(baseFileAbsolute) + headerSimplified); - return extractRelativePathFromAbsolute(path); + return returnAbsolutePath ? toAbsolutePath(path) : extractRelativePathFromAbsolute(path); } static std::string openHeaderRelative(std::ifstream &f, const std::string &sourcefile, const std::string &header) { - return openHeader(f, getRelativeFileName(sourcefile, header)); + return openHeader(f, getRelativeFileName(sourcefile, header, isAbsolutePath(sourcefile))); } // returns the simplified header path: @@ -3273,14 +3273,14 @@ static std::string getFileIdPath(const std::map Date: Tue, 1 Jul 2025 16:32:07 +0200 Subject: [PATCH 21/41] Fix #368 (__VA_OPT__ is not handled good enough) (#451) --- simplecpp.cpp | 111 ++++++++++++++---- test.cpp | 51 +++++++- .../macro_fn_va_opt.c | 13 ++ 3 files changed, 148 insertions(+), 27 deletions(-) create mode 100644 testsuite/clang-preprocessor-tests/macro_fn_va_opt.c diff --git a/simplecpp.cpp b/simplecpp.cpp index 599ffdfe..97657d60 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -1485,7 +1485,7 @@ namespace simplecpp { class Macro { public: - explicit Macro(std::vector &f) : nameTokDef(nullptr), valueToken(nullptr), endToken(nullptr), files(f), tokenListDefine(f), variadic(false), valueDefinedInCode_(false) {} + explicit Macro(std::vector &f) : nameTokDef(nullptr), valueToken(nullptr), endToken(nullptr), files(f), tokenListDefine(f), variadic(false), variadicOpt(false), optExpandValue(nullptr), optNoExpandValue(nullptr), valueDefinedInCode_(false) {} Macro(const Token *tok, std::vector &f) : nameTokDef(nullptr), files(f), tokenListDefine(f), valueDefinedInCode_(true) { if (sameline(tok->previousSkipComments(), tok)) @@ -1515,6 +1515,11 @@ namespace simplecpp { *this = other; } + ~Macro() { + delete optExpandValue; + delete optNoExpandValue; + } + Macro &operator=(const Macro &other) { if (this != &other) { files = other.files; @@ -1707,6 +1712,9 @@ namespace simplecpp { bool parseDefine(const Token *nametoken) { nameTokDef = nametoken; variadic = false; + variadicOpt = false; + optExpandValue = nullptr; + optNoExpandValue = nullptr; if (!nameTokDef) { valueToken = endToken = nullptr; args.clear(); @@ -1744,8 +1752,54 @@ namespace simplecpp { if (!sameline(valueToken, nameTokDef)) valueToken = nullptr; endToken = valueToken; - while (sameline(endToken, nameTokDef)) + while (sameline(endToken, nameTokDef)) { + if (variadic && endToken->str() == "__VA_OPT__") + variadicOpt = true; endToken = endToken->next; + } + + if (variadicOpt) { + TokenList expandValue(files); + TokenList noExpandValue(files); + for (const Token *tok = valueToken; tok && tok != endToken;) { + if (tok->str() == "__VA_OPT__") { + if (!sameline(tok, tok->next) || tok->next->op != '(') + throw Error(tok->location, "In definition of '" + nameTokDef->str() + "': Missing opening parenthesis for __VA_OPT__"); + tok = tok->next->next; + int par = 1; + while (tok && tok != endToken) { + if (tok->op == '(') + par++; + else if (tok->op == ')') + par--; + else if (tok->str() == "__VA_OPT__") + throw Error(tok->location, "In definition of '" + nameTokDef->str() + "': __VA_OPT__ cannot be nested"); + if (par == 0) { + tok = tok->next; + break; + } + expandValue.push_back(new Token(*tok)); + tok = tok->next; + } + if (par != 0) { + const Token *const lastTok = expandValue.back() ? expandValue.back() : valueToken->next; + throw Error(lastTok->location, "In definition of '" + nameTokDef->str() + "': Missing closing parenthesis for __VA_OPT__"); + } + } else { + expandValue.push_back(new Token(*tok)); + noExpandValue.push_back(new Token(*tok)); + tok = tok->next; + } + } +#if __cplusplus >= 201103L + optExpandValue = new TokenList(std::move(expandValue)); + optNoExpandValue = new TokenList(std::move(noExpandValue)); +#else + optExpandValue = new TokenList(expandValue); + optNoExpandValue = new TokenList(noExpandValue); +#endif + } + return true; } @@ -1900,8 +1954,22 @@ namespace simplecpp { Token * const output_end_1 = output->back(); + const Token *valueToken2; + const Token *endToken2; + + if (variadicOpt) { + if (parametertokens2.size() > args.size() && parametertokens2[args.size() - 1]->next->op != ')') + valueToken2 = optExpandValue->cfront(); + else + valueToken2 = optNoExpandValue->cfront(); + endToken2 = nullptr; + } else { + valueToken2 = valueToken; + endToken2 = endToken; + } + // expand - for (const Token *tok = valueToken; tok != endToken;) { + for (const Token *tok = valueToken2; tok != endToken2;) { if (tok->op != '#') { // A##B => AB if (sameline(tok, tok->next) && tok->next && tok->next->op == '#' && tok->next->next && tok->next->next->op == '#') { @@ -1950,7 +2018,7 @@ namespace simplecpp { } tok = tok->next; - if (tok == endToken) { + if (tok == endToken2) { output->push_back(new Token(*tok->previous)); break; } @@ -2020,24 +2088,6 @@ namespace simplecpp { // Macro parameter.. { TokenList temp(files); - if (tok->str() == "__VA_OPT__") { - if (sameline(tok, tok->next) && tok->next->str() == "(") { - tok = tok->next; - int paren = 1; - while (sameline(tok, tok->next)) { - if (tok->next->str() == "(") - ++paren; - else if (tok->next->str() == ")") - --paren; - if (paren == 0) - return tok->next->next; - tok = tok->next; - if (parametertokens.size() > args.size() && parametertokens.front()->next->str() != ")") - tok = expandToken(output, loc, tok, macros, expandedmacros, parametertokens)->previous; - } - } - throw Error(tok->location, "Missing parenthesis for __VA_OPT__(content)"); - } if (expandArg(&temp, tok, loc, macros, expandedmacros, parametertokens)) { if (tok->str() == "__VA_ARGS__" && temp.empty() && output->cback() && output->cback()->str() == "," && tok->nextSkipComments() && tok->nextSkipComments()->str() == ")") @@ -2338,6 +2388,13 @@ namespace simplecpp { /** is macro variadic? */ bool variadic; + /** does the macro expansion have __VA_OPT__? */ + bool variadicOpt; + + /** Expansion value for varadic macros with __VA_OPT__ expanded and discarded respectively */ + const TokenList *optExpandValue; + const TokenList *optNoExpandValue; + /** was the value of this macro actually defined in the code? */ bool valueDefinedInCode_; }; @@ -3621,6 +3678,16 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL } output.clear(); return; + } catch (simplecpp::Macro::Error &err) { + if (outputList) { + simplecpp::Output out(files); + out.type = simplecpp::Output::SYNTAX_ERROR; + out.location = err.location; + out.msg = "Failed to parse #define, " + err.what; + outputList->push_back(out); + } + output.clear(); + return; } } else if (ifstates.top() == True && rawtok->str() == INCLUDE) { TokenList inc1(files); diff --git a/test.cpp b/test.cpp index cec253b8..caa6137e 100644 --- a/test.cpp +++ b/test.cpp @@ -923,7 +923,7 @@ static void define_va_opt_3() simplecpp::OutputList outputList; ASSERT_EQUALS("", preprocess(code1, &outputList)); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'err', Missing parenthesis for __VA_OPT__(content)\n", + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing closing parenthesis for __VA_OPT__\n", toString(outputList)); outputList.clear(); @@ -934,7 +934,7 @@ static void define_va_opt_3() "err()"; ASSERT_EQUALS("", preprocess(code2, &outputList)); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'err', Missing parenthesis for __VA_OPT__(content)\n", + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing opening parenthesis for __VA_OPT__\n", toString(outputList)); } @@ -946,7 +946,7 @@ static void define_va_opt_4() simplecpp::OutputList outputList; ASSERT_EQUALS("", preprocess(code1, &outputList)); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'err', Missing parenthesis for __VA_OPT__(content)\n", + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing opening parenthesis for __VA_OPT__\n", toString(outputList)); outputList.clear(); @@ -956,7 +956,7 @@ static void define_va_opt_4() "err()"; ASSERT_EQUALS("", preprocess(code2, &outputList)); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'err', Missing parenthesis for __VA_OPT__(content)\n", + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing opening parenthesis for __VA_OPT__\n", toString(outputList)); } @@ -968,7 +968,46 @@ static void define_va_opt_5() simplecpp::OutputList outputList; ASSERT_EQUALS("", preprocess(code, &outputList)); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'err', Missing parenthesis for __VA_OPT__(content)\n", + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing opening parenthesis for __VA_OPT__\n", + toString(outputList)); +} + +static void define_va_opt_6() +{ + // nested __VA_OPT__ + const char code[] = "#define err(...) __VA_OPT__(__VA_OPT__(something))\n" + "err()"; + + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': __VA_OPT__ cannot be nested\n", + toString(outputList)); +} + +static void define_va_opt_7() +{ + // eof in __VA_OPT__ + const char code1[] = "#define err(...) __VA_OPT__"; + + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code1, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing opening parenthesis for __VA_OPT__\n", + toString(outputList)); + + outputList.clear(); + + const char code2[] = "#define err(...) __VA_OPT__("; + + ASSERT_EQUALS("", preprocess(code2, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing closing parenthesis for __VA_OPT__\n", + toString(outputList)); + + outputList.clear(); + + const char code3[] = "#define err(...) __VA_OPT__(x"; + + ASSERT_EQUALS("", preprocess(code3, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing closing parenthesis for __VA_OPT__\n", toString(outputList)); } @@ -3063,6 +3102,8 @@ int main(int argc, char **argv) TEST_CASE(define_va_opt_3); TEST_CASE(define_va_opt_4); TEST_CASE(define_va_opt_5); + TEST_CASE(define_va_opt_6); + TEST_CASE(define_va_opt_7); TEST_CASE(pragma_backslash); // multiline pragma directive diff --git a/testsuite/clang-preprocessor-tests/macro_fn_va_opt.c b/testsuite/clang-preprocessor-tests/macro_fn_va_opt.c new file mode 100644 index 00000000..ccb09e95 --- /dev/null +++ b/testsuite/clang-preprocessor-tests/macro_fn_va_opt.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -E %s | grep '^ printf( "%%s" , "Hello" );$' + +#define P( x, ...) printf( x __VA_OPT__(,) __VA_ARGS__ ) +#define PF( x, ...) P( x __VA_OPT__(,) __VA_ARGS__ ) + +int main() +{ + PF( "%s", "Hello" ); + PF( "Hello", ); + PF( "Hello" ); + PF( , ); + PF( ); +} From 0ff0149510fcebc8ccdaf56402d400d5e290fca7 Mon Sep 17 00:00:00 2001 From: glankk Date: Thu, 3 Jul 2025 11:33:59 +0200 Subject: [PATCH 22/41] Fix #449 (Update c++ standard to c++11) (#450) --- .github/workflows/clang-tidy.yml | 2 +- CMakeLists.txt | 21 +------ Makefile | 4 +- simplecpp.cpp | 99 +++++--------------------------- simplecpp.h | 12 ---- 5 files changed, 19 insertions(+), 119 deletions(-) diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index 9a4f43c9..a2f7b6dc 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -30,7 +30,7 @@ jobs: - name: Prepare CMake run: | - cmake -S . -B cmake.output -G "Unix Makefiles" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DDISABLE_CPP03_SYNTAX_CHECK=ON + cmake -S . -B cmake.output -G "Unix Makefiles" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON env: CXX: clang-20 diff --git a/CMakeLists.txt b/CMakeLists.txt index 672e63bb..6ab0166e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,8 @@ cmake_minimum_required (VERSION 3.10) project (simplecpp LANGUAGES CXX) -option(DISABLE_CPP03_SYNTAX_CHECK "Disable the C++03 syntax check." OFF) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) include(CheckCXXCompilerFlag) @@ -70,25 +71,7 @@ endif() add_library(simplecpp_obj OBJECT simplecpp.cpp) add_executable(simplecpp $ main.cpp) -set_property(TARGET simplecpp PROPERTY CXX_STANDARD 11) - -if (NOT DISABLE_CPP03_SYNTAX_CHECK) - # it is not possible to set a standard older than C++14 with Visual Studio - if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - # we need to create a dummy library as -fsyntax-only will not produce any output files causing the build to fail - add_library(simplecpp-03-syntax OBJECT simplecpp.cpp) - target_compile_options(simplecpp-03-syntax PRIVATE -std=c++03) - if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") - target_compile_options(simplecpp-03-syntax PRIVATE -Wno-long-long) - elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(simplecpp-03-syntax PRIVATE -Wno-c++11-long-long -Wno-c++11-compat) - endif() - add_dependencies(simplecpp simplecpp-03-syntax) - endif() -endif() - add_executable(testrunner $ test.cpp) -set_property(TARGET testrunner PROPERTY CXX_STANDARD 11) enable_testing() add_test(NAME testrunner COMMAND testrunner) diff --git a/Makefile b/Makefile index db1ca257..73977517 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ all: testrunner simplecpp -CXXFLAGS = -Wall -Wextra -pedantic -Wcast-qual -Wfloat-equal -Wmissing-declarations -Wmissing-format-attribute -Wredundant-decls -Wundef -Wno-multichar -Wold-style-cast -std=c++0x -g +CXXFLAGS = -Wall -Wextra -pedantic -Wcast-qual -Wfloat-equal -Wmissing-declarations -Wmissing-format-attribute -Wredundant-decls -Wundef -Wno-multichar -Wold-style-cast -std=c++11 -g LDFLAGS = -g %.o: %.cpp simplecpp.h @@ -11,8 +11,6 @@ testrunner: test.o simplecpp.o $(CXX) $(LDFLAGS) simplecpp.o test.o -o testrunner test: testrunner simplecpp - # The -std=c++03 makes sure that simplecpp.cpp is C++03 conformant. We don't require a C++11 compiler - g++ -std=c++03 -fsyntax-only simplecpp.cpp ./testrunner python3 run-tests.py diff --git a/simplecpp.cpp b/simplecpp.cpp index 97657d60..c29040bc 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -31,12 +31,10 @@ #include #include #include -#if __cplusplus >= 201103L #ifdef SIMPLECPP_WINDOWS #include #endif #include -#endif #include #include @@ -51,18 +49,6 @@ #undef ERROR #endif -#if __cplusplus >= 201103L -#define OVERRIDE override -#define EXPLICIT explicit -#else -#define OVERRIDE -#define EXPLICIT -#endif - -#if (__cplusplus < 201103L) && !defined(__APPLE__) -#define nullptr NULL -#endif - static bool isHex(const std::string &s) { return s.size()>2 && (s.compare(0,2,"0x")==0 || s.compare(0,2,"0X")==0); @@ -368,22 +354,22 @@ class simplecpp::TokenList::Stream { class StdIStream : public simplecpp::TokenList::Stream { public: // cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members - EXPLICIT StdIStream(std::istream &istr) + explicit StdIStream(std::istream &istr) : istr(istr) { assert(istr.good()); init(); } - virtual int get() OVERRIDE { + virtual int get() override { return istr.get(); } - virtual int peek() OVERRIDE { + virtual int peek() override { return istr.peek(); } - virtual void unget() OVERRIDE { + virtual void unget() override { istr.unget(); } - virtual bool good() OVERRIDE { + virtual bool good() override { return istr.good(); } @@ -402,20 +388,20 @@ class StdCharBufStream : public simplecpp::TokenList::Stream { init(); } - virtual int get() OVERRIDE { + virtual int get() override { if (pos >= size) return lastStatus = EOF; return str[pos++]; } - virtual int peek() OVERRIDE { + virtual int peek() override { if (pos >= size) return lastStatus = EOF; return str[pos]; } - virtual void unget() OVERRIDE { + virtual void unget() override { --pos; } - virtual bool good() OVERRIDE { + virtual bool good() override { return lastStatus != EOF; } @@ -429,7 +415,7 @@ class StdCharBufStream : public simplecpp::TokenList::Stream { class FileStream : public simplecpp::TokenList::Stream { public: // cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members - EXPLICIT FileStream(const std::string &filename, std::vector &files) + explicit FileStream(const std::string &filename, std::vector &files) : file(fopen(filename.c_str(), "rb")) , lastCh(0) , lastStatus(0) { @@ -440,25 +426,25 @@ class FileStream : public simplecpp::TokenList::Stream { init(); } - ~FileStream() OVERRIDE { + ~FileStream() override { fclose(file); file = nullptr; } - virtual int get() OVERRIDE { + virtual int get() override { lastStatus = lastCh = fgetc(file); return lastCh; } - virtual int peek() OVERRIDE{ + virtual int peek() override{ // keep lastCh intact const int ch = fgetc(file); unget_internal(ch); return ch; } - virtual void unget() OVERRIDE { + virtual void unget() override { unget_internal(lastCh); } - virtual bool good() OVERRIDE { + virtual bool good() override { return lastStatus != EOF; } @@ -519,12 +505,10 @@ simplecpp::TokenList::TokenList(const TokenList &other) : frontToken(nullptr), b *this = other; } -#if __cplusplus >= 201103L simplecpp::TokenList::TokenList(TokenList &&other) : frontToken(nullptr), backToken(nullptr), files(other.files) { *this = std::move(other); } -#endif simplecpp::TokenList::~TokenList() { @@ -543,7 +527,6 @@ simplecpp::TokenList &simplecpp::TokenList::operator=(const TokenList &other) return *this; } -#if __cplusplus >= 201103L simplecpp::TokenList &simplecpp::TokenList::operator=(TokenList &&other) { if (this != &other) { @@ -557,7 +540,6 @@ simplecpp::TokenList &simplecpp::TokenList::operator=(TokenList &&other) } return *this; } -#endif void simplecpp::TokenList::clear() { @@ -1477,11 +1459,7 @@ unsigned int simplecpp::TokenList::fileIndex(const std::string &filename) namespace simplecpp { class Macro; -#if __cplusplus >= 201103L using MacroMap = std::unordered_map; -#else - typedef std::map MacroMap; -#endif class Macro { public: @@ -1791,13 +1769,8 @@ namespace simplecpp { tok = tok->next; } } -#if __cplusplus >= 201103L optExpandValue = new TokenList(std::move(expandValue)); optNoExpandValue = new TokenList(std::move(noExpandValue)); -#else - optExpandValue = new TokenList(expandValue); - optNoExpandValue = new TokenList(noExpandValue); -#endif } return true; @@ -2437,47 +2410,9 @@ namespace simplecpp { #ifdef SIMPLECPP_WINDOWS -#if __cplusplus >= 201103L using MyMutex = std::mutex; template using MyLock = std::lock_guard; -#else -class MyMutex { -public: - MyMutex() { - InitializeCriticalSection(&m_criticalSection); - } - - ~MyMutex() { - DeleteCriticalSection(&m_criticalSection); - } - - CRITICAL_SECTION* lock() { - return &m_criticalSection; - } -private: - CRITICAL_SECTION m_criticalSection; -}; - -template -class MyLock { -public: - explicit MyLock(T& m) - : m_mutex(m) { - EnterCriticalSection(m_mutex.lock()); - } - - ~MyLock() { - LeaveCriticalSection(m_mutex.lock()); - } - -private: - MyLock& operator=(const MyLock&); - MyLock(const MyLock&); - - T& m_mutex; -}; -#endif class RealFileNameMap { public: @@ -4099,7 +4034,3 @@ std::string simplecpp::getCppStdString(const std::string &std) { return getCppStdString(getCppStd(std)); } - -#if (__cplusplus < 201103L) && !defined(__APPLE__) -#undef nullptr -#endif diff --git a/simplecpp.h b/simplecpp.h index f5c69593..9fd95808 100755 --- a/simplecpp.h +++ b/simplecpp.h @@ -27,10 +27,6 @@ # define SIMPLECPP_LIB #endif -#if (__cplusplus < 201103L) && !defined(__APPLE__) -#define nullptr NULL -#endif - #if defined(_MSC_VER) # pragma warning(push) // suppress warnings about "conversion from 'type1' to 'type2', possible loss of data" @@ -214,14 +210,10 @@ namespace simplecpp { /** generates a token list from the given filename parameter */ TokenList(const std::string &filename, std::vector &filenames, OutputList *outputList = nullptr); TokenList(const TokenList &other); -#if __cplusplus >= 201103L TokenList(TokenList &&other); -#endif ~TokenList(); TokenList &operator=(const TokenList &other); -#if __cplusplus >= 201103L TokenList &operator=(TokenList &&other); -#endif void clear(); bool empty() const { @@ -395,8 +387,4 @@ namespace simplecpp { # pragma warning(pop) #endif -#if (__cplusplus < 201103L) && !defined(__APPLE__) -#undef nullptr -#endif - #endif From c1f368832dbefa508480bbcdafea87f044645563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludvig=20Gunne=20Lindstr=C3=B6m?= Date: Thu, 3 Jul 2025 11:39:43 +0200 Subject: [PATCH 23/41] Fix #454: Accept __has_include for GNU C standards (#456) --- simplecpp.cpp | 9 ++++++--- test.cpp | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index c29040bc..d925773e 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -2673,13 +2673,16 @@ static void simplifySizeof(simplecpp::TokenList &expr, const std::map= "201703L"); } +static bool isGnu(const simplecpp::DUI &dui) +{ + return dui.std.rfind("gnu", 0) != std::string::npos; +} static std::string currentDirectoryOSCalc() { const std::size_t size = 4096; @@ -2752,7 +2755,7 @@ static std::string extractRelativePathFromAbsolute(const std::string& absoluteSi static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader); static void simplifyHasInclude(simplecpp::TokenList &expr, const simplecpp::DUI &dui) { - if (!isCpp17OrLater(dui)) + if (!isCpp17OrLater(dui) && !isGnu(dui)) return; for (simplecpp::Token *tok = expr.front(); tok; tok = tok->next) { @@ -3475,7 +3478,7 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL // use a dummy vector for the macros because as this is not part of the file and would add an empty entry - e.g. /usr/include/poll.h std::vector dummy; - const bool hasInclude = isCpp17OrLater(dui); + const bool hasInclude = isCpp17OrLater(dui) || isGnu(dui); MacroMap macros; for (std::list::const_iterator it = dui.defines.begin(); it != dui.defines.end(); ++it) { const std::string ¯ostr = *it; diff --git a/test.cpp b/test.cpp index caa6137e..45498a08 100644 --- a/test.cpp +++ b/test.cpp @@ -1602,6 +1602,21 @@ static void has_include_5() ASSERT_EQUALS("", preprocess(code)); } +static void has_include_6() +{ + const char code[] = "#if defined( __has_include)\n" + " #if !__has_include()\n" + " A\n" + " #else\n" + " B\n" + " #endif\n" + "#endif"; + simplecpp::DUI dui; + dui.std = "gnu99"; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); + ASSERT_EQUALS("", preprocess(code)); +} + static void ifdef1() { const char code[] = "#ifdef A\n" @@ -3164,6 +3179,7 @@ int main(int argc, char **argv) TEST_CASE(has_include_3); TEST_CASE(has_include_4); TEST_CASE(has_include_5); + TEST_CASE(has_include_6); TEST_CASE(ifdef1); TEST_CASE(ifdef2); From 4bbd1bf8e320471f2a46908c947251ed8aa18b0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludvig=20Gunne=20Lindstr=C3=B6m?= Date: Thu, 3 Jul 2025 12:02:25 +0200 Subject: [PATCH 24/41] Fix #452: Undefined function-style macro does not cause an error (#453) --- simplecpp.cpp | 2 ++ test.cpp | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/simplecpp.cpp b/simplecpp.cpp index d925773e..a69dc0c2 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -2827,6 +2827,8 @@ static void simplifyName(simplecpp::TokenList &expr) if (alt) continue; } + if (tok->next && tok->next->str() == "(") + throw std::runtime_error("undefined function-like macro invocation: " + tok->str() + "( ... )"); tok->setstr("0"); } } diff --git a/test.cpp b/test.cpp index 45498a08..fb3e4b22 100644 --- a/test.cpp +++ b/test.cpp @@ -1879,6 +1879,15 @@ static void ifexpr() ASSERT_EQUALS("\n\n1", preprocess(code)); } +static void ifUndefFuncStyleMacro() +{ + const char code[] = "#if A()\n" + "#endif\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,failed to evaluate #if condition, undefined function-like macro invocation: A( ... )\n", toString(outputList)); +} + static void location1() { const char *code; @@ -3202,6 +3211,7 @@ int main(int argc, char **argv) TEST_CASE(ifdiv0); TEST_CASE(ifalt); // using "and", "or", etc TEST_CASE(ifexpr); + TEST_CASE(ifUndefFuncStyleMacro); TEST_CASE(location1); TEST_CASE(location2); From f566848788c1445e56b543255e82ac3b0c952ee9 Mon Sep 17 00:00:00 2001 From: glankk Date: Mon, 7 Jul 2025 10:33:11 +0200 Subject: [PATCH 25/41] Add caching of conditional directive chains (#468) --- simplecpp.cpp | 15 ++++++++++++++- simplecpp.h | 5 +++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index a69dc0c2..e9641b04 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -3532,6 +3532,7 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL // AlwaysFalse => drop all code in #if and #else enum IfState { True, ElseIsTrue, AlwaysFalse }; std::stack ifstates; + std::stack iftokens; ifstates.push(True); std::stack includetokenstack; @@ -3855,15 +3856,24 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL ifstates.push(AlwaysFalse); else ifstates.push(conditionIsTrue ? True : ElseIsTrue); + iftokens.push(rawtok); } else if (ifstates.top() == True) { ifstates.top() = AlwaysFalse; + iftokens.top()->nextcond = rawtok; + iftokens.top() = rawtok; } else if (ifstates.top() == ElseIsTrue && conditionIsTrue) { ifstates.top() = True; + iftokens.top()->nextcond = rawtok; + iftokens.top() = rawtok; } } else if (rawtok->str() == ELSE) { ifstates.top() = (ifstates.top() == ElseIsTrue) ? True : AlwaysFalse; + iftokens.top()->nextcond = rawtok; + iftokens.top() = rawtok; } else if (rawtok->str() == ENDIF) { ifstates.pop(); + iftokens.top()->nextcond = rawtok; + iftokens.pop(); } else if (rawtok->str() == UNDEF) { if (ifstates.top() == True) { const Token *tok = rawtok->next; @@ -3875,7 +3885,10 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL } else if (ifstates.top() == True && rawtok->str() == PRAGMA && rawtok->next && rawtok->next->str() == ONCE && sameline(rawtok,rawtok->next)) { pragmaOnce.insert(rawtok->location.file()); } - rawtok = gotoNextLine(rawtok); + if (ifstates.top() != True && rawtok->nextcond) + rawtok = rawtok->nextcond->previous; + else + rawtok = gotoNextLine(rawtok); continue; } diff --git a/simplecpp.h b/simplecpp.h index 9fd95808..579e6e14 100755 --- a/simplecpp.h +++ b/simplecpp.h @@ -96,12 +96,12 @@ namespace simplecpp { class SIMPLECPP_LIB Token { public: Token(const TokenString &s, const Location &loc, bool wsahead = false) : - whitespaceahead(wsahead), location(loc), previous(nullptr), next(nullptr), string(s) { + whitespaceahead(wsahead), location(loc), previous(nullptr), next(nullptr), nextcond(nullptr), string(s) { flags(); } Token(const Token &tok) : - macro(tok.macro), op(tok.op), comment(tok.comment), name(tok.name), number(tok.number), whitespaceahead(tok.whitespaceahead), location(tok.location), previous(nullptr), next(nullptr), string(tok.string), mExpandedFrom(tok.mExpandedFrom) { + macro(tok.macro), op(tok.op), comment(tok.comment), name(tok.name), number(tok.number), whitespaceahead(tok.whitespaceahead), location(tok.location), previous(nullptr), next(nullptr), nextcond(nullptr), string(tok.string), mExpandedFrom(tok.mExpandedFrom) { } void flags() { @@ -137,6 +137,7 @@ namespace simplecpp { Location location; Token *previous; Token *next; + mutable const Token *nextcond; const Token *previousSkipComments() const { const Token *tok = this->previous; From c7e99745d0ee079ee3041b7ff7bff03ced19307a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludvig=20Gunne=20Lindstr=C3=B6m?= Date: Mon, 7 Jul 2025 10:45:27 +0200 Subject: [PATCH 26/41] fix #459: Set __STRICT_ANSI__=1 for non-gnu standards (#460) --- simplecpp.cpp | 7 +++++++ test.cpp | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/simplecpp.cpp b/simplecpp.cpp index e9641b04..c0f6e2c0 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -3482,11 +3482,14 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL const bool hasInclude = isCpp17OrLater(dui) || isGnu(dui); MacroMap macros; + bool strictAnsiDefined = false; for (std::list::const_iterator it = dui.defines.begin(); it != dui.defines.end(); ++it) { const std::string ¯ostr = *it; const std::string::size_type eq = macrostr.find('='); const std::string::size_type par = macrostr.find('('); const std::string macroname = macrostr.substr(0, std::min(eq,par)); + if (macroname == "__STRICT_ANSI__") + strictAnsiDefined = true; if (dui.undefined.find(macroname) != dui.undefined.end()) continue; const std::string lhs(macrostr.substr(0,eq)); @@ -3495,6 +3498,10 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL macros.insert(std::pair(macro.name(), macro)); } + const bool strictAnsiUndefined = dui.undefined.find("__STRICT_ANSI__") != dui.undefined.cend(); + if (!isGnu(dui) && !strictAnsiDefined && !strictAnsiUndefined) + macros.insert(std::pair("__STRICT_ANSI__", Macro("__STRICT_ANSI__", "1", dummy))); + macros.insert(std::make_pair("__FILE__", Macro("__FILE__", "__FILE__", dummy))); macros.insert(std::make_pair("__LINE__", Macro("__LINE__", "__LINE__", dummy))); macros.insert(std::make_pair("__COUNTER__", Macro("__COUNTER__", "__COUNTER__", dummy))); diff --git a/test.cpp b/test.cpp index fb3e4b22..ba21d71b 100644 --- a/test.cpp +++ b/test.cpp @@ -1617,6 +1617,48 @@ static void has_include_6() ASSERT_EQUALS("", preprocess(code)); } +static void strict_ansi_1() +{ + const char code[] = "#if __STRICT_ANSI__\n" + " A\n" + "#endif"; + simplecpp::DUI dui; + dui.std = "gnu99"; + ASSERT_EQUALS("", preprocess(code, dui)); +} + +static void strict_ansi_2() +{ + const char code[] = "#if __STRICT_ANSI__\n" + " A\n" + "#endif"; + simplecpp::DUI dui; + dui.std = "c99"; + ASSERT_EQUALS("\nA", preprocess(code, dui)); +} + +static void strict_ansi_3() +{ + const char code[] = "#if __STRICT_ANSI__\n" + " A\n" + "#endif"; + simplecpp::DUI dui; + dui.std = "c99"; + dui.undefined.insert("__STRICT_ANSI__"); + ASSERT_EQUALS("", preprocess(code, dui)); +} + +static void strict_ansi_4() +{ + const char code[] = "#if __STRICT_ANSI__\n" + " A\n" + "#endif"; + simplecpp::DUI dui; + dui.std = "gnu99"; + dui.defines.push_back("__STRICT_ANSI__"); + ASSERT_EQUALS("\nA", preprocess(code, dui)); +} + static void ifdef1() { const char code[] = "#ifdef A\n" @@ -3190,6 +3232,11 @@ int main(int argc, char **argv) TEST_CASE(has_include_5); TEST_CASE(has_include_6); + TEST_CASE(strict_ansi_1); + TEST_CASE(strict_ansi_2); + TEST_CASE(strict_ansi_3); + TEST_CASE(strict_ansi_4); + TEST_CASE(ifdef1); TEST_CASE(ifdef2); TEST_CASE(ifndef); From a0430f34e8b9e8a5980158eb9c7e5101b9f19473 Mon Sep 17 00:00:00 2001 From: glankk Date: Mon, 7 Jul 2025 11:22:23 +0200 Subject: [PATCH 27/41] Include and path handling optimization (#447) --- integration_test.py | 20 +- main.cpp | 3 +- simplecpp.cpp | 521 ++++++++++++-------------------------------- simplecpp.h | 131 ++++++++++- test.cpp | 173 ++++++++------- 5 files changed, 372 insertions(+), 476 deletions(-) diff --git a/integration_test.py b/integration_test.py index 27528e16..a59ae338 100644 --- a/integration_test.py +++ b/integration_test.py @@ -237,7 +237,13 @@ def test_same_name_header(record_property, tmpdir): assert stderr == "" def test_pragma_once_matching(record_property, tmpdir): - if platform.system() == "win32": + test_dir = os.path.join(tmpdir, "test_dir") + test_subdir = os.path.join(test_dir, "test_subdir") + + test_file = os.path.join(test_dir, "test.c") + once_header = os.path.join(test_dir, "once.h") + + if platform.system() == "Windows": names_to_test = [ '"once.h"', '"Once.h"', @@ -251,6 +257,10 @@ def test_pragma_once_matching(record_property, tmpdir): '"test_subdir/../Once.h"', '"Test_Subdir/../once.h"', '"Test_Subdir/../Once.h"', + f"\"{test_dir}/once.h\"", + f"\"{test_dir}/Once.h\"", + f"<{test_dir}/once.h>", + f"<{test_dir}/Once.h>", ] else: names_to_test = [ @@ -258,14 +268,10 @@ def test_pragma_once_matching(record_property, tmpdir): '', '"../test_dir/once.h"', '"test_subdir/../once.h"', + f"\"{test_dir}/once.h\"", + f"<{test_dir}/once.h>", ] - test_dir = os.path.join(tmpdir, "test_dir") - test_subdir = os.path.join(test_dir, "test_subdir") - - test_file = os.path.join(test_dir, "test.c") - once_header = os.path.join(test_dir, "once.h") - os.mkdir(test_dir) os.mkdir(test_subdir) diff --git a/main.cpp b/main.cpp index 424ef6fa..a6d14386 100644 --- a/main.cpp +++ b/main.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -128,7 +127,7 @@ int main(int argc, char **argv) } rawtokens->removeComments(); simplecpp::TokenList outputTokens(files); - std::map filedata; + simplecpp::FileDataCache filedata; simplecpp::preprocess(outputTokens, *rawtokens, files, filedata, dui, &outputList); simplecpp::cleanup(filedata); delete rawtokens; diff --git a/simplecpp.cpp b/simplecpp.cpp index c0f6e2c0..128da0bd 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -4,8 +4,10 @@ */ #if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) -#define SIMPLECPP_WINDOWS -#define NOMINMAX +# define _WIN32_WINNT 0x0602 +# define NOMINMAX +# include +# undef ERROR #endif #include "simplecpp.h" @@ -32,21 +34,16 @@ #include #include #ifdef SIMPLECPP_WINDOWS -#include +# include #endif #include #include #include #ifdef _WIN32 -#include +# include #else -#include -#endif - -#ifdef SIMPLECPP_WINDOWS -#include -#undef ERROR +# include #endif static bool isHex(const std::string &s) @@ -139,12 +136,6 @@ static unsigned long long stringToULL(const std::string &s) return ret; } -// TODO: added an undercore since this conflicts with a function of the same name in utils.h from Cppcheck source when building Cppcheck with MSBuild -static bool startsWith_(const std::string &s, const std::string &p) -{ - return (s.size() >= p.size()) && std::equal(p.begin(), p.end(), s.begin()); -} - static bool endsWith(const std::string &s, const std::string &e) { return (s.size() >= e.size()) && std::equal(e.rbegin(), e.rend(), s.rbegin()); @@ -435,7 +426,7 @@ class FileStream : public simplecpp::TokenList::Stream { lastStatus = lastCh = fgetc(file); return lastCh; } - virtual int peek() override{ + virtual int peek() override { // keep lastCh intact const int ch = fgetc(file); unget_internal(ch); @@ -2409,132 +2400,6 @@ namespace simplecpp { } #ifdef SIMPLECPP_WINDOWS - -using MyMutex = std::mutex; -template -using MyLock = std::lock_guard; - -class RealFileNameMap { -public: - RealFileNameMap() {} - - bool getCacheEntry(const std::string& path, std::string& returnPath) { - MyLock lock(m_mutex); - - const std::map::iterator it = m_fileMap.find(path); - if (it != m_fileMap.end()) { - returnPath = it->second; - return true; - } - return false; - } - - void addToCache(const std::string& path, const std::string& actualPath) { - MyLock lock(m_mutex); - m_fileMap[path] = actualPath; - } - -private: - std::map m_fileMap; - MyMutex m_mutex; -}; - -static RealFileNameMap realFileNameMap; - -static bool realFileName(const std::string &f, std::string &result) -{ - // are there alpha characters in last subpath? - bool alpha = false; - for (std::string::size_type pos = 1; pos <= f.size(); ++pos) { - const unsigned char c = f[f.size() - pos]; - if (c == '/' || c == '\\') - break; - if (std::isalpha(c)) { - alpha = true; - break; - } - } - - // do not convert this path if there are no alpha characters (either pointless or cause wrong results for . and ..) - if (!alpha) - return false; - - // Lookup filename or foldername on file system - if (!realFileNameMap.getCacheEntry(f, result)) { - - WIN32_FIND_DATAA FindFileData; - -#ifdef __CYGWIN__ - const std::string fConverted = simplecpp::convertCygwinToWindowsPath(f); - const HANDLE hFind = FindFirstFileExA(fConverted.c_str(), FindExInfoBasic, &FindFileData, FindExSearchNameMatch, NULL, 0); -#else - HANDLE hFind = FindFirstFileExA(f.c_str(), FindExInfoBasic, &FindFileData, FindExSearchNameMatch, NULL, 0); -#endif - - if (INVALID_HANDLE_VALUE == hFind) - return false; - result = FindFileData.cFileName; - realFileNameMap.addToCache(f, result); - FindClose(hFind); - } - return true; -} - -static RealFileNameMap realFilePathMap; - -/** Change case in given path to match filesystem */ -static std::string realFilename(const std::string &f) -{ - std::string ret; - ret.reserve(f.size()); // this will be the final size - if (realFilePathMap.getCacheEntry(f, ret)) - return ret; - - // Current subpath - std::string subpath; - - for (std::string::size_type pos = 0; pos < f.size(); ++pos) { - const unsigned char c = f[pos]; - - // Separator.. add subpath and separator - if (c == '/' || c == '\\') { - // if subpath is empty just add separator - if (subpath.empty()) { - ret += c; - continue; - } - - const bool isDriveSpecification = - (pos == 2 && subpath.size() == 2 && std::isalpha(subpath[0]) && subpath[1] == ':'); - - // Append real filename (proper case) - std::string f2; - if (!isDriveSpecification && realFileName(f.substr(0, pos), f2)) - ret += f2; - else - ret += subpath; - - subpath.clear(); - - // Append separator - ret += c; - } else { - subpath += c; - } - } - - if (!subpath.empty()) { - std::string f2; - if (realFileName(f,f2)) - ret += f2; - else - ret += subpath; - } - - realFilePathMap.addToCache(f, ret); - return ret; -} - static bool isAbsolutePath(const std::string &path) { if (path.length() >= 3 && path[0] > 0 && std::isalpha(path[0]) && path[1] == ':' && (path[2] == '\\' || path[2] == '/')) @@ -2542,8 +2407,6 @@ static bool isAbsolutePath(const std::string &path) return path.length() > 1U && (path[0] == '/' || path[0] == '\\'); } #else -#define realFilename(f) f - static bool isAbsolutePath(const std::string &path) { return path.length() > 1U && path[0] == '/'; @@ -2621,8 +2484,7 @@ namespace simplecpp { if (unc) path = '/' + path; - // cppcheck-suppress duplicateExpressionTernary - platform-dependent implementation - return strpbrk(path.c_str(), "*?") == nullptr ? realFilename(path) : path; + return path; } } @@ -2684,37 +2546,8 @@ static bool isGnu(const simplecpp::DUI &dui) return dui.std.rfind("gnu", 0) != std::string::npos; } -static std::string currentDirectoryOSCalc() { - const std::size_t size = 4096; - char currentPath[size]; - -#ifndef _WIN32 - if (getcwd(currentPath, size) != nullptr) -#else - if (_getcwd(currentPath, size) != nullptr) -#endif - return std::string(currentPath); - - return ""; -} - -static const std::string& currentDirectory() { - static const std::string curdir = simplecpp::simplifyPath(currentDirectoryOSCalc()); - return curdir; -} - -static std::string toAbsolutePath(const std::string& path) { - if (path.empty()) { - return path;// preserve error file path that is indicated by an empty string - } - if (!isAbsolutePath(path)) { - return simplecpp::simplifyPath(currentDirectory() + "/" + path); - } - // otherwise - return simplecpp::simplifyPath(path); -} - -static std::string dirPath(const std::string& path, bool withTrailingSlash=true) { +static std::string dirPath(const std::string& path, bool withTrailingSlash=true) +{ const std::size_t lastSlash = path.find_last_of("\\/"); if (lastSlash == std::string::npos) { return ""; @@ -2722,36 +2555,6 @@ static std::string dirPath(const std::string& path, bool withTrailingSlash=true) return path.substr(0, lastSlash + (withTrailingSlash ? 1U : 0U)); } -static std::string omitPathTrailingSlash(const std::string& path) { - if (endsWith(path, "/")) { - return path.substr(0, path.size() - 1U); - } - return path; -} - -static std::string extractRelativePathFromAbsolute(const std::string& absoluteSimplifiedPath, const std::string& prefixSimplifiedAbsoluteDir = currentDirectory()) { - const std::string normalizedAbsolutePath = omitPathTrailingSlash(absoluteSimplifiedPath); - std::string currentPrefix = omitPathTrailingSlash(prefixSimplifiedAbsoluteDir); - std::string leadingParenting; - while (!startsWith_(normalizedAbsolutePath, currentPrefix)) { - leadingParenting = "../" + leadingParenting; - currentPrefix = dirPath(currentPrefix, false); - } - const std::size_t size = currentPrefix.size(); - std::string relativeFromMeetingPath = normalizedAbsolutePath.substr(size, normalizedAbsolutePath.size() - size); - if (currentPrefix.empty() && !(startsWith_(absoluteSimplifiedPath, "/") && startsWith_(prefixSimplifiedAbsoluteDir, "/"))) { - // In the case that there is no common prefix path, - // and at not both of the paths start with `/` (can happen only in Windows paths on distinct partitions), - // return the absolute simplified path as is because no relative path can match. - return absoluteSimplifiedPath; - } - if (startsWith_(relativeFromMeetingPath, "/")) { - // omit the leading slash - relativeFromMeetingPath = relativeFromMeetingPath.substr(1, relativeFromMeetingPath.size()); - } - return leadingParenting + relativeFromMeetingPath; -} - static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader); static void simplifyHasInclude(simplecpp::TokenList &expr, const simplecpp::DUI &dui) { @@ -2796,10 +2599,8 @@ static void simplifyHasInclude(simplecpp::TokenList &expr, const simplecpp::DUI for (simplecpp::Token *headerToken = tok1->next; headerToken != tok3; headerToken = headerToken->next) header += headerToken->str(); - // cppcheck-suppress selfAssignment - platform-dependent implementation - header = realFilename(header); } else { - header = realFilename(tok1->str().substr(1U, tok1->str().size() - 2U)); + header = tok1->str().substr(1U, tok1->str().size() - 2U); } std::ifstream f; const std::string header2 = openHeader(f,dui,sourcefile,header,systemheader); @@ -3131,206 +2932,185 @@ class NonExistingFilesCache { NonExistingFilesCache() {} bool contains(const std::string& path) { - MyLock lock(m_mutex); + std::lock_guard lock(m_mutex); return (m_pathSet.find(path) != m_pathSet.end()); } void add(const std::string& path) { - MyLock lock(m_mutex); + std::lock_guard lock(m_mutex); m_pathSet.insert(path); } void clear() { - MyLock lock(m_mutex); + std::lock_guard lock(m_mutex); m_pathSet.clear(); } private: std::set m_pathSet; - MyMutex m_mutex; + std::mutex m_mutex; }; static NonExistingFilesCache nonExistingFilesCache; #endif -static std::string openHeader(std::ifstream &f, const std::string &path) +static std::string openHeaderDirect(std::ifstream &f, const std::string &path) { - std::string simplePath = simplecpp::simplifyPath(path); #ifdef SIMPLECPP_WINDOWS - if (nonExistingFilesCache.contains(simplePath)) + if (nonExistingFilesCache.contains(path)) return ""; // file is known not to exist, skip expensive file open call #endif - f.open(simplePath.c_str()); + f.open(path.c_str()); if (f.is_open()) - return simplePath; + return path; #ifdef SIMPLECPP_WINDOWS - nonExistingFilesCache.add(simplePath); + nonExistingFilesCache.add(path); #endif return ""; } -static std::string getRelativeFileName(const std::string &baseFile, const std::string &header, bool returnAbsolutePath) -{ - const std::string baseFileSimplified = simplecpp::simplifyPath(baseFile); - const std::string baseFileAbsolute = isAbsolutePath(baseFileSimplified) ? - baseFileSimplified : - simplecpp::simplifyPath(currentDirectory() + "/" + baseFileSimplified); - - const std::string headerSimplified = simplecpp::simplifyPath(header); - const std::string path = isAbsolutePath(headerSimplified) ? - headerSimplified : - simplecpp::simplifyPath(dirPath(baseFileAbsolute) + headerSimplified); - - return returnAbsolutePath ? toAbsolutePath(path) : extractRelativePathFromAbsolute(path); -} - -static std::string openHeaderRelative(std::ifstream &f, const std::string &sourcefile, const std::string &header) -{ - return openHeader(f, getRelativeFileName(sourcefile, header, isAbsolutePath(sourcefile))); -} - -// returns the simplified header path: -// * If the header path is absolute, returns it in absolute path -// * Otherwise, returns it in relative path with respect to the current directory -static std::string getIncludePathFileName(const std::string &includePath, const std::string &header) +static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader) { - std::string simplifiedHeader = simplecpp::simplifyPath(header); + if (isAbsolutePath(header)) + return openHeaderDirect(f, simplecpp::simplifyPath(header)); - if (isAbsolutePath(simplifiedHeader)) { - return simplifiedHeader; + // prefer first to search the header relatively to source file if found, when not a system header + if (!systemheader) { + std::string path = openHeaderDirect(f, simplecpp::simplifyPath(dirPath(sourcefile) + header)); + if (!path.empty()) { + return path; + } } - std::string basePath = toAbsolutePath(includePath); - if (!basePath.empty() && basePath[basePath.size()-1U]!='/' && basePath[basePath.size()-1U]!='\\') - basePath += '/'; - const std::string absoluteSimplifiedHeaderPath = simplecpp::simplifyPath(basePath + simplifiedHeader); - // preserve absoluteness/relativieness of the including dir - return isAbsolutePath(includePath) ? absoluteSimplifiedHeaderPath : extractRelativePathFromAbsolute(absoluteSimplifiedHeaderPath); -} - -static std::string openHeaderIncludePath(std::ifstream &f, const simplecpp::DUI &dui, const std::string &header) -{ - for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - std::string path = openHeader(f, getIncludePathFileName(*it, header)); + // search the header on the include paths (provided by the flags "-I...") + for (const auto &includePath : dui.includePaths) { + std::string path = openHeaderDirect(f, simplecpp::simplifyPath(includePath + "/" + header)); if (!path.empty()) return path; } return ""; } -static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader) +std::pair simplecpp::FileDataCache::tryload(FileDataCache::name_map_type::iterator &name_it, const simplecpp::DUI &dui, std::vector &filenames, simplecpp::OutputList *outputList) { - if (isAbsolutePath(header)) - return openHeader(f, header); + const std::string &path = name_it->first; + FileID fileId; - // prefer first to search the header relatively to source file if found, when not a system header - if (!systemheader) { - std::string relativeHeader = openHeaderRelative(f, sourcefile, header); - if (!relativeHeader.empty()) { - return relativeHeader; - } + if (!getFileId(path, fileId)) + return {nullptr, false}; + + const auto id_it = mIdMap.find(fileId); + if (id_it != mIdMap.end()) { + name_it->second = id_it->second; + return {id_it->second, false}; } - // search the header on the include paths (provided by the flags "-I...") - return openHeaderIncludePath(f, dui, header); -} + std::ifstream f(path); + FileData *const data = new FileData {path, TokenList(f, filenames, path, outputList)}; -static std::string findPathInMapBothRelativeAndAbsolute(const std::map &filedata, const std::string& path) { - // here there are two possibilities - either we match this from absolute path or from a relative one - if (filedata.find(path) != filedata.end()) {// try first to respect the exact match - return path; - } + if (dui.removeComments) + data->tokens.removeComments(); - // otherwise - try to use the normalize to the correct representation - std::string alternativePath; - if (isAbsolutePath(path)) { - alternativePath = extractRelativePathFromAbsolute(simplecpp::simplifyPath(path)); - } else { - alternativePath = toAbsolutePath(path); - } + name_it->second = data; + mIdMap.emplace(fileId, data); + mData.emplace_back(data); - if (filedata.find(alternativePath) != filedata.end()) { - return alternativePath; - } - return ""; + return {data, true}; } -static std::string getFileIdPath(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) +std::pair simplecpp::FileDataCache::get(const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader, std::vector &filenames, simplecpp::OutputList *outputList) { - if (filedata.empty()) { - return ""; - } if (isAbsolutePath(header)) { - const std::string simplifiedHeaderPath = simplecpp::simplifyPath(header); - const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, simplifiedHeaderPath); - if (!match.empty()) { - return match; + auto ins = mNameMap.emplace(simplecpp::simplifyPath(header), nullptr); + + if (ins.second) { + const auto ret = tryload(ins.first, dui, filenames, outputList); + if (ret.first != nullptr) { + return ret; + } + } else { + return {ins.first->second, false}; } + + return {nullptr, false}; } if (!systemheader) { - const std::string relativeFilename = getRelativeFileName(sourcefile, header, true); - const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, relativeFilename); - if (!match.empty()) { - return match; - } - // if the file exists but hasn't been loaded yet then we need to stop searching here or we could get a false match - std::ifstream f; - openHeader(f, relativeFilename); - if (f.is_open()) { - f.close(); - return ""; + auto ins = mNameMap.emplace(simplecpp::simplifyPath(dirPath(sourcefile) + header), nullptr); + + if (ins.second) { + const auto ret = tryload(ins.first, dui, filenames, outputList); + if (ret.first != nullptr) { + return ret; + } + } else if (ins.first->second != nullptr) { + return {ins.first->second, false}; } - } else if (filedata.find(header) != filedata.end()) { - return header;// system header that its file is already in the filedata - return that as is } - for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - const std::string match = findPathInMapBothRelativeAndAbsolute(filedata, getIncludePathFileName(*it, header)); - if (!match.empty()) { - return match; + for (const auto &includePath : dui.includePaths) { + auto ins = mNameMap.emplace(simplecpp::simplifyPath(includePath + "/" + header), nullptr); + + if (ins.second) { + const auto ret = tryload(ins.first, dui, filenames, outputList); + if (ret.first != nullptr) { + return ret; + } + } else if (ins.first->second != nullptr) { + return {ins.first->second, false}; } } - return ""; + return {nullptr, false}; } -static bool hasFile(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) +bool simplecpp::FileDataCache::getFileId(const std::string &path, FileID &id) { - return !getFileIdPath(filedata, sourcefile, header, dui, systemheader).empty(); -} +#ifdef SIMPLECPP_WINDOWS + HANDLE hFile = CreateFileA(path.c_str(), 0, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); -static void safeInsertTokenListToMap(std::map &filedata, const std::string &header2, simplecpp::TokenList *tokens, const std::string &header, const std::string &sourcefile, bool systemheader, const char* contextDesc) -{ - const bool inserted = filedata.insert(std::make_pair(header2, tokens)).second; - if (!inserted) { - std::cerr << "error in " << contextDesc << " - attempt to add a tokenized file to the file map, but this file is already in the map! Details:" << - "header: " << header << " header2: " << header2 << " source: " << sourcefile << " systemheader: " << systemheader << std::endl; - std::abort(); - } + if (hFile == INVALID_HANDLE_VALUE) + return false; + + const BOOL ret = GetFileInformationByHandleEx(hFile, FileIdInfo, &id.fileIdInfo, sizeof(id.fileIdInfo)); + + CloseHandle(hFile); + + return ret == TRUE; +#else + struct stat statbuf; + + if (stat(path.c_str(), &statbuf) != 0) + return false; + + id.dev = statbuf.st_dev; + id.ino = statbuf.st_ino; + + return true; +#endif } -std::map simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &filenames, const simplecpp::DUI &dui, simplecpp::OutputList *outputList) +simplecpp::FileDataCache simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &filenames, const simplecpp::DUI &dui, simplecpp::OutputList *outputList) { #ifdef SIMPLECPP_WINDOWS if (dui.clearIncludeCache) nonExistingFilesCache.clear(); #endif - std::map ret; + FileDataCache cache; std::list filelist; // -include files for (std::list::const_iterator it = dui.includes.begin(); it != dui.includes.end(); ++it) { - const std::string &filename = realFilename(*it); + const std::string &filename = *it; - if (ret.find(filename) != ret.end()) - continue; + const auto loadResult = cache.get("", filename, dui, false, filenames, outputList); + const bool loaded = loadResult.second; + FileData *const filedata = loadResult.first; - std::ifstream fin(filename.c_str()); - if (!fin.is_open()) { + if (filedata == nullptr) { if (outputList) { simplecpp::Output err(filenames); err.type = simplecpp::Output::EXPLICIT_INCLUDE_NOT_FOUND; @@ -3340,18 +3120,17 @@ std::map simplecpp::load(const simplecpp::To } continue; } - fin.close(); - TokenList *tokenlist = new TokenList(filename, filenames, outputList); - if (!tokenlist->front()) { - delete tokenlist; + if (!loaded) + continue; + + if (!filedata->tokens.front()) continue; - } if (dui.removeComments) - tokenlist->removeComments(); - ret[filename] = tokenlist; - filelist.push_back(tokenlist->front()); + filedata->tokens.removeComments(); + + filelist.push_back(filedata->tokens.front()); } for (const Token *rawtok = rawtokens.cfront(); rawtok || !filelist.empty(); rawtok = rawtok ? rawtok->next : nullptr) { @@ -3374,25 +3153,20 @@ std::map simplecpp::load(const simplecpp::To continue; const bool systemheader = (htok->str()[0] == '<'); - const std::string header(realFilename(htok->str().substr(1U, htok->str().size() - 2U))); - if (hasFile(ret, sourcefile, header, dui, systemheader)) - continue; + const std::string header(htok->str().substr(1U, htok->str().size() - 2U)); - std::ifstream f; - const std::string header2 = openHeader(f,dui,sourcefile,header,systemheader); - if (!f.is_open()) + FileData *const filedata = cache.get(sourcefile, header, dui, systemheader, filenames, outputList).first; + if (!filedata) continue; - f.close(); - TokenList *tokens = new TokenList(header2, filenames, outputList); if (dui.removeComments) - tokens->removeComments(); - safeInsertTokenListToMap(ret, header2, tokens, header, rawtok->location.file(), systemheader, "simplecpp::load"); - if (tokens->front()) - filelist.push_back(tokens->front()); + filedata->tokens.removeComments(); + + if (filedata->tokens.front()) + filelist.push_back(filedata->tokens.front()); } - return ret; + return cache; } static bool preprocessToken(simplecpp::TokenList &output, const simplecpp::Token **tok1, simplecpp::MacroMap ¯os, std::vector &files, simplecpp::OutputList *outputList) @@ -3448,7 +3222,7 @@ static std::string getTimeDefine(const struct tm *timep) return std::string("\"").append(buf).append("\""); } -void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenList &rawtokens, std::vector &files, std::map &filedata, const simplecpp::DUI &dui, simplecpp::OutputList *outputList, std::list *macroUsage, std::list *ifCond) +void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenList &rawtokens, std::vector &files, simplecpp::FileDataCache &cache, const simplecpp::DUI &dui, simplecpp::OutputList *outputList, std::list *macroUsage, std::list *ifCond) { #ifdef SIMPLECPP_WINDOWS if (dui.clearIncludeCache) @@ -3548,9 +3322,9 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL includetokenstack.push(rawtokens.cfront()); for (std::list::const_iterator it = dui.includes.begin(); it != dui.includes.end(); ++it) { - const std::map::const_iterator f = filedata.find(*it); - if (f != filedata.end()) - includetokenstack.push(f->second->cfront()); + const FileData *const filedata = cache.get("", *it, dui, false, files, outputList).first; + if (filedata != nullptr && filedata->tokens.cfront() != nullptr) + includetokenstack.push(filedata->tokens.cfront()); } std::map > maybeUsedMacros; @@ -3681,21 +3455,9 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL const Token * const inctok = inc2.cfront(); const bool systemheader = (inctok->str()[0] == '<'); - const std::string header(realFilename(inctok->str().substr(1U, inctok->str().size() - 2U))); - std::string header2 = getFileIdPath(filedata, rawtok->location.file(), header, dui, systemheader); - if (header2.empty()) { - // try to load file.. - std::ifstream f; - header2 = openHeader(f, dui, rawtok->location.file(), header, systemheader); - if (f.is_open()) { - f.close(); - TokenList * const tokens = new TokenList(header2, files, outputList); - if (dui.removeComments) - tokens->removeComments(); - safeInsertTokenListToMap(filedata, header2, tokens, header, rawtok->location.file(), systemheader, "simplecpp::preprocess"); - } - } - if (header2.empty()) { + const std::string header(inctok->str().substr(1U, inctok->str().size() - 2U)); + const FileData *const filedata = cache.get(rawtok->location.file(), header, dui, systemheader, files, outputList).first; + if (filedata == nullptr) { if (outputList) { simplecpp::Output out(files); out.type = Output::MISSING_HEADER; @@ -3711,10 +3473,9 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL out.msg = "#include nested too deeply"; outputList->push_back(out); } - } else if (pragmaOnce.find(header2) == pragmaOnce.end()) { + } else if (pragmaOnce.find(filedata->filename) == pragmaOnce.end()) { includetokenstack.push(gotoNextLine(rawtok)); - const TokenList * const includetokens = filedata.find(header2)->second; - rawtok = includetokens ? includetokens->cfront() : nullptr; + rawtok = filedata->tokens.cfront(); continue; } } else if (rawtok->str() == IF || rawtok->str() == IFDEF || rawtok->str() == IFNDEF || rawtok->str() == ELIF) { @@ -3791,12 +3552,10 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL if (systemheader) { while ((tok = tok->next) && tok->op != '>') header += tok->str(); - // cppcheck-suppress selfAssignment - platform-dependent implementation - header = realFilename(header); if (tok && tok->op == '>') closingAngularBracket = true; } else { - header = realFilename(tok->str().substr(1U, tok->str().size() - 2U)); + header = tok->str().substr(1U, tok->str().size() - 2U); closingAngularBracket = true; } std::ifstream f; @@ -3956,11 +3715,9 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL } } -void simplecpp::cleanup(std::map &filedata) +void simplecpp::cleanup(FileDataCache &cache) { - for (std::map::iterator it = filedata.begin(); it != filedata.end(); ++it) - delete it->second; - filedata.clear(); + cache.clear(); } simplecpp::cstd_t simplecpp::getCStd(const std::string &std) diff --git a/simplecpp.h b/simplecpp.h index 579e6e14..76487d6c 100755 --- a/simplecpp.h +++ b/simplecpp.h @@ -6,13 +6,19 @@ #ifndef simplecppH #define simplecppH +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) +# define SIMPLECPP_WINDOWS +#endif + #include #include #include #include #include +#include #include #include +#include #include #ifdef _WIN32 @@ -27,6 +33,12 @@ # define SIMPLECPP_LIB #endif +#ifdef SIMPLECPP_WINDOWS +# include +#else +# include +#endif + #if defined(_MSC_VER) # pragma warning(push) // suppress warnings about "conversion from 'type1' to 'type2', possible loss of data" @@ -43,6 +55,7 @@ namespace simplecpp { typedef std::string TokenString; class Macro; + class FileDataCache; /** * Location in source code @@ -342,7 +355,7 @@ namespace simplecpp { SIMPLECPP_LIB long long characterLiteralToLL(const std::string& str); - SIMPLECPP_LIB std::map load(const TokenList &rawtokens, std::vector &filenames, const DUI &dui, OutputList *outputList = nullptr); + SIMPLECPP_LIB FileDataCache load(const TokenList &rawtokens, std::vector &filenames, const DUI &dui, OutputList *outputList = nullptr); /** * Preprocess @@ -350,18 +363,18 @@ namespace simplecpp { * @param output TokenList that receives the preprocessing output * @param rawtokens Raw tokenlist for top sourcefile * @param files internal data of simplecpp - * @param filedata output from simplecpp::load() + * @param cache output from simplecpp::load() * @param dui defines, undefs, and include paths * @param outputList output: list that will receive output messages * @param macroUsage output: macro usage * @param ifCond output: #if/#elif expressions */ - SIMPLECPP_LIB void preprocess(TokenList &output, const TokenList &rawtokens, std::vector &files, std::map &filedata, const DUI &dui, OutputList *outputList = nullptr, std::list *macroUsage = nullptr, std::list *ifCond = nullptr); + SIMPLECPP_LIB void preprocess(TokenList &output, const TokenList &rawtokens, std::vector &files, FileDataCache &cache, const DUI &dui, OutputList *outputList = nullptr, std::list *macroUsage = nullptr, std::list *ifCond = nullptr); /** * Deallocate data */ - SIMPLECPP_LIB void cleanup(std::map &filedata); + SIMPLECPP_LIB void cleanup(FileDataCache &cache); /** Simplify path */ SIMPLECPP_LIB std::string simplifyPath(std::string path); @@ -382,6 +395,116 @@ namespace simplecpp { /** Returns the __cplusplus value for a given standard */ SIMPLECPP_LIB std::string getCppStdString(const std::string &std); SIMPLECPP_LIB std::string getCppStdString(cppstd_t std); + + struct SIMPLECPP_LIB FileData { + /** The canonical filename associated with this data */ + std::string filename; + /** The tokens associated with this file */ + TokenList tokens; + }; + + class SIMPLECPP_LIB FileDataCache { + public: + FileDataCache() = default; + + FileDataCache(const FileDataCache &) = delete; + FileDataCache(FileDataCache &&) = default; + + FileDataCache &operator=(const FileDataCache &) = delete; + FileDataCache &operator=(FileDataCache &&) = default; + + /** Get the cached data for a file, or load and then return it if it isn't cached. + * returns the file data and true if the file was loaded, false if it was cached. */ + std::pair get(const std::string &sourcefile, const std::string &header, const DUI &dui, bool systemheader, std::vector &filenames, OutputList *outputList); + + void insert(FileData data) { + FileData *const newdata = new FileData(std::move(data)); + + mData.emplace_back(newdata); + mNameMap.emplace(newdata->filename, newdata); + } + + void clear() { + mNameMap.clear(); + mIdMap.clear(); + mData.clear(); + } + + typedef std::vector> container_type; + typedef container_type::iterator iterator; + typedef container_type::const_iterator const_iterator; + typedef container_type::size_type size_type; + + size_type size() const { + return mData.size(); + } + iterator begin() { + return mData.begin(); + } + iterator end() { + return mData.end(); + } + const_iterator begin() const { + return mData.begin(); + } + const_iterator end() const { + return mData.end(); + } + const_iterator cbegin() const { + return mData.cbegin(); + } + const_iterator cend() const { + return mData.cend(); + } + + private: + struct FileID { +#ifdef SIMPLECPP_WINDOWS + struct { + std::uint64_t VolumeSerialNumber; + struct { + std::uint64_t IdentifierHi; + std::uint64_t IdentifierLo; + } FileId; + } fileIdInfo; + + bool operator==(const FileID &that) const noexcept { + return fileIdInfo.VolumeSerialNumber == that.fileIdInfo.VolumeSerialNumber && + fileIdInfo.FileId.IdentifierHi == that.fileIdInfo.FileId.IdentifierHi && + fileIdInfo.FileId.IdentifierLo == that.fileIdInfo.FileId.IdentifierLo; + } +#else + dev_t dev; + ino_t ino; + + bool operator==(const FileID& that) const noexcept { + return dev == that.dev && ino == that.ino; + } +#endif + struct Hasher { + std::size_t operator()(const FileID &id) const { +#ifdef SIMPLECPP_WINDOWS + return static_cast(id.fileIdInfo.FileId.IdentifierHi ^ id.fileIdInfo.FileId.IdentifierLo ^ + id.fileIdInfo.VolumeSerialNumber); +#else + return static_cast(id.dev) ^ static_cast(id.ino); +#endif + } + }; + }; + + using name_map_type = std::unordered_map; + using id_map_type = std::unordered_map; + + static bool getFileId(const std::string &path, FileID &id); + + std::pair tryload(name_map_type::iterator &name_it, const DUI &dui, std::vector &filenames, OutputList *outputList); + + container_type mData; + name_map_type mNameMap; + id_map_type mIdMap; + + }; } #if defined(_MSC_VER) diff --git a/test.cpp b/test.cpp index ba21d71b..e1968569 100644 --- a/test.cpp +++ b/test.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -100,13 +99,12 @@ static std::string readfile(const char code[], std::size_t size, simplecpp::Outp static std::string preprocess(const char code[], const simplecpp::DUI &dui, simplecpp::OutputList *outputList) { std::vector files; - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokens = makeTokenList(code,files); tokens.removeComments(); simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, tokens, files, filedata, dui, outputList); - for (auto &i : filedata) - delete i.second; + simplecpp::preprocess(tokens2, tokens, files, cache, dui, outputList); + simplecpp::cleanup(cache); return tokens2.stringify(); } @@ -1077,11 +1075,11 @@ static void error4() // "#error x\n1" const char code[] = "\xFE\xFF\x00\x23\x00\x65\x00\x72\x00\x72\x00\x6f\x00\x72\x00\x20\x00\x78\x00\x0a\x00\x31"; std::vector files; - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::OutputList outputList; simplecpp::TokenList tokens2(files); const simplecpp::TokenList rawtoken = makeTokenList(code, sizeof(code),files,"test.c"); - simplecpp::preprocess(tokens2, rawtoken, files, filedata, simplecpp::DUI(), &outputList); + simplecpp::preprocess(tokens2, rawtoken, files, cache, simplecpp::DUI(), &outputList); ASSERT_EQUALS("file0,1,#error,#error x\n", toString(outputList)); } @@ -1090,11 +1088,11 @@ static void error5() // "#error x\n1" const char code[] = "\xFF\xFE\x23\x00\x65\x00\x72\x00\x72\x00\x6f\x00\x72\x00\x20\x00\x78\x00\x0a\x00\x78\x00\x31\x00"; std::vector files; - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::OutputList outputList; simplecpp::TokenList tokens2(files); const simplecpp::TokenList rawtokens = makeTokenList(code, sizeof(code),files,"test.c"); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, simplecpp::DUI(), &outputList); + simplecpp::preprocess(tokens2, rawtokens, files, cache, simplecpp::DUI(), &outputList); ASSERT_EQUALS("file0,1,#error,#error x\n", toString(outputList)); } @@ -2001,12 +1999,14 @@ static void missingHeader2() { const char code[] = "#include \"foo.h\"\n"; // this file exists std::vector files; - std::map filedata; - filedata["foo.h"] = nullptr; + simplecpp::FileDataCache cache; + cache.insert({"foo.h", simplecpp::TokenList(files)}); simplecpp::OutputList outputList; simplecpp::TokenList tokens2(files); const simplecpp::TokenList rawtokens = makeTokenList(code,files); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, simplecpp::DUI(), &outputList); + simplecpp::DUI dui; + dui.includePaths.push_back("."); + simplecpp::preprocess(tokens2, rawtokens, files, cache, dui, &outputList); ASSERT_EQUALS("", toString(outputList)); } @@ -2030,13 +2030,15 @@ static void nestedInclude() { const char code[] = "#include \"test.h\"\n"; std::vector files; - simplecpp::TokenList rawtokens = makeTokenList(code,files,"test.h"); - std::map filedata; - filedata["test.h"] = &rawtokens; + const simplecpp::TokenList rawtokens = makeTokenList(code,files,"test.h"); + simplecpp::FileDataCache cache; + cache.insert({"test.h", rawtokens}); simplecpp::OutputList outputList; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, simplecpp::DUI(), &outputList); + simplecpp::DUI dui; + dui.includePaths.push_back("."); + simplecpp::preprocess(tokens2, rawtokens, files, cache, dui, &outputList); ASSERT_EQUALS("file0,1,include_nested_too_deeply,#include nested too deeply\n", toString(outputList)); } @@ -2045,14 +2047,16 @@ static void systemInclude() { const char code[] = "#include \n"; std::vector files; - simplecpp::TokenList rawtokens = makeTokenList(code,files,"local/limits.h"); - std::map filedata; - filedata["limits.h"] = nullptr; - filedata["local/limits.h"] = &rawtokens; + const simplecpp::TokenList rawtokens = makeTokenList(code,files,"local/limits.h"); + simplecpp::FileDataCache cache; + cache.insert({"include/limits.h", simplecpp::TokenList(files)}); + cache.insert({"local/limits.h", rawtokens}); simplecpp::OutputList outputList; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, simplecpp::DUI(), &outputList); + simplecpp::DUI dui; + dui.includePaths.push_back("include"); + simplecpp::preprocess(tokens2, rawtokens, files, cache, dui, &outputList); ASSERT_EQUALS("", toString(outputList)); } @@ -2074,9 +2078,9 @@ static void multiline2() simplecpp::TokenList rawtokens = makeTokenList(code,files); ASSERT_EQUALS("# define A /**/ 1\n\nA", rawtokens.stringify()); rawtokens.removeComments(); - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, simplecpp::DUI()); + simplecpp::preprocess(tokens2, rawtokens, files, cache, simplecpp::DUI()); ASSERT_EQUALS("\n\n1", tokens2.stringify()); } @@ -2089,9 +2093,9 @@ static void multiline3() // #28 - macro with multiline comment simplecpp::TokenList rawtokens = makeTokenList(code,files); ASSERT_EQUALS("# define A /* */ 1\n\nA", rawtokens.stringify()); rawtokens.removeComments(); - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, simplecpp::DUI()); + simplecpp::preprocess(tokens2, rawtokens, files, cache, simplecpp::DUI()); ASSERT_EQUALS("\n\n1", tokens2.stringify()); } @@ -2105,9 +2109,9 @@ static void multiline4() // #28 - macro with multiline comment simplecpp::TokenList rawtokens = makeTokenList(code,files); ASSERT_EQUALS("# define A /* */ 1\n\n\nA", rawtokens.stringify()); rawtokens.removeComments(); - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, simplecpp::DUI()); + simplecpp::preprocess(tokens2, rawtokens, files, cache, simplecpp::DUI()); ASSERT_EQUALS("\n\n\n1", tokens2.stringify()); } @@ -2221,19 +2225,21 @@ static void include3() // #16 - crash when expanding macro from header std::vector files; - simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "A.c"); - simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "A.h"); + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "A.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "A.h"); ASSERT_EQUALS(2U, files.size()); ASSERT_EQUALS("A.c", files[0]); ASSERT_EQUALS("A.h", files[1]); - std::map filedata; - filedata["A.c"] = &rawtokens_c; - filedata["A.h"] = &rawtokens_h; + simplecpp::FileDataCache cache; + cache.insert({"A.c", rawtokens_c}); + cache.insert({"A.h", rawtokens_h}); simplecpp::TokenList out(files); - simplecpp::preprocess(out, rawtokens_c, files, filedata, simplecpp::DUI()); + simplecpp::DUI dui; + dui.includePaths.push_back("."); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("\n1234", out.stringify()); } @@ -2246,21 +2252,22 @@ static void include4() // #27 - -include std::vector files; - simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "27.c"); - simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "27.h"); + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "27.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "27.h"); ASSERT_EQUALS(2U, files.size()); ASSERT_EQUALS("27.c", files[0]); ASSERT_EQUALS("27.h", files[1]); - std::map filedata; - filedata["27.c"] = &rawtokens_c; - filedata["27.h"] = &rawtokens_h; + simplecpp::FileDataCache cache; + cache.insert({"27.c", rawtokens_c}); + cache.insert({"27.h", rawtokens_h}); simplecpp::TokenList out(files); simplecpp::DUI dui; + dui.includePaths.push_back("."); dui.includes.push_back("27.h"); - simplecpp::preprocess(out, rawtokens_c, files, filedata, dui); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("123", out.stringify()); } @@ -2272,19 +2279,21 @@ static void include5() // #3 - handle #include MACRO std::vector files; - simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "3.c"); - simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "3.h"); + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "3.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "3.h"); ASSERT_EQUALS(2U, files.size()); ASSERT_EQUALS("3.c", files[0]); ASSERT_EQUALS("3.h", files[1]); - std::map filedata; - filedata["3.c"] = &rawtokens_c; - filedata["3.h"] = &rawtokens_h; + simplecpp::FileDataCache cache; + cache.insert({"3.c", rawtokens_c}); + cache.insert({"3.h", rawtokens_h}); simplecpp::TokenList out(files); - simplecpp::preprocess(out, rawtokens_c, files, filedata, simplecpp::DUI()); + simplecpp::DUI dui; + dui.includePaths.push_back("."); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("\n#line 1 \"3.h\"\n123", out.stringify()); } @@ -2295,16 +2304,16 @@ static void include6() // #57 - incomplete macro #include MACRO(,) std::vector files; - simplecpp::TokenList rawtokens = makeTokenList(code, files, "57.c"); + const simplecpp::TokenList rawtokens = makeTokenList(code, files, "57.c"); ASSERT_EQUALS(1U, files.size()); ASSERT_EQUALS("57.c", files[0]); - std::map filedata; - filedata["57.c"] = &rawtokens; + simplecpp::FileDataCache cache; + cache.insert({"57.c", rawtokens}); simplecpp::TokenList out(files); - simplecpp::preprocess(out, rawtokens, files, filedata, simplecpp::DUI()); + simplecpp::preprocess(out, rawtokens, files, cache, simplecpp::DUI()); } @@ -2316,21 +2325,21 @@ static void include7() // #include MACRO std::vector files; - simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "3.c"); - simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "3.h"); + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "3.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "3.h"); ASSERT_EQUALS(2U, files.size()); ASSERT_EQUALS("3.c", files[0]); ASSERT_EQUALS("3.h", files[1]); - std::map filedata; - filedata["3.c"] = &rawtokens_c; - filedata["3.h"] = &rawtokens_h; + simplecpp::FileDataCache cache; + cache.insert({"3.c", rawtokens_c}); + cache.insert({"3.h", rawtokens_h}); simplecpp::TokenList out(files); simplecpp::DUI dui; dui.includePaths.push_back("."); - simplecpp::preprocess(out, rawtokens_c, files, filedata, dui); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("\n#line 1 \"3.h\"\n123", out.stringify()); } @@ -2354,21 +2363,21 @@ static void include9() std::vector files; - simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "1.c"); - simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "1.h"); + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "1.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "1.h"); ASSERT_EQUALS(2U, files.size()); ASSERT_EQUALS("1.c", files[0]); ASSERT_EQUALS("1.h", files[1]); - std::map filedata; - filedata["1.c"] = &rawtokens_c; - filedata["1.h"] = &rawtokens_h; + simplecpp::FileDataCache cache; + cache.insert({"1.c", rawtokens_c}); + cache.insert({"1.h", rawtokens_h}); simplecpp::TokenList out(files); simplecpp::DUI dui; dui.includePaths.push_back("."); - simplecpp::preprocess(out, rawtokens_c, files, filedata, dui); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("\n#line 2 \"1.h\"\nx = 1 ;", out.stringify()); } @@ -2536,19 +2545,21 @@ static void stringify1() std::vector files; - simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "A.c"); - simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "A.h"); + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "A.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "A.h"); ASSERT_EQUALS(2U, files.size()); ASSERT_EQUALS("A.c", files[0]); ASSERT_EQUALS("A.h", files[1]); - std::map filedata; - filedata["A.c"] = &rawtokens_c; - filedata["A.h"] = &rawtokens_h; + simplecpp::FileDataCache cache; + cache.insert({"A.c", rawtokens_c}); + cache.insert({"A.h", rawtokens_h}); simplecpp::TokenList out(files); - simplecpp::preprocess(out, rawtokens_c, files, filedata, simplecpp::DUI()); + simplecpp::DUI dui; + dui.includePaths.push_back("."); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("\n#line 1 \"A.h\"\n1\n2\n#line 1 \"A.h\"\n1\n2", out.stringify()); } @@ -2558,10 +2569,10 @@ static void tokenMacro1() const char code[] = "#define A 123\n" "A"; std::vector files; - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokenList(files); const simplecpp::TokenList rawtokens = makeTokenList(code,files); - simplecpp::preprocess(tokenList, rawtokens, files, filedata, simplecpp::DUI()); + simplecpp::preprocess(tokenList, rawtokens, files, cache, simplecpp::DUI()); ASSERT_EQUALS("A", tokenList.cback()->macro); } @@ -2570,10 +2581,10 @@ static void tokenMacro2() const char code[] = "#define ADD(X,Y) X+Y\n" "ADD(1,2)"; std::vector files; - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokenList(files); const simplecpp::TokenList rawtokens = makeTokenList(code,files); - simplecpp::preprocess(tokenList, rawtokens, files, filedata, simplecpp::DUI()); + simplecpp::preprocess(tokenList, rawtokens, files, cache, simplecpp::DUI()); const simplecpp::Token *tok = tokenList.cfront(); ASSERT_EQUALS("1", tok->str()); ASSERT_EQUALS("", tok->macro); @@ -2591,10 +2602,10 @@ static void tokenMacro3() "#define FRED 1\n" "ADD(FRED,2)"; std::vector files; - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokenList(files); const simplecpp::TokenList rawtokens = makeTokenList(code,files); - simplecpp::preprocess(tokenList, rawtokens, files, filedata, simplecpp::DUI()); + simplecpp::preprocess(tokenList, rawtokens, files, cache, simplecpp::DUI()); const simplecpp::Token *tok = tokenList.cfront(); ASSERT_EQUALS("1", tok->str()); ASSERT_EQUALS("FRED", tok->macro); @@ -2612,10 +2623,10 @@ static void tokenMacro4() "#define B 1\n" "A"; std::vector files; - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokenList(files); const simplecpp::TokenList rawtokens = makeTokenList(code,files); - simplecpp::preprocess(tokenList, rawtokens, files, filedata, simplecpp::DUI()); + simplecpp::preprocess(tokenList, rawtokens, files, cache, simplecpp::DUI()); const simplecpp::Token * const tok = tokenList.cfront(); ASSERT_EQUALS("1", tok->str()); ASSERT_EQUALS("A", tok->macro); @@ -2627,10 +2638,10 @@ static void tokenMacro5() "#define SET_BPF_JUMP(code) SET_BPF(D | code)\n" "SET_BPF_JUMP(A | B | C);"; std::vector files; - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokenList(files); const simplecpp::TokenList rawtokens = makeTokenList(code,files); - simplecpp::preprocess(tokenList, rawtokens, files, filedata, simplecpp::DUI()); + simplecpp::preprocess(tokenList, rawtokens, files, cache, simplecpp::DUI()); const simplecpp::Token * const tok = tokenList.cfront()->next; ASSERT_EQUALS("D", tok->str()); ASSERT_EQUALS("SET_BPF_JUMP", tok->macro); @@ -3064,8 +3075,8 @@ static void preprocess_files() ASSERT_EQUALS(1, files.size()); ASSERT_EQUALS("", *files.cbegin()); - std::map filedata; - simplecpp::preprocess(tokens2, tokens, files, filedata, simplecpp::DUI(), nullptr); + simplecpp::FileDataCache cache; + simplecpp::preprocess(tokens2, tokens, files, cache, simplecpp::DUI(), nullptr); ASSERT_EQUALS(1, files.size()); ASSERT_EQUALS("", *files.cbegin()); } @@ -3081,8 +3092,8 @@ static void preprocess_files() ASSERT_EQUALS(1, files.size()); ASSERT_EQUALS("test.cpp", *files.cbegin()); - std::map filedata; - simplecpp::preprocess(tokens2, tokens, files, filedata, simplecpp::DUI(), nullptr); + simplecpp::FileDataCache cache; + simplecpp::preprocess(tokens2, tokens, files, cache, simplecpp::DUI(), nullptr); ASSERT_EQUALS(1, files.size()); ASSERT_EQUALS("test.cpp", *files.cbegin()); } From 6dd82cb039f669f83150aff400721b8432134b13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Tue, 8 Jul 2025 16:42:31 +0200 Subject: [PATCH 28/41] fixed #466 - CI-unixish.yml: added missing `UBSAN_OPTIONS` (#467) --- .github/workflows/CI-unixish.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index f5a78ea5..ff7c3f65 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -101,6 +101,8 @@ jobs: run: | make clean make -j$(nproc) test selfcheck CXXFLAGS="-O2 -g3 -fsanitize=undefined -fno-sanitize=signed-integer-overflow" LDFLAGS="-fsanitize=undefined -fno-sanitize=signed-integer-overflow" + env: + UBSAN_OPTIONS: print_stacktrace=1:halt_on_error=1:report_error_type=1 # TODO: requires instrumented libc++ - name: Run MemorySanitizer From bd068aeaae1414104458b9fd79911dfb8462ebdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Mon, 14 Jul 2025 12:12:20 +0200 Subject: [PATCH 29/41] fixed #464 - added integration test to `test` make target (#465) --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 73977517..4a6ae6b7 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ testrunner: test.o simplecpp.o test: testrunner simplecpp ./testrunner python3 run-tests.py + python3 -m pytest integration_test.py -vv selfcheck: simplecpp ./selfcheck.sh From 5bf471de82684afe7af168739c037fbf6a248d69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Mon, 14 Jul 2025 12:40:45 +0200 Subject: [PATCH 30/41] added test for #346 (#457) --- test.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test.cpp b/test.cpp index e1968569..3c5f5488 100644 --- a/test.cpp +++ b/test.cpp @@ -3106,6 +3106,11 @@ static void fuzz_crash() "n\n"; (void)preprocess(code, simplecpp::DUI()); // do not crash } + { // #346 + const char code[] = "#define foo(intp)f##oo(intp\n" + "foo(f##oo(intp))\n"; + (void)preprocess(code, simplecpp::DUI()); // do not crash + } } int main(int argc, char **argv) From 29abbc6bdb29eb72d37892dab776e48e9998d8bc Mon Sep 17 00:00:00 2001 From: clock999 Date: Tue, 15 Jul 2025 19:50:26 +0800 Subject: [PATCH 31/41] fix #337 - line splicing in comment not handled properly (#431) --- simplecpp.cpp | 33 +++++++++++++++++++++++++-------- test.cpp | 25 ++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 128da0bd..721bbdeb 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -758,17 +758,34 @@ void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, // comment else if (ch == '/' && stream.peekChar() == '/') { - while (stream.good() && ch != '\r' && ch != '\n') { + while (stream.good() && ch != '\n') { currentToken += ch; ch = stream.readChar(); + if(ch == '\\') { + TokenString tmp; + char tmp_ch = ch; + while((stream.good()) && (tmp_ch == '\\' || tmp_ch == ' ' || tmp_ch == '\t')) { + tmp += tmp_ch; + tmp_ch = stream.readChar(); + } + if(!stream.good()) { + break; + } + + if(tmp_ch != '\n') { + currentToken += tmp; + } else { + TokenString check_portability = currentToken + tmp; + const std::string::size_type pos = check_portability.find_last_not_of(" \t"); + if (pos < check_portability.size() - 1U && check_portability[pos] == '\\') + portabilityBackslash(outputList, files, location); + ++multiline; + tmp_ch = stream.readChar(); + } + ch = tmp_ch; + } } - const std::string::size_type pos = currentToken.find_last_not_of(" \t"); - if (pos < currentToken.size() - 1U && currentToken[pos] == '\\') - portabilityBackslash(outputList, files, location); - if (currentToken[currentToken.size() - 1U] == '\\') { - ++multiline; - currentToken.erase(currentToken.size() - 1U); - } else { + if (ch == '\n') { stream.ungetChar(); } } diff --git a/test.cpp b/test.cpp index 3c5f5488..a7578823 100644 --- a/test.cpp +++ b/test.cpp @@ -434,7 +434,30 @@ static void comment_multiline() const char code[] = "#define ABC {// \\\n" "}\n" "void f() ABC\n"; - ASSERT_EQUALS("\n\nvoid f ( ) { }", preprocess(code)); + ASSERT_EQUALS("\n\nvoid f ( ) {", preprocess(code)); + + const char code1[] = "#define ABC {// \\\r\n" + "}\n" + "void f() ABC\n"; + ASSERT_EQUALS("\n\nvoid f ( ) {", preprocess(code1)); + + const char code2[] = "#define A 1// \\\r" + "\r" + "2\r" + "A\r"; + ASSERT_EQUALS("\n\n2\n1", preprocess(code2)); + + const char code3[] = "void f() {// \\ \n}\n"; + ASSERT_EQUALS("void f ( ) {", preprocess(code3)); + + const char code4[] = "void f() {// \\\\\\\t\t\n}\n"; + ASSERT_EQUALS("void f ( ) {", preprocess(code4)); + + const char code5[] = "void f() {// \\\\\\a\n}\n"; + ASSERT_EQUALS("void f ( ) {\n}", preprocess(code5)); + + const char code6[] = "void f() {// \\\n\n\n}\n"; + ASSERT_EQUALS("void f ( ) {\n\n\n}", preprocess(code6)); } From d0f2b99d656cb6dab6fc99a105a6c59cbcfbb13a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=C3=A4ki?= Date: Sun, 3 Aug 2025 07:42:06 +0200 Subject: [PATCH 32/41] Fix #471 (preserve line splicing information in '// ..' comments) (#472) --- simplecpp.cpp | 3 ++- test.cpp | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 721bbdeb..5093b4b7 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -775,12 +775,13 @@ void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, if(tmp_ch != '\n') { currentToken += tmp; } else { - TokenString check_portability = currentToken + tmp; + const TokenString check_portability = currentToken + tmp; const std::string::size_type pos = check_portability.find_last_not_of(" \t"); if (pos < check_portability.size() - 1U && check_portability[pos] == '\\') portabilityBackslash(outputList, files, location); ++multiline; tmp_ch = stream.readChar(); + currentToken += '\n'; } ch = tmp_ch; } diff --git a/test.cpp b/test.cpp index a7578823..de9f250b 100644 --- a/test.cpp +++ b/test.cpp @@ -458,6 +458,9 @@ static void comment_multiline() const char code6[] = "void f() {// \\\n\n\n}\n"; ASSERT_EQUALS("void f ( ) {\n\n\n}", preprocess(code6)); + + // #471 ensure there is newline in comment so that line-splicing can be detected by tools + ASSERT_EQUALS("// abc\ndef", readfile("// abc\\\ndef")); } From 435a74cc192e64499ddf96193becf8073c50376c Mon Sep 17 00:00:00 2001 From: glankk Date: Mon, 4 Aug 2025 15:14:18 +0200 Subject: [PATCH 33/41] Fix #391 (`__TIME__` replacement might be empty depending on compiler) (#441) --- simplecpp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 5093b4b7..addac46f 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -3236,7 +3236,7 @@ static std::string getDateDefine(const struct tm *timep) static std::string getTimeDefine(const struct tm *timep) { char buf[] = "??:??:??"; - strftime(buf, sizeof(buf), "%T", timep); + strftime(buf, sizeof(buf), "%H:%M:%S", timep); return std::string("\"").append(buf).append("\""); } From 2b4f727da30c87ef2de79cfe81760e3b1b2ca772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Mon, 11 Aug 2025 15:59:06 +0200 Subject: [PATCH 34/41] simplecpp.cpp: fixed Visual Studio C4800 compiler warnings (#481) --- simplecpp.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index addac46f..25d0d3c3 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -745,7 +745,7 @@ void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, // number or name if (isNameChar(ch)) { - const bool num = std::isdigit(ch); + const bool num = !!std::isdigit(ch); while (stream.good() && isNameChar(ch)) { currentToken += ch; ch = stream.readChar(); @@ -886,7 +886,7 @@ void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, } if (prefix.empty()) - push_back(new Token(s, location, std::isspace(stream.peekChar()))); // push string without newlines + push_back(new Token(s, location, !!std::isspace(stream.peekChar()))); // push string without newlines else back()->setstr(prefix + s); @@ -916,7 +916,7 @@ void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, } } - push_back(new Token(currentToken, location, std::isspace(stream.peekChar()))); + push_back(new Token(currentToken, location, !!std::isspace(stream.peekChar()))); if (multiline) location.col += currentToken.size(); From 5783afac7dded04a5e4bb2c9b6b6b593ea2a4c4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Mon, 11 Aug 2025 16:00:16 +0200 Subject: [PATCH 35/41] Makefile: added `CXXOPTS` and `LDOPTS` to extend `CXXFLAGS` and `LDFLAGS` (#480) --- .github/workflows/CI-unixish.yml | 10 +++++----- Makefile | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index ff7c3f65..cb498b5e 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -80,19 +80,19 @@ jobs: if: matrix.os == 'ubuntu-24.04' && matrix.compiler == 'g++' run: | make clean - make -j$(nproc) test selfcheck CXXFLAGS="-g3 -D_GLIBCXX_DEBUG" + make -j$(nproc) test selfcheck CXXOPTS="-g3 -D_GLIBCXX_DEBUG" - name: Run with libc++ hardening mode if: matrix.os == 'ubuntu-24.04' && matrix.compiler == 'clang++' run: | make clean - make -j$(nproc) test selfcheck CXXFLAGS="-stdlib=libc++ -g3 -D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG" LDFLAGS="-lc++" + make -j$(nproc) test selfcheck CXXOPTS="-stdlib=libc++ -g3 -D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG" LDOPTS="-lc++" - name: Run AddressSanitizer if: matrix.os == 'ubuntu-24.04' run: | make clean - make -j$(nproc) test selfcheck CXXFLAGS="-O2 -g3 -fsanitize=address" LDFLAGS="-fsanitize=address" + make -j$(nproc) test selfcheck CXXOPTS="-O2 -g3 -fsanitize=address" LDOPTS="-fsanitize=address" env: ASAN_OPTIONS: detect_stack_use_after_return=1 @@ -100,7 +100,7 @@ jobs: if: matrix.os == 'ubuntu-24.04' run: | make clean - make -j$(nproc) test selfcheck CXXFLAGS="-O2 -g3 -fsanitize=undefined -fno-sanitize=signed-integer-overflow" LDFLAGS="-fsanitize=undefined -fno-sanitize=signed-integer-overflow" + make -j$(nproc) test selfcheck CXXOPTS="-O2 -g3 -fsanitize=undefined -fno-sanitize=signed-integer-overflow" LDOPTS="-fsanitize=undefined -fno-sanitize=signed-integer-overflow" env: UBSAN_OPTIONS: print_stacktrace=1:halt_on_error=1:report_error_type=1 @@ -109,4 +109,4 @@ jobs: if: false && matrix.os == 'ubuntu-24.04' && matrix.compiler == 'clang++' run: | make clean - make -j$(nproc) test selfcheck CXXFLAGS="-O2 -g3 -stdlib=libc++ -fsanitize=memory" LDFLAGS="-lc++ -fsanitize=memory" + make -j$(nproc) test selfcheck CXXOPTS="-O2 -g3 -stdlib=libc++ -fsanitize=memory" LDOPTS="-lc++ -fsanitize=memory" diff --git a/Makefile b/Makefile index 4a6ae6b7..d899d2cd 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: testrunner simplecpp -CXXFLAGS = -Wall -Wextra -pedantic -Wcast-qual -Wfloat-equal -Wmissing-declarations -Wmissing-format-attribute -Wredundant-decls -Wundef -Wno-multichar -Wold-style-cast -std=c++11 -g -LDFLAGS = -g +CXXFLAGS = -Wall -Wextra -pedantic -Wcast-qual -Wfloat-equal -Wmissing-declarations -Wmissing-format-attribute -Wredundant-decls -Wundef -Wno-multichar -Wold-style-cast -std=c++11 -g $(CXXOPTS) +LDFLAGS = -g $(LDOPTS) %.o: %.cpp simplecpp.h $(CXX) $(CXXFLAGS) -c $< From 1678b7d229a7bcf833055766afa1496d68e1397c Mon Sep 17 00:00:00 2001 From: glankk Date: Thu, 14 Aug 2025 17:53:41 +0200 Subject: [PATCH 36/41] Remove execute bit from simplecpp.cpp/h (#494) --- simplecpp.cpp | 0 simplecpp.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 simplecpp.cpp mode change 100755 => 100644 simplecpp.h diff --git a/simplecpp.cpp b/simplecpp.cpp old mode 100755 new mode 100644 diff --git a/simplecpp.h b/simplecpp.h old mode 100755 new mode 100644 From f790009b5f19a20b6254c03e8d02c8d3e60f1244 Mon Sep 17 00:00:00 2001 From: glankk Date: Thu, 21 Aug 2025 11:12:21 +0200 Subject: [PATCH 37/41] Fix infinite loop with circular includes (#497) --- simplecpp.cpp | 18 ++++++----- simplecpp.h | 87 +++++++++++++++++++++++++-------------------------- test.cpp | 45 ++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 51 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 25d0d3c3..fd327549 100644 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -3109,15 +3109,13 @@ bool simplecpp::FileDataCache::getFileId(const std::string &path, FileID &id) #endif } -simplecpp::FileDataCache simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &filenames, const simplecpp::DUI &dui, simplecpp::OutputList *outputList) +simplecpp::FileDataCache simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &filenames, const simplecpp::DUI &dui, simplecpp::OutputList *outputList, FileDataCache cache) { #ifdef SIMPLECPP_WINDOWS if (dui.clearIncludeCache) nonExistingFilesCache.clear(); #endif - FileDataCache cache; - std::list filelist; // -include files @@ -3173,15 +3171,21 @@ simplecpp::FileDataCache simplecpp::load(const simplecpp::TokenList &rawtokens, const bool systemheader = (htok->str()[0] == '<'); const std::string header(htok->str().substr(1U, htok->str().size() - 2U)); - FileData *const filedata = cache.get(sourcefile, header, dui, systemheader, filenames, outputList).first; - if (!filedata) + const auto loadResult = cache.get(sourcefile, header, dui, systemheader, filenames, outputList); + const bool loaded = loadResult.second; + + if (!loaded) + continue; + + FileData *const filedata = loadResult.first; + + if (!filedata->tokens.front()) continue; if (dui.removeComments) filedata->tokens.removeComments(); - if (filedata->tokens.front()) - filelist.push_back(filedata->tokens.front()); + filelist.push_back(filedata->tokens.front()); } return cache; diff --git a/simplecpp.h b/simplecpp.h index 76487d6c..8268fa8d 100644 --- a/simplecpp.h +++ b/simplecpp.h @@ -353,49 +353,6 @@ namespace simplecpp { bool removeComments; /** remove comment tokens from included files */ }; - SIMPLECPP_LIB long long characterLiteralToLL(const std::string& str); - - SIMPLECPP_LIB FileDataCache load(const TokenList &rawtokens, std::vector &filenames, const DUI &dui, OutputList *outputList = nullptr); - - /** - * Preprocess - * @todo simplify interface - * @param output TokenList that receives the preprocessing output - * @param rawtokens Raw tokenlist for top sourcefile - * @param files internal data of simplecpp - * @param cache output from simplecpp::load() - * @param dui defines, undefs, and include paths - * @param outputList output: list that will receive output messages - * @param macroUsage output: macro usage - * @param ifCond output: #if/#elif expressions - */ - SIMPLECPP_LIB void preprocess(TokenList &output, const TokenList &rawtokens, std::vector &files, FileDataCache &cache, const DUI &dui, OutputList *outputList = nullptr, std::list *macroUsage = nullptr, std::list *ifCond = nullptr); - - /** - * Deallocate data - */ - SIMPLECPP_LIB void cleanup(FileDataCache &cache); - - /** Simplify path */ - SIMPLECPP_LIB std::string simplifyPath(std::string path); - - /** Convert Cygwin path to Windows path */ - SIMPLECPP_LIB std::string convertCygwinToWindowsPath(const std::string &cygwinPath); - - /** Returns the C version a given standard */ - SIMPLECPP_LIB cstd_t getCStd(const std::string &std); - - /** Returns the C++ version a given standard */ - SIMPLECPP_LIB cppstd_t getCppStd(const std::string &std); - - /** Returns the __STDC_VERSION__ value for a given standard */ - SIMPLECPP_LIB std::string getCStdString(const std::string &std); - SIMPLECPP_LIB std::string getCStdString(cstd_t std); - - /** Returns the __cplusplus value for a given standard */ - SIMPLECPP_LIB std::string getCppStdString(const std::string &std); - SIMPLECPP_LIB std::string getCppStdString(cppstd_t std); - struct SIMPLECPP_LIB FileData { /** The canonical filename associated with this data */ std::string filename; @@ -503,8 +460,50 @@ namespace simplecpp { container_type mData; name_map_type mNameMap; id_map_type mIdMap; - }; + + SIMPLECPP_LIB long long characterLiteralToLL(const std::string& str); + + SIMPLECPP_LIB FileDataCache load(const TokenList &rawtokens, std::vector &filenames, const DUI &dui, OutputList *outputList = nullptr, FileDataCache cache = {}); + + /** + * Preprocess + * @todo simplify interface + * @param output TokenList that receives the preprocessing output + * @param rawtokens Raw tokenlist for top sourcefile + * @param files internal data of simplecpp + * @param cache output from simplecpp::load() + * @param dui defines, undefs, and include paths + * @param outputList output: list that will receive output messages + * @param macroUsage output: macro usage + * @param ifCond output: #if/#elif expressions + */ + SIMPLECPP_LIB void preprocess(TokenList &output, const TokenList &rawtokens, std::vector &files, FileDataCache &cache, const DUI &dui, OutputList *outputList = nullptr, std::list *macroUsage = nullptr, std::list *ifCond = nullptr); + + /** + * Deallocate data + */ + SIMPLECPP_LIB void cleanup(FileDataCache &cache); + + /** Simplify path */ + SIMPLECPP_LIB std::string simplifyPath(std::string path); + + /** Convert Cygwin path to Windows path */ + SIMPLECPP_LIB std::string convertCygwinToWindowsPath(const std::string &cygwinPath); + + /** Returns the C version a given standard */ + SIMPLECPP_LIB cstd_t getCStd(const std::string &std); + + /** Returns the C++ version a given standard */ + SIMPLECPP_LIB cppstd_t getCppStd(const std::string &std); + + /** Returns the __STDC_VERSION__ value for a given standard */ + SIMPLECPP_LIB std::string getCStdString(const std::string &std); + SIMPLECPP_LIB std::string getCStdString(cstd_t std); + + /** Returns the __cplusplus value for a given standard */ + SIMPLECPP_LIB std::string getCppStdString(const std::string &std); + SIMPLECPP_LIB std::string getCppStdString(cppstd_t std); } #if defined(_MSC_VER) diff --git a/test.cpp b/test.cpp index de9f250b..ccb653ca 100644 --- a/test.cpp +++ b/test.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #define STRINGIZE_(x) #x @@ -2087,6 +2088,49 @@ static void systemInclude() ASSERT_EQUALS("", toString(outputList)); } +static void circularInclude() +{ + std::vector files; + simplecpp::FileDataCache cache; + + { + const char *const path = "test.h"; + const char code[] = + "#ifndef TEST_H\n" + "#define TEST_H\n" + "#include \"a/a.h\"\n" + "#endif\n" + ; + cache.insert({path, makeTokenList(code, files, path)}); + } + + { + const char *const path = "a/a.h"; + const char code[] = + "#ifndef A_H\n" + "#define A_H\n" + "#include \"../test.h\"\n" + "#endif\n" + ; + cache.insert({path, makeTokenList(code, files, path)}); + } + + simplecpp::OutputList outputList; + simplecpp::TokenList tokens2(files); + { + std::vector filenames; + const simplecpp::DUI dui; + + const char code[] = "#include \"test.h\"\n"; + const simplecpp::TokenList rawtokens = makeTokenList(code, files, "test.cpp"); + + cache = simplecpp::load(rawtokens, filenames, dui, &outputList, std::move(cache)); + simplecpp::preprocess(tokens2, rawtokens, files, cache, dui, &outputList); + } + + ASSERT_EQUALS("", toString(outputList)); +} + static void multiline1() { const char code[] = "#define A \\\n" @@ -3314,6 +3358,7 @@ int main(int argc, char **argv) TEST_CASE(missingHeader4); TEST_CASE(nestedInclude); TEST_CASE(systemInclude); + TEST_CASE(circularInclude); TEST_CASE(nullDirective1); TEST_CASE(nullDirective2); From fead0b280a3b242b15191c3ebaefc42b6159eb16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Fri, 22 Aug 2025 15:18:19 +0200 Subject: [PATCH 38/41] CI-unixish.yml: removed duplicated execution of integration test (#503) --- .github/workflows/CI-unixish.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index cb498b5e..c343e279 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -51,10 +51,6 @@ jobs: run: | make -j$(nproc) selfcheck - - name: integration test - run: | - python3 -m pytest integration_test.py -vv - - name: Run CMake run: | cmake -S . -B cmake.output From 7bca11f0ec435df3c9ccefd37c9a21a3c575355b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Fri, 22 Aug 2025 18:24:41 +0200 Subject: [PATCH 39/41] CI-unixish.yml: do not run with `g++` on `macos-*` as it is just an alias for `clang++` (#483) --- .github/workflows/CI-unixish.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index c343e279..718414d8 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -7,8 +7,13 @@ jobs: strategy: matrix: - compiler: [clang++, g++] os: [ubuntu-22.04, ubuntu-24.04, macos-13, macos-14, macos-15] + compiler: [clang++] + include: + - os: ubuntu-22.04 + compiler: g++ + - os: ubuntu-24.04 + compiler: g++ fail-fast: false runs-on: ${{ matrix.os }} From 285998182edd0280a9c1b5fe877f074fe441fd16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Fri, 22 Aug 2025 20:51:46 +0200 Subject: [PATCH 40/41] fixed #478 - fail builds in CI on compiler warnings (#479) --- .github/workflows/CI-unixish.yml | 16 ++++++++-------- .github/workflows/CI-windows.yml | 2 +- .github/workflows/clang-tidy.yml | 2 +- CMakeLists.txt | 4 ++++ appveyor.yml | 3 ++- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index 718414d8..c80aaba2 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -47,10 +47,10 @@ jobs: python3 -m pip install pytest - name: make simplecpp - run: make -j$(nproc) + run: make -j$(nproc) CXXOPTS="-Werror" - name: make test - run: make -j$(nproc) test + run: make -j$(nproc) test CXXOPTS="-Werror" - name: selfcheck run: | @@ -58,7 +58,7 @@ jobs: - name: Run CMake run: | - cmake -S . -B cmake.output + cmake -S . -B cmake.output -DCMAKE_COMPILE_WARNING_AS_ERROR=On - name: CMake simplecpp run: | @@ -81,19 +81,19 @@ jobs: if: matrix.os == 'ubuntu-24.04' && matrix.compiler == 'g++' run: | make clean - make -j$(nproc) test selfcheck CXXOPTS="-g3 -D_GLIBCXX_DEBUG" + make -j$(nproc) test selfcheck CXXOPTS="-Werror -g3 -D_GLIBCXX_DEBUG" - name: Run with libc++ hardening mode if: matrix.os == 'ubuntu-24.04' && matrix.compiler == 'clang++' run: | make clean - make -j$(nproc) test selfcheck CXXOPTS="-stdlib=libc++ -g3 -D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG" LDOPTS="-lc++" + make -j$(nproc) test selfcheck CXXOPTS="-Werror -stdlib=libc++ -g3 -D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG" LDOPTS="-lc++" - name: Run AddressSanitizer if: matrix.os == 'ubuntu-24.04' run: | make clean - make -j$(nproc) test selfcheck CXXOPTS="-O2 -g3 -fsanitize=address" LDOPTS="-fsanitize=address" + make -j$(nproc) test selfcheck CXXOPTS="-Werror -O2 -g3 -fsanitize=address" LDOPTS="-fsanitize=address" env: ASAN_OPTIONS: detect_stack_use_after_return=1 @@ -101,7 +101,7 @@ jobs: if: matrix.os == 'ubuntu-24.04' run: | make clean - make -j$(nproc) test selfcheck CXXOPTS="-O2 -g3 -fsanitize=undefined -fno-sanitize=signed-integer-overflow" LDOPTS="-fsanitize=undefined -fno-sanitize=signed-integer-overflow" + make -j$(nproc) test selfcheck CXXOPTS="-Werror -O2 -g3 -fsanitize=undefined -fno-sanitize=signed-integer-overflow" LDOPTS="-fsanitize=undefined -fno-sanitize=signed-integer-overflow" env: UBSAN_OPTIONS: print_stacktrace=1:halt_on_error=1:report_error_type=1 @@ -110,4 +110,4 @@ jobs: if: false && matrix.os == 'ubuntu-24.04' && matrix.compiler == 'clang++' run: | make clean - make -j$(nproc) test selfcheck CXXOPTS="-O2 -g3 -stdlib=libc++ -fsanitize=memory" LDOPTS="-lc++ -fsanitize=memory" + make -j$(nproc) test selfcheck CXXOPTS="-Werror -O2 -g3 -stdlib=libc++ -fsanitize=memory" LDOPTS="-lc++ -fsanitize=memory" diff --git a/.github/workflows/CI-windows.yml b/.github/workflows/CI-windows.yml index 971f3827..d4c99388 100644 --- a/.github/workflows/CI-windows.yml +++ b/.github/workflows/CI-windows.yml @@ -40,7 +40,7 @@ jobs: - name: Run CMake run: | - cmake -G "Visual Studio 17 2022" -A x64 . || exit /b !errorlevel! + cmake -G "Visual Studio 17 2022" -A x64 -DCMAKE_COMPILE_WARNING_AS_ERROR=On . || exit /b !errorlevel! - name: Build run: | diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index a2f7b6dc..41d2ee6f 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -30,7 +30,7 @@ jobs: - name: Prepare CMake run: | - cmake -S . -B cmake.output -G "Unix Makefiles" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + cmake -S . -B cmake.output -G "Unix Makefiles" -DCMAKE_COMPILE_WARNING_AS_ERROR=On -DCMAKE_EXPORT_COMPILE_COMMANDS=ON env: CXX: clang-20 diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ab0166e..f9e3eb6d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,10 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") add_compile_options_safe(-Wuseless-cast) elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + # TODO: bump warning level + #add_compile_options(/W4) # Warning Level + # TODO: enable warning + add_compile_options(/wd4267) # warning C4267: '...': conversion from 'size_t' to 'unsigned int', possible loss of data elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Weverything) # no need for c++98 compatibility diff --git a/appveyor.yml b/appveyor.yml index ea8dd1df..09aa6cbe 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -10,9 +10,10 @@ environment: build_script: - ECHO Building %configuration% %platform% with MSVC %VisualStudioVersion% using %PlatformToolset% PlatformToolset - - cmake -G "Visual Studio 14" . + - cmake -DCMAKE_COMPILE_WARNING_AS_ERROR=On -G "Visual Studio 14" . - dir - 'CALL "C:\Program Files (x86)\Microsoft Visual Studio %VisualStudioVersion%\VC\vcvarsall.bat" %vcvarsall_platform%' + - set _CL_=/WX - msbuild "simplecpp.sln" /consoleloggerparameters:Verbosity=minimal /target:Build /property:Configuration="%configuration%";Platform=%platform% /p:PlatformToolset=%PlatformToolset% /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" test_script: From 538c5c4cd8baf806835c0d51ce2a9814ca883a7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Sat, 23 Aug 2025 10:47:23 +0200 Subject: [PATCH 41/41] fixed #485 - addressed zizmor findings in GitHub Actions (#486) --- .github/workflows/CI-unixish.yml | 5 +++++ .github/workflows/CI-windows.yml | 5 +++++ .github/workflows/clang-tidy.yml | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index c80aaba2..60361389 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -2,6 +2,9 @@ name: CI-unixish on: [push, pull_request] +permissions: + contents: read + jobs: build: @@ -23,6 +26,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Install missing software on ubuntu if: matrix.os == 'ubuntu-24.04' diff --git a/.github/workflows/CI-windows.yml b/.github/workflows/CI-windows.yml index d4c99388..767bba6c 100644 --- a/.github/workflows/CI-windows.yml +++ b/.github/workflows/CI-windows.yml @@ -6,6 +6,9 @@ name: CI-windows on: [push,pull_request] +permissions: + contents: read + defaults: run: shell: cmd @@ -23,6 +26,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Setup msbuild.exe uses: microsoft/setup-msbuild@v2 diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index 41d2ee6f..fd71bdfe 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -4,6 +4,9 @@ name: clang-tidy on: [push, pull_request] +permissions: + contents: read + jobs: build: @@ -11,6 +14,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Install missing software run: |