diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 00000000..e0b384bf --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,57 @@ +--- +Checks: > + *, + -abseil-*, + -altera-*, + -android-*, + -boost-*, + -cert-*, + -clang-analyzer-*, + -cppcoreguidelines-*, + -fuchsia-*, + -google-*, + -hicpp-*, + -linuxkernel-*, + -llvm-*, + -llvmlibc-*, + -mpi-*, + -objc-*, + -openmp-*, + -zircon-*, + -bugprone-branch-clone, + -bugprone-easily-swappable-parameters, + -bugprone-narrowing-conversions, + -bugprone-switch-missing-default-case, + -bugprone-throwing-static-initialization, + -bugprone-unchecked-string-to-number-conversion, + -concurrency-mt-unsafe, + -misc-no-recursion, + -misc-non-private-member-variables-in-classes, + -misc-use-anonymous-namespace, + -modernize-avoid-c-arrays, + -modernize-loop-convert, + -modernize-pass-by-value, + -modernize-use-nodiscard, + -modernize-use-trailing-return-type, + -readability-avoid-nested-conditional-operator, + -readability-braces-around-statements, + -readability-function-cognitive-complexity, + -readability-function-size, + -readability-implicit-bool-conversion, + -readability-identifier-length, + -readability-isolate-declaration, + -readability-magic-numbers, + -readability-redundant-inline-specifier, + -readability-use-concise-preprocessor-directives, + -readability-uppercase-literal-suffix, + -performance-avoid-endl, + -performance-inefficient-string-concatenation, + -performance-no-automatic-move, + -performance-noexcept-move-constructor +HeaderFilterRegex: '.*' +WarningsAsErrors: '*' +CheckOptions: + - key: misc-const-correctness.WarnPointersAsValues + value: '1' + - key: misc-const-correctness.TransformPointersAsValues + value: '1' \ No newline at end of file diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..bd256eb3 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,16 @@ +# +# This file lists revisions that should be ignored when considering +# attribution for the 
actual code written. Code style changes should +# not be considered as modifications with regards to attribution. +# +# To see clean and meaningful blame information. +# $ git blame important.py --ignore-revs-file .git-blame-ignore-revs +# +# To configure git to automatically ignore revisions listed in a file +# on every call to git blame. +# $ git config blame.ignoreRevsFile .git-blame-ignore-revs +# +# Ignore changes introduced when doing global file format changes + +# Switch to uncrustify (#517) +cfd179711f413aa8e0da9c2f437ad4f8938d5f70 diff --git a/.github/workflows/CI-mingw.yml b/.github/workflows/CI-mingw.yml new file mode 100644 index 00000000..390de83d --- /dev/null +++ b/.github/workflows/CI-mingw.yml @@ -0,0 +1,141 @@ +name: CI-mingw + +on: [push, pull_request] + +permissions: + contents: read + +defaults: + run: + shell: msys2 {0} + +jobs: + build: + + strategy: + matrix: + compiler: [g++, clang++] + # TODO: add MSYS after #556 is fixed + msystem: [MINGW32, MINGW64, CLANG64] + include: + #- msystem: MSYS + # pkg-prefix: '' + - msystem: MINGW32 + pkg-prefix: 'mingw-w64-i686-' + - msystem: MINGW64 + pkg-prefix: 'mingw-w64-x86_64-' + - msystem: CLANG64 + pkg-prefix: 'mingw-w64-clang-x86_64-' + - compiler: g++ + compiler-pkg: gcc + - compiler: clang++ + compiler-pkg: clang + exclude: + - msystem: CLANG64 + compiler: g++ + # the mingw-w64-i686-clang package is no longer available + - msystem: MINGW32 + compiler: clang++ + fail-fast: false + + runs-on: windows-2025 + + env: + CXX: ${{ matrix.compiler }} + + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Set up MSYS2 + uses: msys2/setup-msys2@v2 + with: + release: false # use pre-installed + msystem: ${{ matrix.msystem }} + # TODO: install mingw-w64-x86_64-make and use mingw32.make instead - currently fails with "Windows Subsystem for Linux has no installed distributions." + # TODO: also run tests with non-prefixed Python? 
+ install: >- + make + ${{ matrix.pkg-prefix }}cmake + ${{ matrix.pkg-prefix }}python + ${{ matrix.pkg-prefix }}python-pytest + + - name: install compiler + run: | + pacman -S --noconfirm ${{ matrix.pkg-prefix }}${{ matrix.compiler-pkg }} + ${CXX} -v + + - name: make simplecpp + run: | + make -j$(nproc) CXXOPTS="-Werror" + + # gcc *and* clang are required to run-tests.py + # install it at this point since it has gcc as dependency which might interfere with the build + - name: install compiler (clang) + if: matrix.compiler == 'g++' + run: | + pacman -S --noconfirm clang + + - name: install compiler (gcc) + if: matrix.compiler == 'clang++' + run: | + pacman -S --noconfirm gcc + + - name: make test + run: | + # TODO: run tests with Windows paths + make -j$(nproc) test + + - name: selfcheck + run: | + # TODO: run tests with Windows paths + make -j$(nproc) selfcheck + + - name: make (c++14) + run: | + make clean + make -j$(nproc) CXXOPTS="-Werror -std=c++14" + + - name: make (c++17) + run: | + make clean + make -j$(nproc) CXXOPTS="-Werror -std=c++17" + + - name: make (c++20) + run: | + make clean + make -j$(nproc) CXXOPTS="-Werror -std=c++20" + + - name: make (c++23) + run: | + make clean + make -j$(nproc) CXXOPTS="-Werror -std=c++23" + + - name: Run CMake + run: | + cmake -S . 
-B cmake.output -DCMAKE_COMPILE_WARNING_AS_ERROR=On + + - name: CMake simplecpp + run: | + cmake --build cmake.output --target simplecpp -- -j $(nproc) + + - name: CMake testrunner + run: | + cmake --build cmake.output --target testrunner -- -j $(nproc) + + - name: Run testrunner + run: | + ./cmake.output/testrunner + + - name: Run with libstdc++ debug mode + if: matrix.compiler == 'g++' + run: | + make clean + make -j$(nproc) test selfcheck CXXOPTS="-Werror -g3 -D_GLIBCXX_DEBUG" + + - name: Run with libc++ hardening mode + if: matrix.compiler == 'clang++' && matrix.msystem == 'CLANG64' + run: | + make clean + make -j$(nproc) test selfcheck CXXOPTS="-Werror -stdlib=libc++ -g3 -D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG" LDOPTS="-lc++" diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index 5205d85e..cdda088b 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -1,38 +1,198 @@ -name: CI Unixish +name: CI-unixish on: [push, pull_request] +permissions: + contents: read + jobs: build: strategy: matrix: - compiler: [clang++, g++] - os: [ubuntu-16.04, ubuntu-18.04, ubuntu-20.04, macos-10.15] - fail-fast: true + os: [ubuntu-22.04, ubuntu-22.04-arm, ubuntu-24.04, ubuntu-24.04-arm, macos-14, macos-15, macos-15-intel, macos-26, macos-26-intel] + compiler: [clang++] + include: + - os: ubuntu-22.04 + compiler: g++ + - os: ubuntu-22.04-arm + compiler: g++ + - os: ubuntu-24.04 + compiler: g++ + - os: ubuntu-24.04-arm + compiler: g++ + fail-fast: false runs-on: ${{ matrix.os }} + env: + CXX: ${{ matrix.compiler }} + steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 + with: + persist-credentials: false + + # the man-db trigger causes package installations to stall for several minutes at times. so just drop the package. 
+ # see https://github.com/actions/runner/issues/4030 + - name: Remove man-db package on ubuntu + if: matrix.os == 'ubuntu-24.04' + run: | + sudo apt-get update + sudo apt-get remove man-db - name: Install missing software on ubuntu - if: matrix.os == 'ubuntu-20.04' + if: matrix.os == 'ubuntu-24.04' run: | sudo apt-get update sudo apt-get install valgrind - + + # llvm contains llvm-profdata + - name: Install missing software on ubuntu (clang++) + if: contains(matrix.os, 'ubuntu') && matrix.compiler == 'clang++' + run: | + sudo apt-get update + sudo apt-get install libc++-dev llvm + + # coreutils contains "nproc" + - name: Install missing software on macos + if: contains(matrix.os, 'macos') + run: | + brew install coreutils + + - name: Install missing Python packages + run: | + python3 -m pip config set global.break-system-packages true + python3 -m pip install pytest + - name: make simplecpp - run: make -j$(nproc) CXX=${{ matrix.compiler }} + run: make -j$(nproc) CXXOPTS="-Werror" - name: make test - run: make -j$(nproc) test CXX=${{ matrix.compiler }} + run: make -j$(nproc) test CXXOPTS="-Werror" + + - name: selfcheck + run: | + make -j$(nproc) selfcheck + + - name: make (c++14) + run: | + make clean + make -j$(nproc) CXXOPTS="-Werror -std=c++14" - - name: ensure that simplecpp.cpp uses c++03 - run: CXX=${{ matrix.compiler }} ; $CXX -fsyntax-only -std=c++98 simplecpp.cpp + - name: make (c++17) + run: | + make clean + make -j$(nproc) CXXOPTS="-Werror -std=c++17" + + - name: make (c++20) + run: | + make clean + make -j$(nproc) CXXOPTS="-Werror -std=c++20" + + - name: make (c++23) + run: | + make clean + # ubuntu-22.04 and macos-14 do not support c++23 yet + make -j$(nproc) CXXOPTS="-Werror -std=c++2b" + + - name: Run CMake + run: | + cmake -S . 
-B cmake.output -Werror=dev --warn-uninitialized -DCMAKE_COMPILE_WARNING_AS_ERROR=On + + - name: CMake simplecpp + run: | + cmake --build cmake.output --target simplecpp -- -j $(nproc) + + - name: CMake testrunner + run: | + cmake --build cmake.output --target testrunner -- -j $(nproc) + ./cmake.output/testrunner + # Re-run tests from within the build directory to validate that + # SIMPLECPP_TEST_SOURCE_DIR is correctly defined and resolved + (cd cmake.output && ./testrunner) - name: Run valgrind - if: matrix.os == 'ubuntu-20.04' + if: matrix.os == 'ubuntu-24.04' run: | + make clean + make -j$(nproc) CXXOPTS="-O1" valgrind --leak-check=full --num-callers=50 --show-reachable=yes --track-origins=yes --gen-suppressions=all --error-exitcode=42 ./testrunner - + # TODO: run Python tests with valgrind + VALGRIND_TOOL=memcheck ./selfcheck.sh + + - name: Run with libstdc++ debug mode + if: matrix.os == 'ubuntu-24.04' && matrix.compiler == 'g++' + run: | + make clean + make -j$(nproc) test selfcheck CXXOPTS="-Werror -g3 -D_GLIBCXX_DEBUG" + + - name: Run with libc++ hardening mode + if: matrix.os == 'ubuntu-24.04' && matrix.compiler == 'clang++' + run: | + make clean + make -j$(nproc) test selfcheck CXXOPTS="-Werror -stdlib=libc++ -g3 -D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG" LDOPTS="-lc++" + + - name: Run AddressSanitizer + if: matrix.os == 'ubuntu-24.04' || matrix.os == 'macos-26' + run: | + make clean + make -j$(nproc) test selfcheck CXXOPTS="-Werror -O2 -g3 -fsanitize=address" LDOPTS="-fsanitize=address" + env: + ASAN_OPTIONS: detect_stack_use_after_return=1 + + - name: Run UndefinedBehaviorSanitizer + if: matrix.os == 'ubuntu-24.04' || matrix.os == 'macos-26' + run: | + make clean + make -j$(nproc) test selfcheck CXXOPTS="-Werror -O2 -g3 -fsanitize=undefined -fno-sanitize=signed-integer-overflow" LDOPTS="-fsanitize=undefined -fno-sanitize=signed-integer-overflow" + env: + UBSAN_OPTIONS: print_stacktrace=1:halt_on_error=1:report_error_type=1 + + # TODO: 
requires instrumented libc++ + - name: Run MemorySanitizer + if: false && matrix.os == 'ubuntu-24.04' && matrix.compiler == 'clang++' + run: | + make clean + make -j$(nproc) test selfcheck CXXOPTS="-Werror -O2 -g3 -stdlib=libc++ -fsanitize=memory" LDOPTS="-lc++ -fsanitize=memory" + + - name: Run callgrind + if: matrix.os == 'ubuntu-24.04' + run: | + wget https://github.com/danmar/simplecpp/archive/refs/tags/1.5.1.tar.gz + tar xvf 1.5.1.tar.gz + rm -f 1.5.1.tar.gz + + make clean + make -j$(nproc) CXXOPTS="-O2 -g3" simplecpp + VALGRIND_TOOL=callgrind SIMPLECPP_PATH=simplecpp-1.5.1 ./selfcheck.sh >callgrind.log || (cat callgrind.log && false) + cat callgrind.log + + # PGO - start + make clean + make -j$(nproc) CXXOPTS="-O2 -g3 -fprofile-generate" LDOPTS="-fprofile-generate" simplecpp + SIMPLECPP_PATH=simplecpp-1.5.1 ./selfcheck.sh >/dev/null + + if compgen -G "default_*.profraw" > /dev/null; then + llvm-profdata merge -output=default.profdata default_*.profraw + fi + + make clean + make -j$(nproc) CXXOPTS="-O2 -g3 -fprofile-use" LDOPTS="-fprofile-use" simplecpp + VALGRIND_TOOL=callgrind SIMPLECPP_PATH=simplecpp-1.5.1 ./selfcheck.sh >callgrind_pgo.log || (cat callgrind_pgo.log && false) + cat callgrind_pgo.log + # PGO - end + + for f in callgrind.out.*; + do + callgrind_annotate --auto=no $f > $f.annotated.log + head -50 $f.annotated.log + done + rm -rf simplecpp-1.5.1 + + - uses: actions/upload-artifact@v4 + if: matrix.os == 'ubuntu-24.04' + with: + name: Callgrind Output - ${{ matrix.compiler }} + path: | + ./callgrind.* diff --git a/.github/workflows/CI-windows.yml b/.github/workflows/CI-windows.yml index f64e34f9..cfced0d2 100644 --- a/.github/workflows/CI-windows.yml +++ b/.github/workflows/CI-windows.yml @@ -6,6 +6,9 @@ name: CI-windows on: [push,pull_request] +permissions: + contents: read + defaults: run: shell: cmd @@ -15,28 +18,65 @@ jobs: build: strategy: matrix: - # windows 2016 should default to VS 2017. 
Not supported by setup-msbuild - os: [windows-2019] - fail-fast: true + os: [windows-2022, windows-2025, windows-11-arm] + config: [Release, Debug] + fail-fast: false runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 + with: + persist-credentials: false - name: Setup msbuild.exe - uses: microsoft/setup-msbuild@v1.0.2 - - - name: Run cmake + uses: microsoft/setup-msbuild@v2 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.14' + check-latest: true + + - name: Install missing Python packages + run: | + python -m pip install pip --upgrade || exit /b !errorlevel! + python -m pip install pytest || exit /b !errorlevel! + + - name: Run CMake run: | - cmake -G "Visual Studio 16" . -A x64 - dir - + cmake -G "Visual Studio 17 2022" -A x64 -Werror=dev --warn-uninitialized -DCMAKE_COMPILE_WARNING_AS_ERROR=On . || exit /b !errorlevel! + - name: Build run: | - msbuild -m simplecpp.sln /p:Configuration=Release /p:Platform=x64 + msbuild -m simplecpp.sln /p:Configuration=${{ matrix.config }} /p:Platform=x64 || exit /b !errorlevel! - name: Test run: | - .\Release\testrunner.exe - + .\${{ matrix.config }}\testrunner.exe || exit /b !errorlevel! + + - name: Selfcheck + run: | + .\${{ matrix.config }}\simplecpp.exe simplecpp.cpp -e || exit /b !errorlevel! + + - name: integration test + run: | + set SIMPLECPP_EXE_PATH=.\${{ matrix.config }}\simplecpp.exe + python -m pytest integration_test.py -vv || exit /b !errorlevel! + + - name: Run CMake (c++17) + run: | + cmake -S . -B build.cxx17 -G "Visual Studio 17 2022" -A x64 -Werror=dev --warn-uninitialized -DCMAKE_CXX_STANDARD=17 -DCMAKE_COMPILE_WARNING_AS_ERROR=On || exit /b !errorlevel! + + - name: Build (c++17) + run: | + msbuild -m build.cxx17\simplecpp.sln /p:Configuration=${{ matrix.config }} /p:Platform=x64 || exit /b !errorlevel! + + - name: Run CMake (c++20) + run: | + cmake -S . 
-B build.cxx20 -G "Visual Studio 17 2022" -A x64 -Werror=dev --warn-uninitialized -DCMAKE_CXX_STANDARD=20 -DCMAKE_COMPILE_WARNING_AS_ERROR=On || exit /b !errorlevel! + + - name: Build (c++20) + run: | + msbuild -m build.cxx20\simplecpp.sln /p:Configuration=${{ matrix.config }} /p:Platform=x64 || exit /b !errorlevel! + diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml new file mode 100644 index 00000000..333672d7 --- /dev/null +++ b/.github/workflows/clang-tidy.yml @@ -0,0 +1,51 @@ +# Syntax reference https://help.github.com/en/actions/reference/workflow-syntax-for-github-actions +# Environment reference https://help.github.com/en/actions/reference/virtual-environments-for-github-hosted-runners +name: clang-tidy + +on: [push, pull_request] + +permissions: + contents: read + +jobs: + build: + + runs-on: ubuntu-24.04 + + steps: + - uses: actions/checkout@v6 + with: + persist-credentials: false + + # the man-db trigger causes package installations to stall for several minutes at times. so just drop the package. + # see https://github.com/actions/runner/issues/4030 + - name: Remove man-db package + run: | + sudo apt-get update + sudo apt-get remove man-db + + - name: Install missing software + run: | + sudo apt-get update + sudo apt-get install -y cmake make + + - name: Install clang + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 22 + sudo apt-get install clang-tidy-22 + + - name: Verify clang-tidy configuration + run: | + clang-tidy-22 --verify-config + + - name: Prepare CMake + run: | + cmake -S . 
-B cmake.output -Werror=dev --warn-uninitialized -DCMAKE_CXX_STANDARD=23 -DCMAKE_COMPILE_WARNING_AS_ERROR=On -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + env: + CXX: clang-22 + + - name: Clang-Tidy + run: | + run-clang-tidy-22 -q -j $(nproc) -enable-check-profile -p=cmake.output diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml new file mode 100644 index 00000000..e9f1361d --- /dev/null +++ b/.github/workflows/format.yml @@ -0,0 +1,62 @@ +# Syntax reference https://help.github.com/en/actions/reference/workflow-syntax-for-github-actions +# Environment reference https://help.github.com/en/actions/reference/virtual-environments-for-github-hosted-runners +name: format + +on: + push: + branches: + - 'master' + - 'releases/**' + - '1.*' + tags: + - '1.*' + pull_request: + +permissions: + contents: read + +jobs: + format: + + runs-on: ubuntu-22.04 + + defaults: + run: + shell: bash -euo pipefail {0} + + env: + UNCRUSTIFY_INSTALL_DIR: ${{ github.workspace }}/runformat-uncrustify + + steps: + - uses: actions/checkout@v6 + with: + persist-credentials: false + + - name: Determine uncrustify version + id: get-uncrustify-version + run: | + version="$(./runformat --expected-uncrustify-version)" + echo "Expected uncrustify version: $version" + echo "version=$version" >> "$GITHUB_OUTPUT" + + - name: Set UNCRUSTIFY_VERSION env variable + run: | + version=$(./runformat --expected-uncrustify-version) + echo "version [$version]" + echo "UNCRUSTIFY_VERSION=${version}" >> "$GITHUB_ENV" + + - name: Cache uncrustify + uses: actions/cache@v4 + id: cache-uncrustify + with: + path: ${{ env.UNCRUSTIFY_INSTALL_DIR }} + key: ${{ runner.os }}-uncrustify-${{ steps.get-uncrustify-version.outputs.version }} + + - name: Install uncrustify + if: steps.cache-uncrustify.outputs.cache-hit != 'true' + run: | + ./runformat --install --install-dir "${UNCRUSTIFY_INSTALL_DIR}" + + - name: Uncrustify check + run: | + ./runformat diff --git a/.gitignore b/.gitignore index f3bb1e3a..34d9c55d 
100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,11 @@ *.app simplecpp testrunner +/.runformat-uncrustify + +# CLion +/.idea +/cmake-build-* + +# python +__pycache__/ diff --git a/.uncrustify.cfg b/.uncrustify.cfg new file mode 100644 index 00000000..81722ff7 --- /dev/null +++ b/.uncrustify.cfg @@ -0,0 +1,170 @@ +# Uncrustify-0.80.1_f + +# The original size of tabs in the input. +# +# Default: 8 +input_tab_size = 4 # unsigned number + +# The size of tabs in the output (only used if align_with_tabs=true). +# +# Default: 8 +output_tab_size = 4 # unsigned number + +# Add or remove space between 'while' and '('. +sp_while_paren_open = add # ignore/add/remove/force + +# Add or remove space around boolean operators '&&' and '||'. +sp_bool = force # ignore/add/remove/force + +# Add or remove space inside '(' and ')'. +sp_inside_paren = remove # ignore/add/remove/force + +# Add or remove space between nested parentheses, i.e. '((' vs. ') )'. +sp_paren_paren = remove # ignore/add/remove/force + +# Add or remove space between ')' and '{'. +sp_paren_brace = force # ignore/add/remove/force + +# Add or remove space between pointer stars '*'. +sp_between_ptr_star = remove # ignore/add/remove/force + +# Add or remove space before '<'. +sp_before_angle = remove # ignore/add/remove/force + +# Add or remove space inside '<' and '>'. +sp_inside_angle = remove # ignore/add/remove/force + +# Add or remove space after '>'. +sp_after_angle = add # ignore/add/remove/force + +# Add or remove space between '>' and '(' as found in 'new List<byte>(foo);'. +sp_angle_paren = remove # ignore/add/remove/force + +# Add or remove space between '>' and a word as in 'List<byte> m;' or +# 'template <typename T> static ...'. +sp_angle_word = add # ignore/add/remove/force + +# Add or remove space between '>' and '>' in '>>' (template stuff). +# +# Default: add +sp_angle_shift = ignore # ignore/add/remove/force + +# (C++11) Permit removal of the space between '>>' in 'foo<bar<int> >'. 
Note +# that sp_angle_shift cannot remove the space without this option. +sp_permit_cpp11_shift = true # true/false + +# Add or remove space before '(' of control statements ('if', 'for', 'switch', +# 'while', etc.). +sp_before_sparen = force # ignore/add/remove/force + +# Add or remove space inside '(' and ')' of control statements. +sp_inside_sparen = remove # ignore/add/remove/force + +# Add or remove space after ')' of control statements. +sp_after_sparen = force # ignore/add/remove/force + +# Add or remove space between ')' and '{' of control statements. +sp_sparen_brace = force # ignore/add/remove/force + +# Add or remove space before ';' in non-empty 'for' statements. +sp_before_semi_for = remove # ignore/add/remove/force + +# Add or remove space after the final semicolon of an empty part of a for +# statement, as in 'for ( ; ; )'. +sp_after_semi_for_empty = remove # ignore/add/remove/force + +# Add or remove space before '[]'. +sp_before_squares = remove # ignore/add/remove/force + +# Add or remove space before C++17 structured bindings. +sp_cpp_before_struct_binding = ignore # ignore/add/remove/force + +# Add or remove space inside a non-empty '[' and ']'. +sp_inside_square = remove # ignore/add/remove/force + +# Add or remove space after class ':'. +sp_after_class_colon = force # ignore/add/remove/force + +# Add or remove space before class ':'. +sp_before_class_colon = force # ignore/add/remove/force + +# Add or remove space inside '{}'. +sp_inside_braces_empty = remove # ignore/add/remove/force + +# Add or remove space between 'else' and '{' if on the same line. +sp_else_brace = force # ignore/add/remove/force + +# Add or remove space between '}' and 'else' if on the same line. +sp_brace_else = force # ignore/add/remove/force + +# Add or remove space before the '{' of a 'catch' statement, if the '{' and +# 'catch' are on the same line, as in 'catch (decl) {'. 
+sp_catch_brace = force # ignore/add/remove/force + +# Add or remove space between '}' and 'catch' if on the same line. +sp_brace_catch = force # ignore/add/remove/force + +# The number of columns to indent per level. Usually 2, 3, 4, or 8. +# +# Default: 8 +indent_columns = 4 # unsigned number + +# How to use tabs when indenting code. +# +# 0: Spaces only +# 1: Indent with tabs to brace level, align with spaces (default) +# 2: Indent and align with tabs, using spaces when not on a tabstop +# +# Default: 1 +indent_with_tabs = 0 # unsigned number + +# Whether to indent the body of a 'namespace'. +indent_namespace = true # true/false + +# Whether the 'class' body is indented. +indent_class = true # true/false + +# How to indent access specifiers that are followed by a +# colon. +# +# >0: Absolute column where 1 is the leftmost column +# <=0: Subtract from brace indent +# +# Default: 1 +indent_access_spec = -4 # number + +# Whether to collapse empty blocks between '{' and '}' except for functions. +# Use nl_collapse_empty_body_functions to specify how empty function braces +# should be formatted. +nl_collapse_empty_body = true # true/false + +# Whether to collapse empty blocks between '{' and '}' for functions only. +# If true, overrides nl_inside_empty_func. +nl_collapse_empty_body_functions = true # true/false + +# Whether to convert all tabs to spaces in comments. If false, tabs in +# comments are left alone, unless used for indenting. +cmt_convert_tab_to_spaces = true # true/false + +# An offset value that controls the indentation of the body of a multiline #define. +# 'body' refers to all the lines of a multiline #define except the first line. +# Requires 'pp_ignore_define_body = false'. +# +# <0: Absolute column: the body indentation starts off at the specified column +# (ex. -3 ==> the body is indented starting from column 3) +# >=0: Relative to the column of the '#' of '#define' +# (ex. 
3 ==> the body is indented starting 3 columns at the right of '#') +# +# Default: 8 +pp_multiline_define_body_indent = 4 # number + +# The value might be used twice: +# - at the assignment +# - at the opening brace +# +# To prevent the double use of the indentation value, use this option with the +# value 'true'. +# +# true: indentation will be used only once +# false: indentation will be used every time (default) +indent_cpp_lambda_only_once = true # true/false diff --git a/CMakeLists.txt b/CMakeLists.txt index b1baef3d..0a90efae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,38 +1,113 @@ -cmake_minimum_required (VERSION 3.5) +cmake_minimum_required (VERSION 3.10) project (simplecpp LANGUAGES CXX) -if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") - add_compile_options(-Wall -Wextra -pedantic -Wcast-qual -Wfloat-equal -Wmissing-declarations -Wmissing-format-attribute -Wredundant-decls -Wshadow -Wundef) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +include(CheckCXXCompilerFlag) + +if (WIN32) + # prevent simplifyPath_cppcheck() from wasting time on looking for a hypothetical network host + add_definitions(-DUNCHOST=$ENV{COMPUTERNAME}) endif() -if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") +function(add_compile_options_safe FLAG) + string(MAKE_C_IDENTIFIER "HAS_CXX_FLAG${FLAG}" mangled_flag) + check_cxx_compiler_flag(${FLAG} ${mangled_flag}) + if (${mangled_flag}) + add_compile_options(${FLAG}) + endif() +endfunction() + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + add_compile_options(-pedantic) + + add_compile_options(-Wall) + add_compile_options(-Wextra) + add_compile_options(-Wcast-qual) # Cast for removing type qualifiers + add_compile_options(-Wfloat-equal) # Floating values used in equality comparisons + add_compile_options(-Wmissing-declarations) # If a global function is defined without a previous declaration + add_compile_options(-Wmissing-format-attribute) # + add_compile_options(-Wpacked) # + add_compile_options(-Wredundant-decls) # if 
anything is declared more than once in the same scope + add_compile_options(-Wundef) + add_compile_options(-Woverloaded-virtual) # when a function declaration hides virtual functions from a base class + + add_compile_options(-Wsuggest-attribute=noreturn) + if (NOT MINGW) + add_compile_options_safe(-Wuseless-cast) + endif() + + # we are not interested in these + set_source_files_properties(test.cpp PROPERTIES COMPILE_FLAGS -Wno-multichar) +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + + add_compile_options(/W4) # Warning Level + + add_compile_options(/wd4127) # warning C4127: conditional expression is constant + add_compile_options(/wd4244) # warning C4244: 'x': conversion from 'int' to 'char', possible loss of data + add_compile_options(/wd4267) # warning C4267: '...': conversion from 'size_t' to 'unsigned int', possible loss of data + add_compile_options(/wd4706) # warning C4706: assignment within conditional expression +elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Weverything) + # no need for c++98 compatibility add_compile_options(-Wno-c++98-compat-pedantic) - # these are not really fixable - add_compile_options(-Wno-exit-time-destructors -Wno-global-constructors) + + # these are not really fixable until newer standards + add_compile_options(-Wno-exit-time-destructors) + add_compile_options(-Wno-global-constructors) + add_compile_options(-Wno-weak-vtables) + add_compile_options_safe(-Wno-unsafe-buffer-usage) + add_compile_options_safe(-Wno-nrvo) + + # contradicts -Wcovered-switch-default + add_compile_options(-Wno-switch-default) + if (MINGW) + add_compile_options(-Wno-reserved-macro-identifier) + add_compile_options(-Wno-unused-macros) + endif() + + # these are experimental warnings which might produce false positives + add_compile_options_safe(-Wno-thread-safety-negative) + add_compile_options_safe(-Wno-thread-safety-beta) + # TODO: fix these? 
- add_compile_options(-Wno-zero-as-null-pointer-constant -Wno-padded -Wno-sign-conversion -Wno-conversion -Wno-old-style-cast) -endif() + add_compile_options(-Wno-padded) + add_compile_options(-Wno-sign-conversion) + add_compile_options(-Wno-implicit-int-conversion) + add_compile_options(-Wno-shorten-64-to-32) + add_compile_options(-Wno-shadow-field-in-constructor) + + # we are not interested in these + set_source_files_properties(test.cpp PROPERTIES COMPILE_FLAGS "-Wno-multichar -Wno-four-char-constants") + + if (CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 14 OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 14) + # TODO: verify this regression still exists in clang-15 + if (CMAKE_BUILD_TYPE STREQUAL "Release" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + # work around performance regression - see https://github.com/llvm/llvm-project/issues/53555 + add_compile_options(-mllvm -inline-deferral) + endif() -add_executable(simplecpp simplecpp.cpp main.cpp) -set_property(TARGET simplecpp PROPERTY CXX_STANDARD 11) - -# it is not possible to set a standard older than C++14 with Visual Studio -if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - # we need to create a dummy library as -fsyntax-only will not produce any output files causing the build to fail - add_library(simplecpp-03-syntax STATIC simplecpp.cpp) - target_compile_options(simplecpp-03-syntax PRIVATE -std=c++03) - if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") - target_compile_options(simplecpp-03-syntax PRIVATE -Wno-long-long) - elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(simplecpp-03-syntax PRIVATE -Wno-c++11-long-long) + # force the DWARF 4 debug format since not all tools might be able to handle DWARF 5 yet - e.g. 
valgrind on ubuntu 20.04 + add_compile_options(-gdwarf-4) + endif() + if (APPLE) + # CMake is sometimes choosing the wrong compiler on macos-* runners + # see https://github.com/actions/runner/issues/4034 + add_compile_options(-Wno-poison-system-directories) endif() - add_dependencies(simplecpp simplecpp-03-syntax) endif() -add_executable(testrunner simplecpp.cpp test.cpp) -set_property(TARGET testrunner PROPERTY CXX_STANDARD 11) +add_library(simplecpp_obj OBJECT simplecpp.cpp) + +add_executable(simplecpp $<TARGET_OBJECTS:simplecpp_obj> main.cpp) +add_executable(testrunner $<TARGET_OBJECTS:simplecpp_obj> test.cpp) +target_compile_definitions(testrunner + PRIVATE + SIMPLECPP_TEST_SOURCE_DIR="${CMAKE_CURRENT_SOURCE_DIR}" +) enable_testing() add_test(NAME testrunner COMMAND testrunner) diff --git a/LICENSE b/LICENSE index 341c30bd..b1f013e9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,166 +1,14 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 +BSD Zero Clause License - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. +Copyright (c) 2023 simplecpp team +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. - - 0. Additional Definitions. - - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. - - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. - - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. 
-Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. - - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". - - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version. - - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. - - 1. Exception to Section 3 of the GNU GPL. - - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. - - 2. Conveying Modified Versions. - - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: - - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or - - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. - - 3. Object Code Incorporating Material from Library Header Files. - - The object code form of an Application may incorporate material from -a header file that is part of the Library. 
You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: - - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the object code with a copy of the GNU GPL and this license - document. - - 4. Combined Works. - - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: - - a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. 
A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. - - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. (If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. If you use option 4d1, you must provide the Installation - Information in the manner specified by section 6 of the GNU GPL - for conveying Corresponding Source.) - - 5. Combined Libraries. - - You may place library facilities that are a work based on the -Library side by side in a single library together with other library -facilities that are not Applications and are not covered by this -License, and convey such a combined library under terms of your -choice, if you do both of the following: - - a) Accompany the combined library with a copy of the same work based - on the Library, uncombined with any other library facilities, - conveyed under the terms of this License. - - b) Give prominent notice with the combined library that part of it - is a work based on the Library, and explaining where to find the - accompanying uncombined form of the same work. - - 6. Revised Versions of the GNU Lesser General Public License. - - The Free Software Foundation may publish revised and/or new versions -of the GNU Lesser General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. 
- - Each version is given a distinguishing version number. If the -Library as you received it specifies that a certain numbered version -of the GNU Lesser General Public License "or any later version" -applies to it, you have the option of following the terms and -conditions either of that published version or of any later version -published by the Free Software Foundation. If the Library as you -received it does not specify a version number of the GNU Lesser -General Public License, you may choose any version of the GNU Lesser -General Public License ever published by the Free Software Foundation. - - If the Library as you received it specifies that a proxy can decide -whether future versions of the GNU Lesser General Public License shall -apply, that proxy's public statement of acceptance of any version is -permanent authorization for you to choose that version for the -Library. - +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. 
diff --git a/Makefile b/Makefile index 3251611e..b7b54597 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,29 @@ all: testrunner simplecpp -CXXFLAGS = -Wall -Wextra -pedantic -Wcast-qual -Wfloat-equal -Wmissing-declarations -Wmissing-format-attribute -Wredundant-decls -Wshadow -Wundef -Wno-multichar -std=c++0x -g -LDFLAGS = -g +CPPFLAGS ?= +CXXFLAGS = -Wall -Wextra -pedantic -Wcast-qual -Wfloat-equal -Wmissing-declarations -Wmissing-format-attribute -Wpacked -Wredundant-decls -Wundef -Woverloaded-virtual -std=c++11 -g $(CXXOPTS) +LDFLAGS = -g $(LDOPTS) -%.o: %.cpp simplecpp.h - $(CXX) $(CXXFLAGS) -c $< +# Define test source dir macro for compilation (preprocessor flags) +TEST_CPPFLAGS = -DSIMPLECPP_TEST_SOURCE_DIR=\"$(CURDIR)\" + +# Only test.o gets the define +test.o: CPPFLAGS += $(TEST_CPPFLAGS) +test.o: CXXFLAGS += -Wno-multichar +%.o: %.cpp simplecpp.h + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< testrunner: test.o simplecpp.o $(CXX) $(LDFLAGS) simplecpp.o test.o -o testrunner test: testrunner simplecpp - # The -std=c++03 makes sure that simplecpp.cpp is C++03 conformant. We don't require a C++11 compiler - g++ -std=c++03 -fsyntax-only simplecpp.cpp && ./testrunner && python run-tests.py + ./testrunner + python3 run-tests.py + python3 -m pytest integration_test.py -vv + +selfcheck: simplecpp + CXX=$(CXX) ./selfcheck.sh simplecpp: main.o simplecpp.o $(CXX) $(LDFLAGS) main.o simplecpp.o -o simplecpp diff --git a/appveyor.yml b/appveyor.yml index 00136602..09aa6cbe 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -10,10 +10,12 @@ environment: build_script: - ECHO Building %configuration% %platform% with MSVC %VisualStudioVersion% using %PlatformToolset% PlatformToolset - - cmake -G "Visual Studio 14" . + - cmake -DCMAKE_COMPILE_WARNING_AS_ERROR=On -G "Visual Studio 14" . 
- dir - 'CALL "C:\Program Files (x86)\Microsoft Visual Studio %VisualStudioVersion%\VC\vcvarsall.bat" %vcvarsall_platform%' + - set _CL_=/WX - msbuild "simplecpp.sln" /consoleloggerparameters:Verbosity=minimal /target:Build /property:Configuration="%configuration%";Platform=%platform% /p:PlatformToolset=%PlatformToolset% /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" test_script: - debug\testrunner.exe + - debug\simplecpp.exe simplecpp.cpp -e diff --git a/integration_test.py b/integration_test.py new file mode 100644 index 00000000..3ca2fd02 --- /dev/null +++ b/integration_test.py @@ -0,0 +1,505 @@ +## test with python -m pytest integration_test.py + +import os +import pathlib +import platform +import pytest +from testutils import simplecpp, format_include_path_arg, format_include + +def __test_relative_header_create_header(dir, with_pragma_once=True): + header_file = os.path.join(dir, 'test.h') + with open(header_file, 'wt') as f: + f.write(f""" + {"#pragma once" if with_pragma_once else ""} + #ifndef TEST_H_INCLUDED + #define TEST_H_INCLUDED + #else + #error header_was_already_included + #endif + const int dummy = 1; + """) + return header_file, "error: #error header_was_already_included" + +def __test_relative_header_create_source(dir, include1, include2, is_include1_sys=False, is_include2_sys=False, inv=False): + if inv: + return __test_relative_header_create_source(dir, include1=include2, include2=include1, is_include1_sys=is_include2_sys, is_include2_sys=is_include1_sys) + ## otherwise + + src_file = os.path.join(dir, 'test.c') + with open(src_file, 'wt') as f: + f.write(f""" + #undef TEST_H_INCLUDED + #include {format_include(include1, is_include1_sys)} + #include {format_include(include2, is_include2_sys)} + """) + return src_file + +@pytest.mark.parametrize("with_pragma_once", (False, True)) +@pytest.mark.parametrize("is_sys", (False, True)) +def test_relative_header_1(record_property, tmpdir, with_pragma_once, 
is_sys): + _, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) + + test_file = __test_relative_header_create_source(tmpdir, "test.h", "test.h", is_include1_sys=is_sys, is_include2_sys=is_sys) + + args = ([format_include_path_arg(tmpdir)] if is_sys else []) + [test_file] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + if with_pragma_once: + assert stderr == '' + else: + assert double_include_error in stderr + +@pytest.mark.parametrize("with_pragma_once", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +@pytest.mark.parametrize("source_relative", (False, True)) +def test_relative_header_2(record_property, tmpdir, with_pragma_once, inv, source_relative): + header_file, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) + + test_file = __test_relative_header_create_source(tmpdir, "test.h", header_file, inv=inv) + + args = ["test.c" if source_relative else test_file] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + if with_pragma_once: + assert stderr == '' + if inv or not source_relative: + assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout + else: + assert '#line 8 "test.h"' in stdout + else: + assert double_include_error in stderr + +@pytest.mark.parametrize("is_sys", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +@pytest.mark.parametrize("source_relative", (False, True)) +def test_relative_header_3(record_property, tmpdir, is_sys, inv, source_relative): + test_subdir = os.path.join(tmpdir, "test_subdir") + os.mkdir(test_subdir) + header_file, _ = __test_relative_header_create_header(test_subdir) + + test_file = __test_relative_header_create_source(tmpdir, "test_subdir/test.h", header_file, is_include1_sys=is_sys, inv=inv) + + args = ["test.c" if 
source_relative else test_file] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + if is_sys: + assert "missing header: Header not found" in stderr + else: + assert stderr == '' + if source_relative and not inv: + assert '#line 8 "test_subdir/test.h"' in stdout + else: + assert f'#line 8 "{pathlib.PurePath(test_subdir).as_posix()}/test.h"' in stdout + +@pytest.mark.parametrize("use_short_path", (False, True)) +@pytest.mark.parametrize("relative_include_dir", (False, True)) +@pytest.mark.parametrize("is_sys", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +@pytest.mark.parametrize("source_relative", (False, True)) +def test_relative_header_4(record_property, tmpdir, use_short_path, relative_include_dir, is_sys, inv, source_relative): + test_subdir = os.path.join(tmpdir, "test_subdir") + os.mkdir(test_subdir) + header_file, _ = __test_relative_header_create_header(test_subdir) + if use_short_path: + header_file = "test_subdir/test.h" + + test_file = __test_relative_header_create_source(tmpdir, header_file, "test.h", is_include2_sys=is_sys, inv=inv) + + args = [format_include_path_arg("test_subdir" if relative_include_dir else test_subdir), "test.c" if source_relative else test_file] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert stderr == '' + if (source_relative and use_short_path and not inv) or (relative_include_dir and inv): + assert '#line 8 "test_subdir/test.h"' in stdout + else: + assert f'#line 8 "{pathlib.PurePath(test_subdir).as_posix()}/test.h"' in stdout + +@pytest.mark.parametrize("with_pragma_once", (False, True)) +@pytest.mark.parametrize("relative_include_dir", (False, True)) +@pytest.mark.parametrize("is_sys", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +@pytest.mark.parametrize("source_relative", (False, True)) +def 
test_relative_header_5(record_property, tmpdir, with_pragma_once, relative_include_dir, is_sys, inv, source_relative): # test relative paths with .. + ## in this test, the subdir role is the opposite of the previous one - it contains the test.c file, while the parent tmpdir contains the header file + header_file, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) + if is_sys: + header_file_second_path = "test.h" + else: + header_file_second_path = "../test.h" + + test_subdir = os.path.join(tmpdir, "test_subdir") + os.mkdir(test_subdir) + test_file = __test_relative_header_create_source(test_subdir, header_file, header_file_second_path, is_include2_sys=is_sys, inv=inv) + + args = ([format_include_path_arg(".." if relative_include_dir else tmpdir)] if is_sys else []) + ["test.c" if source_relative else test_file] + + _, stdout, stderr = simplecpp(args, cwd=test_subdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + if with_pragma_once: + assert stderr == '' + if (relative_include_dir if is_sys else source_relative) and inv: + assert '#line 8 "../test.h"' in stdout + else: + assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout + else: + assert double_include_error in stderr + +@pytest.mark.parametrize("with_pragma_once", (False, True)) +@pytest.mark.parametrize("relative_include_dir", (False, True)) +@pytest.mark.parametrize("is_sys", (False, True)) +@pytest.mark.parametrize("inv", (False, True)) +@pytest.mark.parametrize("source_relative", (False, True)) +def test_relative_header_6(record_property, tmpdir, with_pragma_once, relative_include_dir, is_sys, inv, source_relative): # test relative paths with ..
that is resolved only by an include dir + ## in this test, both the header and the source file are at the same dir, but there is a dummy inclusion dir as a subdir + header_file, double_include_error = __test_relative_header_create_header(tmpdir, with_pragma_once=with_pragma_once) + + test_subdir = os.path.join(tmpdir, "test_subdir") + os.mkdir(test_subdir) + test_file = __test_relative_header_create_source(tmpdir, header_file, "../test.h", is_include2_sys=is_sys, inv=inv) + + args = [format_include_path_arg("test_subdir" if relative_include_dir else test_subdir), "test.c" if source_relative else test_file] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + if with_pragma_once: + assert stderr == '' + if relative_include_dir and inv: + assert '#line 8 "test.h"' in stdout + else: + assert f'#line 8 "{pathlib.PurePath(tmpdir).as_posix()}/test.h"' in stdout + else: + assert double_include_error in stderr + +def test_same_name_header(record_property, tmpdir): + include_a = os.path.join(tmpdir, "include_a") + include_b = os.path.join(tmpdir, "include_b") + + test_file = os.path.join(tmpdir, "test.c") + header_a = os.path.join(include_a, "header_a.h") + header_b = os.path.join(include_b, "header_b.h") + same_name_a = os.path.join(include_a, "same_name.h") + same_name_b = os.path.join(include_b, "same_name.h") + + os.mkdir(include_a) + os.mkdir(include_b) + + with open(test_file, "wt") as f: + f.write(""" + #include + #include + TEST + """) + + with open(header_a, "wt") as f: + f.write(""" + #include "same_name.h" + """) + + with open(header_b, "wt") as f: + f.write(""" + #include "same_name.h" + """) + + with open(same_name_a, "wt") as f: + f.write(""" + #define TEST E + """) + + with open(same_name_b, "wt") as f: + f.write(""" + #define TEST OK + """) + + args = [ + format_include_path_arg(include_a), + format_include_path_arg(include_b), + test_file + ] + + _, stdout, stderr = simplecpp(args, 
cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert "OK" in stdout + assert stderr == "" + +def test_pragma_once_matching(record_property, tmpdir): + test_dir = os.path.join(tmpdir, "test_dir") + test_subdir = os.path.join(test_dir, "test_subdir") + + test_file = os.path.join(test_dir, "test.c") + once_header = os.path.join(test_dir, "once.h") + + if platform.system() == "Windows": + names_to_test = [ + '"once.h"', + '"Once.h"', + '', + '', + '"../test_dir/once.h"', + '"../test_dir/Once.h"', + '"../Test_Dir/once.h"', + '"../Test_Dir/Once.h"', + '"test_subdir/../once.h"', + '"test_subdir/../Once.h"', + '"Test_Subdir/../once.h"', + '"Test_Subdir/../Once.h"', + f"\"{test_dir}/once.h\"", + f"\"{test_dir}/Once.h\"", + f"<{test_dir}/once.h>", + f"<{test_dir}/Once.h>", + ] + else: + names_to_test = [ + '"once.h"', + '', + '"../test_dir/once.h"', + '"test_subdir/../once.h"', + f"\"{test_dir}/once.h\"", + f"<{test_dir}/once.h>", + ] + + os.mkdir(test_dir) + os.mkdir(test_subdir) + + with open(test_file, "wt") as f: + for n in names_to_test: + f.write(f""" + #include {n} + """); + + with open(once_header, "wt") as f: + f.write(f""" + #pragma once + ONCE + """); + + args = [ + format_include_path_arg(test_dir), + test_file + ] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert stdout.count("ONCE") == 1 + assert stderr == "" + + +def test_input_multiple(record_property, tmpdir): + test_file = os.path.join(tmpdir, "test.c") + with open(test_file, 'w'): + pass + + test_file_1 = os.path.join(tmpdir, "test1.c") + with open(test_file_1, 'w'): + pass + + args = [ + 'test.c', + 'test1.c' + ] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert '' == stderr + assert "error: multiple filenames specified\n" == stdout + + +def test_input_missing(record_property, tmpdir): + args = [ 
+ 'missing.c' + ] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert '' == stderr + assert "error: could not open file 'missing.c'\n" == stdout + + +def test_input_dir(record_property, tmpdir): + test_dir = os.path.join(tmpdir, "test") + os.mkdir(test_dir) + + args = [ + 'test' + ] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert '' == stderr + assert "error: could not open file 'test'\n" == stdout + + +def test_incpath_missing(record_property, tmpdir): + test_file = os.path.join(tmpdir, "test.c") + with open(test_file, 'w'): + pass + + test_dir = os.path.join(tmpdir, "test") + os.mkdir(test_dir) + + args = [ + '-Itest', + '-Imissing', + 'test.c' + ] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert '' == stderr + assert "error: could not find include path 'missing'\n" == stdout + + +def test_incpath_file(record_property, tmpdir): + test_file = os.path.join(tmpdir, "test.c") + with open(test_file, 'w'): + pass + + inc_dir = os.path.join(tmpdir, "inc") + os.mkdir(inc_dir) + + inc_file = os.path.join(tmpdir, "inc.h") + with open(test_file, 'w'): + pass + + args = [ + '-Iinc', + '-Iinc.h', + 'test.c' + ] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert '' == stderr + assert "error: could not find include path 'inc.h'\n" == stdout + + +def test_incfile_missing(record_property, tmpdir): + test_file = os.path.join(tmpdir, "test.c") + with open(test_file, 'w'): + pass + + inc_file = os.path.join(tmpdir, "inc.h") + with open(inc_file, 'w'): + pass + + args = [ + '-include=inc.h', + '-include=missing.h', + 'test.c' + ] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", 
stderr) + + assert '' == stderr + assert "error: could not open include 'missing.h'\n" == stdout + + +def test_incpath_dir(record_property, tmpdir): + test_file = os.path.join(tmpdir, "test.c") + with open(test_file, 'w'): + pass + + inc_file = os.path.join(tmpdir, "inc.h") + with open(inc_file, 'w'): + pass + + inc_dir = os.path.join(tmpdir, "inc") + os.mkdir(inc_dir) + + args = [ + '-include=inc.h', + '-include=inc', + 'test.c' + ] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert '' == stderr + assert "error: could not open include 'inc'\n" == stdout + + +def test_include_header_twice(tmpdir): + """ Issue #581 - Failure when header is included twice with different + macros defined""" + + header_file = tmpdir / 'test.h' + with open(header_file, 'wt') as f: + f.write(f""" + #if defined AAA + #elif defined BBB + # undef BBB + #endif + + #ifdef BBB + # error BBB is defined + #endif + """) + + test_file = os.path.join(tmpdir, 'test.c') + with open(test_file, 'wt') as f: + f.write(f""" + # define Y + # include "test.h" + + # define BBB + # include "test.h" + """) + + args = [test_file] + + _, stdout, stderr = simplecpp(args, cwd=tmpdir) + + assert stderr == '' + + +def test_define(record_property, tmpdir): # #589 + test_file = os.path.join(tmpdir, "test.cpp") + with open(test_file, 'w') as f: + f.write( +"""#define PREFIX_WITH_MACRO(test_name) Macro##test_name + +TEST_P(PREFIX_WITH_MACRO(NamingTest), n) {} +""") + + args = [ + '-DTEST_P(A,B)=void __ ## A ## _ ## B ( )', + 'test.cpp' + ] + + exitcode, stdout, stderr = simplecpp(args, cwd=tmpdir) + record_property("stdout", stdout) + record_property("stderr", stderr) + + assert exitcode == 0 + assert stderr == "test.cpp:1: syntax error: failed to expand 'TEST_P', Invalid ## usage when expanding 'TEST_P': Unexpected token ')'\n" + assert stdout == '\n' \ No newline at end of file diff --git a/main.cpp b/main.cpp index 823ed9f7..5c48f830 
100644 --- a/main.cpp +++ b/main.cpp @@ -1,47 +1,153 @@ +/* + * simplecpp - A simple and high-fidelity C/C++ preprocessor library + * Copyright (C) 2016-2023 simplecpp team + */ +#define SIMPLECPP_TOKENLIST_ALLOW_PTR 0 #include "simplecpp.h" +#include +#include #include #include -#include +#include +#include +#include +#include + +static bool isDir(const std::string& path) +{ + struct stat file_stat; + if (stat(path.c_str(), &file_stat) == -1) + return false; + + return (file_stat.st_mode & S_IFMT) == S_IFDIR; +} int main(int argc, char **argv) { - const char *filename = NULL; + bool error = false; + const char *filename = nullptr; + bool use_istream = false; + bool fail_on_error = false; + bool linenrs = false; // Settings.. simplecpp::DUI dui; + dui.removeComments = true; + bool quiet = false; + bool error_only = false; for (int i = 1; i < argc; i++) { - const char *arg = argv[i]; + const char * const arg = argv[i]; if (*arg == '-') { - char c = arg[1]; - if (c != 'D' && c != 'U' && c != 'I' && c != 'i' && c != 's') - continue; // Ignored - const char *value = arg[2] ? (argv[i] + 2) : argv[++i]; + bool found = false; + const char c = arg[1]; switch (c) { - case 'D': // define symbol - dui.defines.push_back(value); + case 'D': { // define symbol + found = true; + const char * const value = arg[2] ? (argv[i] + 2) : argv[++i]; + if (!value) { + std::cout << "error: option -D with no value." << std::endl; + error = true; + break; + } + dui.defines.emplace_back(value); break; - case 'U': // undefine symbol + } + case 'U': { // undefine symbol + found = true; + const char * const value = arg[2] ? (argv[i] + 2) : argv[++i]; + if (!value) { + std::cout << "error: option -U with no value." << std::endl; + error = true; + break; + } dui.undefined.insert(value); break; - case 'I': // include path - dui.includePaths.push_back(value); + } + case 'I': { // include path + found = true; + const char * const value = arg[2] ? 
(arg + 2) : argv[++i]; + if (!value) { + std::cout << "error: option -I with no value." << std::endl; + error = true; + break; + } + dui.includePaths.emplace_back(value); break; + } case 'i': - if (std::strncmp(arg, "-include=",9)==0) - dui.includes.push_back(arg+9); + if (std::strncmp(arg, "-include=",9)==0) { + found = true; + std::string value = arg + 9; + if (value.empty()) { + std::cout << "error: option -include with no value." << std::endl; + error = true; + break; + } + dui.includes.emplace_back(std::move(value)); + } else if (std::strcmp(arg, "-is")==0) { + found = true; + use_istream = true; + } break; case 's': - if (std::strncmp(arg, "-std=",5)==0) - dui.std = arg + 5; + if (std::strncmp(arg, "-std=",5)==0) { + found = true; + std::string value = arg + 5; + if (value.empty()) { + std::cout << "error: option -std with no value." << std::endl; + error = true; + break; + } + dui.std = std::move(value); + } + break; + case 'q': + if (std::strcmp(arg, "-q")==0) { + found = true; + quiet = true; + } + break; + case 'e': + if (std::strcmp(arg, "-e")==0) { + found = true; + error_only = true; + } break; + case 'f': + if (std::strcmp(arg, "-f")==0) { + found = true; + fail_on_error = true; + } + break; + case 'l': + if (std::strcmp(arg, "-l")==0) { + linenrs = true; + found = true; + } + break; + } + if (!found) { + std::cout << "error: option '" << arg << "' is unknown." << std::endl; + error = true; } + } else if (filename) { + std::cout << "error: multiple filenames specified" << std::endl; + return 1; } else { filename = arg; } } + if (error) + return 1; + + if (quiet && error_only) { + std::cout << "error: -e cannot be used in conjunction with -q" << std::endl; + return 1; + } + if (!filename) { std::cout << "Syntax:" << std::endl; std::cout << "simplecpp [options] filename" << std::endl; @@ -49,56 +155,106 @@ int main(int argc, char **argv) std::cout << " -IPATH Include path." << std::endl; std::cout << " -include=FILE Include FILE." 
<< std::endl; std::cout << " -UNAME Undefine NAME." << std::endl; - std::exit(0); + std::cout << " -std=STD Specify standard." << std::endl; + std::cout << " -q Quiet mode (no output)." << std::endl; + std::cout << " -is Use std::istream interface." << std::endl; + std::cout << " -e Output errors only." << std::endl; + std::cout << " -f Fail when errors were encountered (exitcode 1)." << std::endl; + std::cout << " -l Print lines numbers." << std::endl; + return 0; + } + + // TODO: move this logic into simplecpp + bool inp_missing = false; + + for (const std::string& inc : dui.includes) { + std::ifstream f(inc); + if (!f.is_open() || isDir(inc)) { + inp_missing = true; + std::cout << "error: could not open include '" << inc << "'" << std::endl; + } + } + + for (const std::string& inc : dui.includePaths) { + if (!isDir(inc)) { + inp_missing = true; + std::cout << "error: could not find include path '" << inc << "'" << std::endl; + } + } + + std::ifstream f(filename); + if (!f.is_open() || isDir(filename)) { + inp_missing = true; + std::cout << "error: could not open file '" << filename << "'" << std::endl; } + if (inp_missing) + return 1; + // Perform preprocessing simplecpp::OutputList outputList; std::vector files; - std::ifstream f(filename); - simplecpp::TokenList rawtokens(f,files,filename,&outputList); - rawtokens.removeComments(); - std::map included = simplecpp::load(rawtokens, files, dui, &outputList); - for (std::pair i : included) - i.second->removeComments(); simplecpp::TokenList outputTokens(files); - simplecpp::preprocess(outputTokens, rawtokens, files, included, dui, &outputList); + { + simplecpp::TokenList *rawtokens; + if (use_istream) { + rawtokens = new simplecpp::TokenList(f, files,filename,&outputList); + } else { + f.close(); + rawtokens = new simplecpp::TokenList(filename,files,&outputList); + } + rawtokens->removeComments(); + simplecpp::FileDataCache filedata; + simplecpp::preprocess(outputTokens, *rawtokens, files, filedata, dui, 
&outputList); + simplecpp::cleanup(filedata); + delete rawtokens; + } // Output - std::cout << outputTokens.stringify() << std::endl; - for (const simplecpp::Output &output : outputList) { - std::cerr << output.location.file() << ':' << output.location.line << ": "; - switch (output.type) { - case simplecpp::Output::ERROR: - std::cerr << "#error: "; - break; - case simplecpp::Output::WARNING: - std::cerr << "#warning: "; - break; - case simplecpp::Output::MISSING_HEADER: - std::cerr << "missing header: "; - break; - case simplecpp::Output::INCLUDE_NESTED_TOO_DEEPLY: - std::cerr << "include nested too deeply: "; - break; - case simplecpp::Output::SYNTAX_ERROR: - std::cerr << "syntax error: "; - break; - case simplecpp::Output::PORTABILITY_BACKSLASH: - std::cerr << "portability: "; - break; - case simplecpp::Output::UNHANDLED_CHAR_ERROR: - std::cerr << "unhandled char error: "; - break; - case simplecpp::Output::EXPLICIT_INCLUDE_NOT_FOUND: - std::cerr << "explicit include not found: "; - break; + if (!quiet) { + if (!error_only) + std::cout << outputTokens.stringify(linenrs) << std::endl; + + for (const simplecpp::Output &output : outputList) { + std::cerr << outputTokens.file(output.location) << ':' << output.location.line << ": "; + switch (output.type) { + case simplecpp::Output::ERROR: + std::cerr << "#error: "; + break; + case simplecpp::Output::WARNING: + std::cerr << "#warning: "; + break; + case simplecpp::Output::MISSING_HEADER: + std::cerr << "missing header: "; + break; + case simplecpp::Output::INCLUDE_NESTED_TOO_DEEPLY: + std::cerr << "include nested too deeply: "; + break; + case simplecpp::Output::SYNTAX_ERROR: + std::cerr << "syntax error: "; + break; + case simplecpp::Output::PORTABILITY_BACKSLASH: + std::cerr << "portability: "; + break; + case simplecpp::Output::UNHANDLED_CHAR_ERROR: + std::cerr << "unhandled char error: "; + break; + case simplecpp::Output::EXPLICIT_INCLUDE_NOT_FOUND: + std::cerr << "explicit include not found: "; + break; + case 
simplecpp::Output::FILE_NOT_FOUND: + std::cerr << "file not found: "; + break; + case simplecpp::Output::DUI_ERROR: + std::cerr << "dui error: "; + break; + } + std::cerr << output.msg << std::endl; } - std::cerr << output.msg << std::endl; } - // cleanup included tokenlists - simplecpp::cleanup(included); + if (fail_on_error && !outputList.empty()) + return 1; return 0; } diff --git a/run-tests.py b/run-tests.py index 0211ed04..20f59a49 100644 --- a/run-tests.py +++ b/run-tests.py @@ -1,21 +1,35 @@ - import glob import os +import shutil import subprocess +import sys + -def cleanup(out): - ret = '' - for s in out.decode('utf-8').split('\n'): - if len(s) > 1 and s[0] == '#': +def cleanup(out: str) -> str: + parts = [] + for line in out.splitlines(): + if len(line) > 1 and line[0] == '#': continue - s = "".join(s.split()) - ret = ret + s - return ret + parts.append("".join(line.split())) + return "".join(parts) + + +# Check for required compilers and exit if any are missing +CLANG_EXE = shutil.which('clang') +if not CLANG_EXE: + sys.exit('Failed to run tests: clang compiler not found') + +GCC_EXE = shutil.which('gcc') +if not GCC_EXE: + sys.exit('Failed to run tests: gcc compiler not found') + +SIMPLECPP_EXE = './simplecpp' + commands = [] for f in sorted(glob.glob(os.path.expanduser('testsuite/clang-preprocessor-tests/*.c*'))): - for line in open(f, 'rt'): + for line in open(f, 'rt', encoding='utf-8'): if line.startswith('// RUN: %clang_cc1 '): cmd = '' for arg in line[19:].split(): @@ -38,7 +52,7 @@ def cleanup(out): 'has_attribute.cpp', 'header_lookup1.c', # missing include 'line-directive-output.c', - 'macro_paste_hashhash.c', + # 'macro_paste_hashhash.c', 'microsoft-ext.c', 'normalize-3.c', # gcc has different output \uAC00 vs \U0000AC00 on cygwin/linux 'pr63831-1.c', # __has_attribute => works differently on cygwin/linux @@ -70,7 +84,6 @@ def cleanup(out): 'c99-6_10_3_4_p6.c', 'expr_usual_conversions.c', # condition is true: 4U - 30 >= 0 'stdint.c', - 
def run(compiler_executable: str, compiler_args: list[str]) -> tuple[int, str, str, str]:
    """Run a preprocessor command and collect its results.

    Args:
        compiler_executable: Program to start (clang, gcc or simplecpp).
        compiler_args: Arguments passed to the program unchanged.

    Returns:
        A 4-tuple ``(exit_code, output, stdout, error)``: ``output`` is
        ``stdout`` passed through ``cleanup()``, ``stdout`` is the raw decoded
        standard output, and ``error`` is the standard error with
        surrounding whitespace stripped.
    """
    # Undecodable bytes are replaced instead of raising UnicodeDecodeError,
    # so a single odd test input cannot abort the whole test run.
    completed = subprocess.run(
        [compiler_executable, *compiler_args],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        encoding="utf-8",
        errors="replace",
        check=False,  # a non-zero exit code is reported to the caller, not raised
    )

    stdout = completed.stdout
    error = (completed.stderr or "").strip()
    return (completed.returncode, cleanup(stdout), stdout, error)
cmd) usedTodos.append(filename) else: print('FAILED ' + cmd) + print('---expected (clang):') + print(clang_output_c) + print('---expected (gcc):') + print(gcc_output_c) + print('---actual:') + print(simplecpp_output_c) + print('---output (clang):') + print(clang_output) + print('---output (gcc):') + print(gcc_output) + print('---output (simplecpp):') + print(simplecpp_output) + if simplecpp_ec: + print('simplecpp failed - ' + simplecpp_err) numberOfFailed = numberOfFailed + 1 for filename in todo: if not filename in usedTodos: print('FIXED ' + filename) + numberOfFixed = numberOfFixed + 1 print('Number of tests: ' + str(len(commands))) print('Number of skipped: ' + str(numberOfSkipped)) -print('Number of todos: ' + str(len(usedTodos))) +print('Number of todos (fixed): ' + str(len(usedTodos)) + ' (' + str(numberOfFixed) + ')') print('Number of failed: ' + str(numberOfFailed)) +if numberOfFailed or numberOfFixed: + sys.exit(1) + +sys.exit(0) diff --git a/runastyle b/runastyle deleted file mode 100755 index 64298273..00000000 --- a/runastyle +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# The version check in this script is used to avoid commit battles -# between different developers that use different astyle versions as -# different versions might have different output (this has happened in -# the past). - -# If project management wishes to take a newer astyle version into use -# just change this string to match the start of astyle version string. 
-ASTYLE_VERSION="Artistic Style Version 3.0.1" -ASTYLE="astyle" - -DETECTED_VERSION=`$ASTYLE --version 2>&1` -if [[ "$DETECTED_VERSION" != ${ASTYLE_VERSION}* ]]; then - echo "You should use: ${ASTYLE_VERSION}"; - echo "Detected: ${DETECTED_VERSION}" - exit 1; -fi - -style="--style=kr --indent=spaces=4 --indent-namespaces --lineend=linux --min-conditional-indent=0" -options="--options=none --pad-header --unpad-paren --suffix=none --convert-tabs --attach-inlines --attach-classes --attach-namespaces" - -$ASTYLE $style $options *.cpp -$ASTYLE $style $options *.h diff --git a/runastyle.bat b/runastyle.bat deleted file mode 100644 index b8f11561..00000000 --- a/runastyle.bat +++ /dev/null @@ -1,26 +0,0 @@ -@REM Script to run AStyle on the sources -@REM The version check in this script is used to avoid commit battles -@REM between different developers that use different astyle versions as -@REM different versions might have different output (this has happened in -@REM the past). - -@REM If project management wishes to take a newer astyle version into use -@REM just change this string to match the start of astyle version string. 
-@SET ASTYLE_VERSION="Artistic Style Version 3.0.1" -@SET ASTYLE="astyle" - -@SET DETECTED_VERSION="" -@FOR /F "tokens=*" %%i IN ('%ASTYLE% --version') DO SET DETECTED_VERSION=%%i -@ECHO %DETECTED_VERSION% | FINDSTR /B /C:%ASTYLE_VERSION% > nul && ( - ECHO "%DETECTED_VERSION%" matches %ASTYLE_VERSION% -) || ( - ECHO You should use: %ASTYLE_VERSION% - ECHO Detected: "%DETECTED_VERSION%" - GOTO EXIT_ERROR -) - -@SET STYLE=--style=kr --indent=spaces=4 --indent-namespaces --lineend=linux --min-conditional-indent=0 -@SET OPTIONS=--pad-header --unpad-paren --suffix=none --convert-tabs --attach-inlines --attach-classes --attach-namespaces - -%ASTYLE% %STYLE% %OPTIONS% *.cpp -%ASTYLE% %STYLE% %OPTIONS% *.h \ No newline at end of file diff --git a/runformat b/runformat new file mode 100755 index 00000000..0dc1ad52 --- /dev/null +++ b/runformat @@ -0,0 +1,176 @@ +#!/bin/bash +# +# runformat - format this project's C++ sources with Uncrustify. +# +# Usage: +# ./runformat # format using the configured Uncrustify +# ./runformat --install # download, build, and use Uncrustify locally +# ./runformat --install --install-dir /abs/path +# ./runformat --expected-uncrustify-version # print ONLY the expected Uncrustify version +# +# You may also set: +# UNCRUSTIFY=/abs/path/to/uncrustify # use a specific binary +# UNCRUSTIFY_INSTALL_DIR=/abs/path # where `--install` will install +# +# Requirements: +# - All developers must use the *exact* same Uncrustify version to avoid format churn. +# - Either: +# * Have `uncrustify` in PATH, or +# * Set env var UNCRUSTIFY=/absolute/path/to/uncrustify, or +# * Run `./runformat --install` to fetch & build the pinned version locally. 
# Print an error message to stderr, followed by a blank line.
# All arguments are joined with single spaces into one message. The message
# is printed literally (no backslash-escape interpretation), so paths that
# contain backslashes are shown unchanged.
err() { printf 'ERROR: %s\n\n' "$*" >&2; }

# Print an error message via err() and terminate the script with status 1.
# Arguments are forwarded quoted so that embedded whitespace and glob
# characters in the message are neither split nor expanded.
die() { err "$@"; exit 1; }
+ cmake --build "${root}-build" --config Release --target install --parallel + ) + + echo "Installed Uncrustify to: ${UNCRUSTIFY_INSTALL_DIR}" +} + +print_usage_and_exit() { + sed -n '2,25p' "$0" | sed 's/^# \{0,1\}//' + exit 0 +} + +# Print ONLY expected Uncrustify version (no extra text). +print_expected_uncrustify_version_and_exit() { + printf '%s\n' "$UNCRUSTIFY_VERSION" + exit 0 +} + +# -------------------------- +# Argument parsing +# -------------------------- +DO_INSTALL=0 +PRINT_EXPECTED_UNCRUSTIFY_VERSION=0 +# Accept: --install, --install-dir , -h/--help +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + print_usage_and_exit + ;; + --install) + DO_INSTALL=1 + shift + ;; + --install-dir) + [[ $# -ge 2 ]] || die "$1 requires a path argument" + UNCRUSTIFY_INSTALL_DIR="$(readlink -m "$2" 2>/dev/null || realpath -m "$2")" + UNCRUSTIFY_BIN="${UNCRUSTIFY_INSTALL_DIR}/bin/uncrustify" + # Only update UNCRUSTIFY default if user hasn't explicitly set it + if [[ "${UNCRUSTIFY:-}" != "${UNCRUSTIFY_BIN}" ]]; then + UNCRUSTIFY="${UNCRUSTIFY_BIN}" + fi + shift 2 + ;; + --expected-uncrustify-version) + PRINT_EXPECTED_UNCRUSTIFY_VERSION=1 + shift + ;; + *) + # ignore unrecognized positional args for now + shift + ;; + esac +done + +if [[ "$DO_INSTALL" -eq 1 ]]; then + install_uncrustify + # Ensure we use the freshly installed binary for this run + UNCRUSTIFY="$UNCRUSTIFY_BIN" +fi + +# If requested, print ONLY the expected Uncrustify version and exit. +if [[ "$PRINT_EXPECTED_UNCRUSTIFY_VERSION" -eq 1 ]]; then + print_expected_uncrustify_version_and_exit +fi + +# -------------------------- +# Validate & run +# -------------------------- + +# Check Uncrustify availability +if ! 
command -v "$UNCRUSTIFY" >/dev/null 2>&1; then + err "Uncrustify executable not found: $UNCRUSTIFY" + die "Add it to PATH, set UNCRUSTIFY=/path/to/uncrustify, or run: $0 --install [--install-dir DIR]" +fi + +# Version check +DETECTED_VERSION="$("$UNCRUSTIFY" --version 2>&1 | grep -oE '[0-9]+(\.[0-9]+)*' | head -n1 || true)" +echo "Detected Uncrustify: ${DETECTED_VERSION:-unknown}" +if [[ "$DETECTED_VERSION" != "${UNCRUSTIFY_VERSION}" ]]; then + die "Expected Uncrustify ${UNCRUSTIFY_VERSION}. Re-run with --install (and optionally --install-dir) or set UNCRUSTIFY." +fi + +# Config check +[[ -f "$UNCRUSTIFY_CONFIG" ]] || die "Uncrustify config not found at: $UNCRUSTIFY_CONFIG" + +# Run formatter +echo "Running formatter..." +$UNCRUSTIFY -c "$UNCRUSTIFY_CONFIG" -l CPP --no-backup --replace *.cpp *.h + +# Show diff and fail if changes exist +echo "Checking for formatting changes..." +git diff --exit-code || { + echo + echo "Formatting changes were applied. Please review and commit." + exit 1 +} + +echo "Formatting is clean." diff --git a/selfcheck.sh b/selfcheck.sh new file mode 100755 index 00000000..b2129cc9 --- /dev/null +++ b/selfcheck.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +if [ -z "$SIMPLECPP_PATH" ]; then + SIMPLECPP_PATH=. +fi + +if [ -n "$VALGRIND_TOOL" ]; then + if [ "$VALGRIND_TOOL" = "memcheck" ]; then + VALGRIND_OPTS="--error-limit=yes --leak-check=full --num-callers=50 --show-reachable=yes --track-origins=yes --gen-suppressions=all --error-exitcode=42" + elif [ "$VALGRIND_TOOL" = "callgrind" ]; then + VALGRIND_OPTS="--tool=callgrind" + else + echo "unsupported valgrind tool '$VALGRIND_TOOL'" + exit 1 + fi + VALGRIND_CMD="valgrind --tool=$VALGRIND_TOOL --log-fd=9 $VALGRIND_OPTS" + VALGRIND_REDIRECT="valgrind_$VALGRIND_TOOL.log" +else + VALGRIND_CMD= + VALGRIND_REDIRECT="/dev/null" +fi + +output=$($VALGRIND_CMD ./simplecpp "$SIMPLECPP_PATH/simplecpp.cpp" -e -f 2>&1 9> "$VALGRIND_REDIRECT") +ec=$? 
+cat "$VALGRIND_REDIRECT" +errors=$(echo "$output" | grep -v 'Header not found: <') +if [ $ec -ne 0 ]; then + # only fail if we got errors which do not refer to missing system includes + if [ ! -z "$errors" ]; then + exit $ec + fi +fi + +if [ -z "$CXX" ]; then + exit 0 +fi + +cxx_type=$($CXX --version | head -1 | cut -d' ' -f1) +if [ "$cxx_type" = "Ubuntu" ] || [ "$cxx_type" = "Debian" ]; then + cxx_type=$($CXX --version | head -1 | cut -d' ' -f2) +fi + +# TODO: generate defines from compiler +if [ "$cxx_type" = "g++" ] || [ "$cxx_type" = "g++.exe" ]; then + defs= + defs="$defs -D__GNUC__" + defs="$defs -D__STDC__" + defs="$defs -D__x86_64__" + defs="$defs -D__STDC_HOSTED__" + defs="$defs -D__CHAR_BIT__=8" + if [ "${MSYSTEM}" = "MINGW32" ] || [ "${MSYSTEM}" = "MINGW64" ]; then + defs="$defs -D_WIN32" + fi + defs="$defs -D__has_builtin(x)=(1)" + defs="$defs -D__has_cpp_attribute(x)=(1)" + defs="$defs -D__has_attribute(x)=(1)" + defs="$defs -Ddefined(x)=(0)" + + inc= + while read line + do + inc="$inc -I$line" + done <<< "$($CXX -x c++ -v -c -S - 2>&1 < /dev/null | grep -e'^ [/A-Z]' | grep -v /cc1plus)" +elif [ "$cxx_type" = "clang" ]; then + # libstdc++ + defs= + defs="$defs -D__x86_64__" + defs="$defs -D__STDC_HOSTED__" + defs="$defs -D__CHAR_BIT__=8" + defs="$defs -D__BYTE_ORDER__=1234" + defs="$defs -D__SIZEOF_SIZE_T__=8" + if [ "${MSYSTEM}" = "MINGW32" ] || [ "${MSYSTEM}" = "MINGW64" ] || [ "${MSYSTEM}" = "CLANG64" ]; then + defs="$defs -D_WIN32" + fi + defs="$defs -D__has_builtin(x)=(1)" + defs="$defs -D__has_cpp_attribute(x)=(1)" + defs="$defs -D__has_feature(x)=(1)" + defs="$defs -D__has_include_next(x)=(1)" + defs="$defs -D__has_attribute(x)=(0)" + defs="$defs -D__building_module(x)=(0)" + defs="$defs -D__has_extension(x)=(1)" + defs="$defs -Ddefined(x)=(0)" + + inc= + while read line + do + inc="$inc -I$line" + done <<< "$($CXX -x c++ -v -c -S - 2>&1 < /dev/null | grep -e'^ [/A-Z]')" + + # TODO: enable + # libc++ + #defs= + #defs="$defs -D__x86_64__" + 
#defs="$defs -D__linux__" + #defs="$defs -D__SIZEOF_SIZE_T__=8" + #defs="$defs -D__has_include_next(x)=(0)" + #defs="$defs -D__has_builtin(x)=(1)" + #defs="$defs -D__has_feature(x)=(1)" + + #inc= + #while read line + #do + # inc="$inc -I$line" + #done <<< "$($CXX -x c++ -stdlib=libc++ -v -c -S - 2>&1 < /dev/null | grep -e'^ [/A-Z]')" +elif [ "$cxx_type" = "Apple" ]; then + defs= + defs="$defs -D__BYTE_ORDER__" + defs="$defs -D__APPLE__" + defs="$defs -D__GNUC__=15" + defs="$defs -D__x86_64__" + defs="$defs -D__SIZEOF_SIZE_T__=8" + defs="$defs -D__LITTLE_ENDIAN__" + defs="$defs -D__has_feature(x)=(0)" + defs="$defs -D__has_extension(x)=(1)" + defs="$defs -D__has_attribute(x)=(0)" + defs="$defs -D__has_cpp_attribute(x)=(0)" + defs="$defs -D__has_include_next(x)=(0)" + defs="$defs -D__has_builtin(x)=(1)" + defs="$defs -D__is_target_os(x)=(0)" + defs="$defs -D__is_target_arch(x)=(0)" + defs="$defs -D__is_target_vendor(x)=(0)" + defs="$defs -D__is_target_environment(x)=(0)" + defs="$defs -D__is_target_variant_os(x)=(0)" + defs="$defs -D__is_target_variant_environment(x)=(0)" + + inc= + while read line + do + inc="$inc -I$line" + # TODO: pass the framework path as such when possible + done <<< "$($CXX -x c++ -v -c -S - 2>&1 < /dev/null | grep -e'^ [/A-Z]' | sed 's/ (framework directory)//g')" + echo $inc +else + echo "unknown compiler '$cxx_type'" + exit 1 +fi + +# run with -std=gnuc++* so __has_include(...) is available +$VALGRIND_CMD ./simplecpp "$SIMPLECPP_PATH/simplecpp.cpp" -e -f -std=gnu++11 $defs $inc 9> "$VALGRIND_REDIRECT" +ec=$? +cat "$VALGRIND_REDIRECT" +if [ $ec -ne 0 ]; then + exit $ec +fi diff --git a/simplecpp.cpp b/simplecpp.cpp old mode 100755 new mode 100644 index 52a82f90..a7ced05a --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -1,44 +1,60 @@ /* * simplecpp - A simple and high-fidelity C/C++ preprocessor library - * Copyright (C) 2016 Daniel Marjamäki. 
- * - * This library is free software: you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation, either - * version 3 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . + * Copyright (C) 2016-2023 simplecpp team */ -#if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) -#define SIMPLECPP_WINDOWS -#define NOMINMAX +#if defined(_WIN32) +# ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0602 +# endif +# ifndef NOMINMAX +# define NOMINMAX +# endif +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# include +# undef ERROR #endif + #include "simplecpp.h" +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) +# define SIMPLECPP_WINDOWS +#endif + #include +#include +#include #include +#include // IWYU pragma: keep +#include +#include #include #include -#include +#include #include #include +#include #include +#include +#include +#include #include #include #include +#include +#ifdef SIMPLECPP_WINDOWS +# include +#endif +#include #include +#include -#ifdef SIMPLECPP_WINDOWS -#include -#undef ERROR -#undef TRUE +#ifdef _WIN32 +# include +#else +# include #endif static bool isHex(const std::string &s) @@ -51,6 +67,17 @@ static bool isOct(const std::string &s) return s.size()>1 && (s[0]=='0') && (s[1] >= '0') && (s[1] < '8'); } +static bool isStringLiteral(const std::string &s) +{ + return s.size() > 1 && (s[0]=='\"') && (*s.rbegin()=='\"'); +} + +static bool isCharLiteral(const std::string &s) +{ + // char literal patterns can include 'a', '\t', '\000', '\xff', 
'abcd', and maybe '' + // This only checks for the surrounding '' but doesn't parse the content. + return s.size() > 1 && (s[0]=='\'') && (*s.rbegin()=='\''); +} static const simplecpp::TokenString DEFINE("define"); static const simplecpp::TokenString UNDEF("undef"); @@ -75,11 +102,21 @@ static const simplecpp::TokenString HAS_INCLUDE("__has_include"); template static std::string toString(T t) { + // NOLINTNEXTLINE(misc-const-correctness) - false positive std::ostringstream ostr; ostr << t; return ostr.str(); } +#ifdef SIMPLECPP_DEBUG_MACRO_EXPANSION +static std::string locstring(const simplecpp::Location &loc) +{ + std::ostringstream ostr; + ostr << '[' << loc.file() << ':' << loc.line << ':' << loc.col << ']'; + return ostr.str(); +} +#endif + static long long stringToLL(const std::string &s) { long long ret; @@ -108,14 +145,9 @@ static unsigned long long stringToULL(const std::string &s) return ret; } -static bool startsWith(const std::string &str, const std::string &s) -{ - return (str.size() >= s.size() && str.compare(0, s.size(), s) == 0); -} - static bool endsWith(const std::string &s, const std::string &e) { - return (s.size() >= e.size() && s.compare(s.size() - e.size(), e.size(), e) == 0); + return (s.size() >= e.size()) && std::equal(e.rbegin(), e.rend(), s.rbegin()); } static bool sameline(const simplecpp::Token *tok1, const simplecpp::Token *tok2) @@ -147,11 +179,9 @@ static std::string replaceAll(std::string s, const std::string& from, const std: return s; } -const std::string simplecpp::Location::emptyFileName; - void simplecpp::Location::adjust(const std::string &str) { - if (str.find_first_of("\r\n") == std::string::npos) { + if (strpbrk(str.c_str(), "\r\n") == nullptr) { col += str.size(); return; } @@ -169,17 +199,17 @@ void simplecpp::Location::adjust(const std::string &str) bool simplecpp::Token::isOneOf(const char ops[]) const { - return (op != '\0') && (std::strchr(ops, op) != NULL); + return (op != '\0') && (std::strchr(ops, op) != nullptr); 
} bool simplecpp::Token::startsWithOneOf(const char c[]) const { - return std::strchr(c, string[0]) != NULL; + return std::strchr(c, string[0]) != nullptr; } bool simplecpp::Token::endsWithOneOf(const char c[]) const { - return std::strchr(c, string[string.size() - 1U]) != NULL; + return std::strchr(c, string[string.size() - 1U]) != nullptr; } void simplecpp::Token::printAll() const @@ -207,25 +237,273 @@ void simplecpp::Token::printOut() const std::cout << std::endl; } -simplecpp::TokenList::TokenList(std::vector &filenames) : frontToken(NULL), backToken(NULL), files(filenames) {} +// cppcheck-suppress noConstructor - we call init() in the inherited to initialize the private members +class simplecpp::TokenList::Stream { +public: + virtual ~Stream() = default; + + virtual int get() = 0; + virtual int peek() = 0; + virtual void unget() = 0; + virtual bool good() = 0; + + unsigned char readChar() { + auto ch = static_cast(get()); + + // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the + // character is non-ASCII character then replace it with 0xff + if (isUtf16) { + const auto ch2 = static_cast(get()); + const int ch16 = makeUtf16Char(ch, ch2); + ch = static_cast(((ch16 >= 0x80) ? 0xff : ch16)); + } + + // Handling of newlines.. + if (ch == '\r') { + ch = '\n'; + + int ch2 = get(); + if (isUtf16) { + const int c2 = get(); + ch2 = makeUtf16Char(ch2, c2); + } + + if (ch2 != '\n') + ungetChar(); + } + + return ch; + } + + unsigned char peekChar() { + auto ch = static_cast(peek()); + + // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the + // character is non-ASCII character then replace it with 0xff + if (isUtf16) { + (void)get(); + const auto ch2 = static_cast(peek()); + unget(); + const int ch16 = makeUtf16Char(ch, ch2); + ch = static_cast(((ch16 >= 0x80) ? 0xff : ch16)); + } + + // Handling of newlines.. 
+ if (ch == '\r') + ch = '\n'; + + return ch; + } + + void ungetChar() { + unget(); + if (isUtf16) + unget(); + } + +protected: + void init() { + // initialize since we use peek() in getAndSkipBOM() + isUtf16 = false; + bom = getAndSkipBOM(); + isUtf16 = (bom == 0xfeff || bom == 0xfffe); + } + +private: + inline int makeUtf16Char(const unsigned char ch, const unsigned char ch2) const { + return (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); + } + + unsigned short getAndSkipBOM() { + const int ch1 = peek(); + + // The UTF-16 BOM is 0xfffe or 0xfeff. + if (ch1 >= 0xfe) { + (void)get(); + const unsigned short byte = (static_cast(ch1) << 8); + if (peek() >= 0xfe) + return byte | static_cast(get()); + unget(); + return 0; + } + + // Skip UTF-8 BOM 0xefbbbf + if (ch1 == 0xef) { + (void)get(); + if (peek() == 0xbb) { + (void)get(); + if (peek() == 0xbf) { + (void)get(); + return 0; + } + unget(); + } + unget(); + } + + return 0; + } + + unsigned short bom; +protected: + bool isUtf16; +}; + +namespace { + class StdIStream : public simplecpp::TokenList::Stream { + public: + // cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members + explicit StdIStream(std::istream &istr) + : istr(istr) { + assert(istr.good()); + init(); + } + + int get() override { + return istr.get(); + } + int peek() override { + return istr.peek(); + } + void unget() override { + istr.unget(); + } + bool good() override { + return istr.good(); + } + + private: + std::istream &istr; + }; + + class StdCharBufStream : public simplecpp::TokenList::Stream { + public: + // cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members + StdCharBufStream(const unsigned char* str, std::size_t size) + : str(str) + , size(size) + { + init(); + } + + int get() override { + if (pos >= size) + return lastStatus = EOF; + return str[pos++]; + } + int peek() override { + if (pos >= size) + return lastStatus = EOF; + return str[pos]; + } 
+ void unget() override { + --pos; + } + bool good() override { + return lastStatus != EOF; + } + + private: + const unsigned char *str; + const std::size_t size; + std::size_t pos{}; + int lastStatus{}; + }; + + class FileStream : public simplecpp::TokenList::Stream { + public: + /** + * @throws simplecpp::Output thrown if file is not found + */ + // cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members + explicit FileStream(const std::string &filename, std::vector &files) + : file(fopen(filename.c_str(), "rb")) + { + if (!file) { + files.emplace_back(filename); + throw simplecpp::Output(simplecpp::Output::FILE_NOT_FOUND, {}, "File is missing: " + filename); + } + init(); + } + + FileStream(const FileStream&) = delete; + FileStream &operator=(const FileStream&) = delete; + + ~FileStream() override { + fclose(file); + file = nullptr; + } + + int get() override { + lastStatus = lastCh = fgetc(file); + return lastCh; + } + int peek() override { + // keep lastCh intact + const int ch = fgetc(file); + unget_internal(ch); + return ch; + } + void unget() override { + unget_internal(lastCh); + } + bool good() override { + return lastStatus != EOF; + } + + private: + void unget_internal(int ch) { + if (isUtf16) { + // TODO: use ungetc() as well + // UTF-16 has subsequent unget() calls + fseek(file, -1, SEEK_CUR); + } else { + ungetc(ch, file); + } + } + + FILE *file; + int lastCh{}; + int lastStatus{}; + }; +} + +simplecpp::TokenList::TokenList(std::vector &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {} simplecpp::TokenList::TokenList(std::istream &istr, std::vector &filenames, const std::string &filename, OutputList *outputList) - : frontToken(NULL), backToken(NULL), files(filenames) + : frontToken(nullptr), backToken(nullptr), files(filenames) +{ + StdIStream stream(istr); + readfile(stream,filename,outputList); +} + +simplecpp::TokenList::TokenList(const unsigned char* data, std::size_t size, 
std::vector &filenames, const std::string &filename, OutputList *outputList, int /*unused*/) + : frontToken(nullptr), backToken(nullptr), files(filenames) +{ + StdCharBufStream stream(data, size); + readfile(stream,filename,outputList); +} + +simplecpp::TokenList::TokenList(const std::string &filename, std::vector &filenames, OutputList *outputList) + : frontToken(nullptr), backToken(nullptr), files(filenames) { - readfile(istr,filename,outputList); + try { + FileStream stream(filename, filenames); + readfile(stream,filename,outputList); + } catch (const simplecpp::Output & e) { + outputList->emplace_back(e); + } } -simplecpp::TokenList::TokenList(const TokenList &other) : frontToken(NULL), backToken(NULL), files(other.files) +simplecpp::TokenList::TokenList(const TokenList &other) : frontToken(nullptr), backToken(nullptr), files(other.files) { *this = other; } -#if __cplusplus >= 201103L -simplecpp::TokenList::TokenList(TokenList &&other) : frontToken(NULL), backToken(NULL), files(other.files) +simplecpp::TokenList::TokenList(TokenList &&other) : frontToken(nullptr), backToken(nullptr), files(other.files) { *this = std::move(other); } -#endif simplecpp::TokenList::~TokenList() { @@ -236,6 +514,7 @@ simplecpp::TokenList &simplecpp::TokenList::operator=(const TokenList &other) { if (this != &other) { clear(); + files = other.files; for (const Token *tok = other.cfront(); tok; tok = tok->next) push_back(new Token(*tok)); sizeOfType = other.sizeOfType; @@ -243,26 +522,25 @@ simplecpp::TokenList &simplecpp::TokenList::operator=(const TokenList &other) return *this; } -#if __cplusplus >= 201103L simplecpp::TokenList &simplecpp::TokenList::operator=(TokenList &&other) { if (this != &other) { clear(); - backToken = other.backToken; - other.backToken = NULL; frontToken = other.frontToken; - other.frontToken = NULL; + other.frontToken = nullptr; + backToken = other.backToken; + other.backToken = nullptr; + files = other.files; sizeOfType = std::move(other.sizeOfType); } 
return *this; } -#endif void simplecpp::TokenList::clear() { - backToken = NULL; + backToken = nullptr; while (frontToken) { - Token *next = frontToken->next; + Token * const next = frontToken->next; delete frontToken; frontToken = next; } @@ -279,24 +557,34 @@ void simplecpp::TokenList::push_back(Token *tok) backToken = tok; } -void simplecpp::TokenList::dump() const +void simplecpp::TokenList::dump(bool linenrs) const { - std::cout << stringify() << std::endl; + std::cout << stringify(linenrs) << std::endl; } -std::string simplecpp::TokenList::stringify() const +std::string simplecpp::TokenList::stringify(bool linenrs) const { std::ostringstream ret; - Location loc(files); + Location loc; + loc.line = 1; + bool filechg = true; for (const Token *tok = cfront(); tok; tok = tok->next) { if (tok->location.line < loc.line || tok->location.fileIndex != loc.fileIndex) { - ret << "\n#line " << tok->location.line << " \"" << tok->location.file() << "\"\n"; + ret << "\n#line " << tok->location.line << " \"" << file(tok->location) << "\"\n"; loc = tok->location; + filechg = true; + } + + if (linenrs && filechg) { + ret << loc.line << ": "; + filechg = false; } while (tok->location.line > loc.line) { ret << '\n'; loc.line++; + if (linenrs) + ret << loc.line << ": "; } if (sameline(tok->previous, tok)) @@ -310,92 +598,6 @@ std::string simplecpp::TokenList::stringify() const return ret.str(); } -static unsigned char readChar(std::istream &istr, unsigned int bom) -{ - unsigned char ch = (unsigned char)istr.get(); - - // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the - // character is non-ASCII character then replace it with 0xff - if (bom == 0xfeff || bom == 0xfffe) { - const unsigned char ch2 = (unsigned char)istr.get(); - const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); - ch = (unsigned char)((ch16 >= 0x80) ? 0xff : ch16); - } - - // Handling of newlines.. 
- if (ch == '\r') { - ch = '\n'; - if (bom == 0 && (char)istr.peek() == '\n') - (void)istr.get(); - else if (bom == 0xfeff || bom == 0xfffe) { - int c1 = istr.get(); - int c2 = istr.get(); - int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1); - if (ch16 != '\n') { - istr.unget(); - istr.unget(); - } - } - } - - return ch; -} - -static unsigned char peekChar(std::istream &istr, unsigned int bom) -{ - unsigned char ch = (unsigned char)istr.peek(); - - // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the - // character is non-ASCII character then replace it with 0xff - if (bom == 0xfeff || bom == 0xfffe) { - (void)istr.get(); - const unsigned char ch2 = (unsigned char)istr.peek(); - istr.unget(); - const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); - ch = (unsigned char)((ch16 >= 0x80) ? 0xff : ch16); - } - - // Handling of newlines.. - if (ch == '\r') - ch = '\n'; - - return ch; -} - -static void ungetChar(std::istream &istr, unsigned int bom) -{ - istr.unget(); - if (bom == 0xfeff || bom == 0xfffe) - istr.unget(); -} - -static unsigned short getAndSkipBOM(std::istream &istr) -{ - const int ch1 = istr.peek(); - - // The UTF-16 BOM is 0xfffe or 0xfeff. 
- if (ch1 >= 0xfe) { - unsigned short bom = ((unsigned char)istr.get() << 8); - if (istr.peek() >= 0xfe) - return bom | (unsigned char)istr.get(); - istr.unget(); - return 0; - } - - // Skip UTF-8 BOM 0xefbbbf - if (ch1 == 0xef) { - (void)istr.get(); - if (istr.get() == 0xbb && istr.peek() == 0xbf) { - (void)istr.get(); - } else { - istr.unget(); - istr.unget(); - } - } - - return 0; -} - static bool isNameChar(unsigned char ch) { return std::isalnum(ch) || ch == '_' || ch == '$'; @@ -406,7 +608,7 @@ static std::string escapeString(const std::string &str) std::ostringstream ostr; ostr << '\"'; for (std::size_t i = 1U; i < str.size() - 1; ++i) { - char c = str[i]; + const char c = str[i]; if (c == '\\' || c == '\"' || c == '\'') ostr << '\\'; ostr << c; @@ -415,15 +617,16 @@ static std::string escapeString(const std::string &str) return ostr.str(); } -static void portabilityBackslash(simplecpp::OutputList *outputList, const std::vector &files, const simplecpp::Location &location) +static void portabilityBackslash(simplecpp::OutputList *outputList, const simplecpp::Location &location) { if (!outputList) return; - simplecpp::Output err(files); - err.type = simplecpp::Output::PORTABILITY_BACKSLASH; - err.location = location; - err.msg = "Combination 'backslash space newline' is not portable."; - outputList->push_back(err); + simplecpp::Output err{ + simplecpp::Output::PORTABILITY_BACKSLASH, + location, + "Combination 'backslash space newline' is not portable." 
+ }; + outputList->emplace_back(std::move(err)); } static bool isStringLiteralPrefix(const std::string &str) @@ -432,16 +635,16 @@ static bool isStringLiteralPrefix(const std::string &str) str == "R" || str == "uR" || str == "UR" || str == "LR" || str == "u8R"; } -void simplecpp::TokenList::lineDirective(unsigned int fileIndex, unsigned int line, Location *location) +void simplecpp::TokenList::lineDirective(unsigned int fileIndex_, unsigned int line, Location &location) { - if (fileIndex != location->fileIndex || line >= location->line) { - location->fileIndex = fileIndex; - location->line = line; + if (fileIndex_ != location.fileIndex || line >= location.line) { + location.fileIndex = fileIndex_; + location.line = line; return; } - if (line + 2 >= location->line) { - location->line = line; + if (line + 2 >= location.line) { + location.line = line; while (cback()->op != '#') deleteToken(back()); deleteToken(back()); @@ -449,36 +652,30 @@ void simplecpp::TokenList::lineDirective(unsigned int fileIndex, unsigned int li } } -void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filename, OutputList *outputList) +static const std::string COMMENT_END("*/"); + +void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, OutputList *outputList) { std::stack loc; unsigned int multiline = 0U; - const Token *oldLastToken = NULL; + const Token *oldLastToken = nullptr; - const unsigned short bom = getAndSkipBOM(istr); - - Location location(files); - location.fileIndex = fileIndex(filename); - location.line = 1U; - location.col = 1U; - while (istr.good()) { - unsigned char ch = readChar(istr,bom); - if (!istr.good()) + Location location(fileIndex(filename), 1, 1); + while (stream.good()) { + unsigned char ch = stream.readChar(); + if (!stream.good()) break; - if (ch < ' ' && ch != '\t' && ch != '\n' && ch != '\r') - ch = ' '; if (ch >= 0x80) { if (outputList) { - simplecpp::Output err(files); - err.type = 
simplecpp::Output::UNHANDLED_CHAR_ERROR; - err.location = location; - std::ostringstream s; - s << (int)ch; - err.msg = "The code contains unhandled character(s) (character code=" + s.str() + "). Neither unicode nor extended ascii is supported."; - outputList->push_back(err); + simplecpp::Output err{ + simplecpp::Output::UNHANDLED_CHAR_ERROR, + location, + "The code contains unhandled character(s) (character code=" + std::to_string(static_cast(ch)) + "). Neither unicode nor extended ascii is supported." + }; + outputList->emplace_back(std::move(err)); } clear(); return; @@ -487,7 +684,7 @@ void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filen if (ch == '\n') { if (cback() && cback()->op == '\\') { if (location.col > cback()->location.col + 1U) - portabilityBackslash(outputList, files, cback()->location); + portabilityBackslash(outputList, cback()->location); ++multiline; deleteToken(back()); } else { @@ -499,19 +696,56 @@ void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filen if (oldLastToken != cback()) { oldLastToken = cback(); - const std::string lastline(lastLine()); - if (lastline == "# file %str%") { - loc.push(location); - location.fileIndex = fileIndex(cback()->str().substr(1U, cback()->str().size() - 2U)); - location.line = 1U; - } else if (lastline == "# line %num%") { - lineDirective(location.fileIndex, std::atol(cback()->str().c_str()), &location); - } else if (lastline == "# %num% %str%" || lastline == "# line %num% %str%") { - lineDirective(fileIndex(replaceAll(cback()->str().substr(1U, cback()->str().size() - 2U),"\\\\","\\")), - std::atol(cback()->previous->str().c_str()), &location); + const Token * const llTok = isLastLinePreprocessor(); + if (!llTok) + continue; + const Token * const llNextToken = llTok->next; + if (!llTok->next) + continue; + if (llNextToken->next) { + // #file "file.c" + if (llNextToken->str() == "file" && + llNextToken->next->str()[0] == '\"') + { + const Token *strtok = 
cback(); + while (strtok->comment) + strtok = strtok->previous; + loc.push(location); + location.fileIndex = fileIndex(strtok->str().substr(1U, strtok->str().size() - 2U)); + location.line = 1U; + } + // TODO: add support for "# 3" + // #3 "file.c" + // #line 3 "file.c" + else if ((llNextToken->number && + llNextToken->next->str()[0] == '\"') || + (llNextToken->str() == "line" && + llNextToken->next->number && + llNextToken->next->next && + llNextToken->next->next->str()[0] == '\"')) + { + const Token *strtok = cback(); + while (strtok->comment) + strtok = strtok->previous; + const Token *numtok = strtok->previous; + while (numtok->comment) + numtok = numtok->previous; + lineDirective(fileIndex(replaceAll(strtok->str().substr(1U, strtok->str().size() - 2U),"\\\\","\\")), + std::atol(numtok->str().c_str()), location); + } + // #line 3 + else if (llNextToken->str() == "line" && + llNextToken->next->number) + { + const Token *numtok = cback(); + while (numtok->comment) + numtok = numtok->previous; + lineDirective(location.fileIndex, std::atol(numtok->str().c_str()), location); + } } // #endfile - else if (lastline == "# endfile" && !loc.empty()) { + else if (llNextToken->str() == "endfile" && !loc.empty()) + { location = loc.top(); loc.pop(); } @@ -520,66 +754,87 @@ void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filen continue; } - if (std::isspace(ch)) { + if (ch <= ' ') { location.col++; continue; } TokenString currentToken; - if (cback() && cback()->location.line == location.line && cback()->previous && cback()->previous->op == '#' && (lastLine() == "# error" || lastLine() == "# warning")) { - char prev = ' '; - while (istr.good() && (prev == '\\' || (ch != '\r' && ch != '\n'))) { - currentToken += ch; - prev = ch; - ch = readChar(istr, bom); + if (cback() && cback()->location.line == location.line && cback()->previous && cback()->previous->op == '#') { + const Token* const ppTok = cback()->previous; + if (ppTok->next && 
(ppTok->next->str() == "error" || ppTok->next->str() == "warning")) { + char prev = ' '; + while (stream.good() && (prev == '\\' || (ch != '\r' && ch != '\n'))) { + currentToken += ch; + prev = ch; + ch = stream.readChar(); + } + stream.ungetChar(); + push_back(new Token(currentToken, location)); + location.adjust(currentToken); + continue; } - ungetChar(istr, bom); - push_back(new Token(currentToken, location)); - location.adjust(currentToken); - continue; } // number or name if (isNameChar(ch)) { - const bool num = std::isdigit(ch); - while (istr.good() && isNameChar(ch)) { + const bool num = !!std::isdigit(ch); + while (stream.good() && isNameChar(ch)) { currentToken += ch; - ch = readChar(istr,bom); - if (num && ch=='\'' && isNameChar(peekChar(istr,bom))) - ch = readChar(istr,bom); + ch = stream.readChar(); + if (num && ch=='\'' && isNameChar(stream.peekChar())) + ch = stream.readChar(); } - ungetChar(istr,bom); + stream.ungetChar(); } // comment - else if (ch == '/' && peekChar(istr,bom) == '/') { - while (istr.good() && ch != '\r' && ch != '\n') { + else if (ch == '/' && stream.peekChar() == '/') { + while (stream.good() && ch != '\n') { currentToken += ch; - ch = readChar(istr, bom); + ch = stream.readChar(); + if (ch == '\\') { + TokenString tmp; + char tmp_ch = ch; + while ((stream.good()) && (tmp_ch == '\\' || tmp_ch == ' ' || tmp_ch == '\t')) { + tmp += tmp_ch; + tmp_ch = stream.readChar(); + } + if (!stream.good()) { + break; + } + + if (tmp_ch != '\n') { + currentToken += tmp; + } else { + const TokenString check_portability = currentToken + tmp; + const std::string::size_type pos = check_portability.find_last_not_of(" \t"); + if (pos < check_portability.size() - 1U && check_portability[pos] == '\\') + portabilityBackslash(outputList, location); + ++multiline; + tmp_ch = stream.readChar(); + currentToken += '\n'; + } + ch = tmp_ch; + } } - const std::string::size_type pos = currentToken.find_last_not_of(" \t"); - if (pos < currentToken.size() - 1U && 
currentToken[pos] == '\\') - portabilityBackslash(outputList, files, location); - if (currentToken[currentToken.size() - 1U] == '\\') { - ++multiline; - currentToken.erase(currentToken.size() - 1U); - } else { - ungetChar(istr, bom); + if (ch == '\n') { + stream.ungetChar(); } } // comment - else if (ch == '/' && peekChar(istr,bom) == '*') { + else if (ch == '/' && stream.peekChar() == '*') { currentToken = "/*"; - (void)readChar(istr,bom); - ch = readChar(istr,bom); - while (istr.good()) { + (void)stream.readChar(); + ch = stream.readChar(); + while (stream.good()) { currentToken += ch; - if (currentToken.size() >= 4U && endsWith(currentToken, "*/")) + if (currentToken.size() >= 4U && endsWith(currentToken, COMMENT_END)) break; - ch = readChar(istr,bom); + ch = stream.readChar(); } // multiline.. @@ -588,7 +843,7 @@ void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filen currentToken.erase(pos,2); ++multiline; } - if (multiline || startsWith(lastLine(10),"# ")) { + if (multiline || isLastLinePreprocessor()) { pos = 0; while ((pos = currentToken.find('\n',pos)) != std::string::npos) { currentToken.erase(pos,1); @@ -610,31 +865,33 @@ void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filen std::string delim; currentToken = ch; prefix.resize(prefix.size() - 1); - ch = readChar(istr,bom); - while (istr.good() && ch != '(' && ch != '\n') { + ch = stream.readChar(); + while (stream.good() && ch != '(' && ch != '\n') { delim += ch; - ch = readChar(istr,bom); + ch = stream.readChar(); } - if (!istr.good() || ch == '\n') { + if (!stream.good() || ch == '\n') { if (outputList) { - Output err(files); - err.type = Output::SYNTAX_ERROR; - err.location = location; - err.msg = "Invalid newline in raw string delimiter."; - outputList->push_back(err); + Output err{ + Output::SYNTAX_ERROR, + location, + "Invalid newline in raw string delimiter." 
+ }; + outputList->emplace_back(std::move(err)); } return; } const std::string endOfRawString(')' + delim + currentToken); - while (istr.good() && !(endsWith(currentToken, endOfRawString) && currentToken.size() > 1)) - currentToken += readChar(istr,bom); + while (stream.good() && (!endsWith(currentToken, endOfRawString) || currentToken.size() <= 1)) + currentToken += stream.readChar(); if (!endsWith(currentToken, endOfRawString)) { if (outputList) { - Output err(files); - err.type = Output::SYNTAX_ERROR; - err.location = location; - err.msg = "Raw string missing terminating delimiter."; - outputList->push_back(err); + Output err{ + Output::SYNTAX_ERROR, + location, + "Raw string missing terminating delimiter." + }; + outputList->emplace_back(std::move(err)); } return; } @@ -644,14 +901,14 @@ void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filen back()->setstr(currentToken); location.adjust(currentToken); if (currentToken.find_first_of("\r\n") == std::string::npos) - location.col += 2 + 2 * delim.size(); + location.col += 2 + (2 * delim.size()); else location.col += 1 + delim.size(); continue; } - currentToken = readUntil(istr,location,ch,ch,outputList,bom); + currentToken = readUntil(stream,location,ch,ch,outputList); if (currentToken.size() < 2U) // Error is reported by readUntil() return; @@ -665,16 +922,20 @@ void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filen } if (prefix.empty()) - push_back(new Token(s, location)); // push string without newlines + push_back(new Token(s, location, !!std::isspace(stream.peekChar()))); // push string without newlines else back()->setstr(prefix + s); - if (newlines > 0 && lastLine().compare(0,9,"# define ") == 0) { - multiline += newlines; - location.adjust(s); - } else { - location.adjust(currentToken); + if (newlines > 0) { + const Token * const llTok = lastLineTok(); + if (llTok && llTok->op == '#' && llTok->next && (llTok->next->str() == "define" || llTok->next->str() 
== "pragma") && llTok->next->next) { + multiline += newlines; + location.adjust(s); + continue; + } } + + location.adjust(currentToken); continue; } @@ -682,13 +943,16 @@ void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filen currentToken += ch; } - if (currentToken == "<" && lastLine() == "# include") { - currentToken = readUntil(istr, location, '<', '>', outputList, bom); - if (currentToken.size() < 2U) - return; + if (*currentToken.begin() == '<') { + const Token * const llTok = lastLineTok(); + if (llTok && llTok->op == '#' && llTok->next && llTok->next->str() == "include") { + currentToken = readUntil(stream, location, '<', '>', outputList); + if (currentToken.size() < 2U) + return; + } } - push_back(new Token(currentToken, location)); + push_back(new Token(currentToken, location, !!std::isspace(stream.peekChar()))); if (multiline) location.col += currentToken.size(); @@ -719,7 +983,7 @@ void simplecpp::TokenList::constFold() constFoldComparison(tok); constFoldBitwise(tok); constFoldLogicalOp(tok); - constFoldQuestionOp(&tok); + constFoldQuestionOp(tok); // If there is no '(' we are done with the constant folding if (tok->op != '(') @@ -742,6 +1006,14 @@ static bool isFloatSuffix(const simplecpp::Token *tok) return c == 'f' || c == 'l'; } +static const std::string AND("and"); +static const std::string BITAND("bitand"); +static const std::string BITOR("bitor"); +static bool isAlternativeAndBitandBitor(const simplecpp::Token* tok) +{ + return isAlternativeBinaryOp(tok, AND) || isAlternativeBinaryOp(tok, BITAND) || isAlternativeBinaryOp(tok, BITOR); +} + void simplecpp::TokenList::combineOperators() { std::stack executableScope; @@ -774,10 +1046,10 @@ void simplecpp::TokenList::combineOperators() continue; } // float literals.. 
- if (tok->previous && tok->previous->number) { + if (tok->previous && tok->previous->number && sameline(tok->previous, tok) && tok->previous->str().find_first_of("._") == std::string::npos) { tok->setstr(tok->previous->str() + '.'); deleteToken(tok->previous); - if (isFloatSuffix(tok->next) || (tok->next && tok->next->startsWithOneOf("AaBbCcDdEeFfPp"))) { + if (sameline(tok, tok->next) && (isFloatSuffix(tok->next) || (tok->next && tok->next->startsWithOneOf("AaBbCcDdEeFfPp") && !isAlternativeAndBitandBitor(tok->next)))) { tok->setstr(tok->str() + tok->next->str()); deleteToken(tok->next); } @@ -821,7 +1093,7 @@ void simplecpp::TokenList::combineOperators() start = start->previous; } if (indentlevel == -1 && start) { - const Token *ftok = start; + const Token * const ftok = start; bool isFuncDecl = ftok->name; while (isFuncDecl) { if (!start->name && start->str() != "::" && start->op != '*' && start->op != '&') @@ -916,21 +1188,23 @@ void simplecpp::TokenList::constFoldMulDivRem(Token *tok) continue; long long result; - if (tok->op == '*') + if (tok->op == '*') { result = (stringToLL(tok->previous->str()) * stringToLL(tok->next->str())); + } else if (tok->op == '/' || tok->op == '%') { - long long rhs = stringToLL(tok->next->str()); + const long long rhs = stringToLL(tok->next->str()); if (rhs == 0) throw std::overflow_error("division/modulo by zero"); - long long lhs = stringToLL(tok->previous->str()); + const long long lhs = stringToLL(tok->previous->str()); if (rhs == -1 && lhs == std::numeric_limits::min()) throw std::overflow_error("division overflow"); if (tok->op == '/') result = (lhs / rhs); else result = (lhs % rhs); - } else + } else { continue; + } tok = tok->previous; tok->setstr(toString(result)); @@ -1022,8 +1296,6 @@ void simplecpp::TokenList::constFoldComparison(Token *tok) } } -static const std::string BITAND("bitand"); -static const std::string BITOR("bitor"); static const std::string XOR("xor"); void simplecpp::TokenList::constFoldBitwise(Token 
*tok) { @@ -1058,7 +1330,6 @@ void simplecpp::TokenList::constFoldBitwise(Token *tok) } } -static const std::string AND("and"); static const std::string OR("or"); void simplecpp::TokenList::constFoldLogicalOp(Token *tok) { @@ -1089,10 +1360,11 @@ void simplecpp::TokenList::constFoldLogicalOp(Token *tok) } } -void simplecpp::TokenList::constFoldQuestionOp(Token **tok1) +void simplecpp::TokenList::constFoldQuestionOp(Token *&tok1) { bool gotoTok1 = false; - for (Token *tok = *tok1; tok && tok->op != ')'; tok = gotoTok1 ? *tok1 : tok->next) { + // NOLINTNEXTLINE(misc-const-correctness) - technically correct but used to access non-const data + for (Token *tok = tok1; tok && tok->op != ')'; tok = gotoTok1 ? tok1 : tok->next) { gotoTok1 = false; if (tok->str() != "?") continue; @@ -1107,8 +1379,8 @@ void simplecpp::TokenList::constFoldQuestionOp(Token **tok1) Token * const falseTok = trueTok->next->next; if (!falseTok) throw std::runtime_error("invalid expression"); - if (condTok == *tok1) - *tok1 = (condTok->str() != "0" ? trueTok : falseTok); + if (condTok == tok1) + tok1 = (condTok->str() != "0" ? trueTok : falseTok); deleteToken(condTok->next); // ? deleteToken(trueTok->next); // : deleteToken(condTok->str() == "0" ? 
trueTok : falseTok); @@ -1121,22 +1393,22 @@ void simplecpp::TokenList::removeComments() { Token *tok = frontToken; while (tok) { - Token *tok1 = tok; + Token * const tok1 = tok; tok = tok->next; if (tok1->comment) deleteToken(tok1); } } -std::string simplecpp::TokenList::readUntil(std::istream &istr, const Location &location, const char start, const char end, OutputList *outputList, unsigned int bom) +std::string simplecpp::TokenList::readUntil(Stream &stream, const Location &location, const char start, const char end, OutputList *outputList) { std::string ret; ret += start; bool backslash = false; char ch = 0; - while (ch != end && ch != '\r' && ch != '\n' && istr.good()) { - ch = readChar(istr, bom); + while (ch != end && ch != '\r' && ch != '\n' && stream.good()) { + ch = stream.readChar(); if (backslash && ch == '\n') { ch = 0; backslash = false; @@ -1148,13 +1420,14 @@ std::string simplecpp::TokenList::readUntil(std::istream &istr, const Location & bool update_ch = false; char next = 0; do { - next = readChar(istr, bom); + next = stream.readChar(); if (next == '\r' || next == '\n') { ret.erase(ret.size()-1U); backslash = (next == '\r'); update_ch = false; - } else if (next == '\\') + } else if (next == '\\') { update_ch = !update_ch; + } ret += next; } while (next == '\\'); if (update_ch) @@ -1162,14 +1435,15 @@ std::string simplecpp::TokenList::readUntil(std::istream &istr, const Location & } } - if (!istr.good() || ch != end) { + if (!stream.good() || ch != end) { clear(); if (outputList) { - Output err(files); - err.type = Output::SYNTAX_ERROR; - err.location = location; - err.msg = std::string("No pair for character (") + start + "). Can't process file. File is either invalid or unicode, which is currently not supported."; - outputList->push_back(err); + Output err{ + Output::SYNTAX_ERROR, + location, + std::string("No pair for character (") + start + "). Can't process file. File is either invalid or unicode, which is currently not supported." 
+ }; + outputList->emplace_back(std::move(err)); } return ""; } @@ -1177,21 +1451,28 @@ std::string simplecpp::TokenList::readUntil(std::istream &istr, const Location & return ret; } -std::string simplecpp::TokenList::lastLine(int maxsize) const +const simplecpp::Token* simplecpp::TokenList::lastLineTok(int maxsize) const { - std::string ret; + const Token* prevTok = nullptr; int count = 0; - for (const Token *tok = cback(); sameline(tok,cback()); tok = tok->previous) { + for (const Token *tok = cback(); ; tok = tok->previous) { + if (!sameline(tok, cback())) + break; if (tok->comment) continue; - if (!ret.empty()) - ret.insert(0, 1, ' '); - ret.insert(0, tok->str()[0] == '\"' ? std::string("%str%") - : tok->number ? std::string("%num%") : tok->str()); if (++count > maxsize) - return ""; + return nullptr; + prevTok = tok; } - return ret; + return prevTok; +} + +const simplecpp::Token* simplecpp::TokenList::isLastLinePreprocessor(int maxsize) const +{ + const Token * const prevTok = lastLineTok(maxsize); + if (prevTok && prevTok->op == '#') + return prevTok; + return nullptr; } unsigned int simplecpp::TokenList::fileIndex(const std::string &filename) @@ -1200,18 +1481,30 @@ unsigned int simplecpp::TokenList::fileIndex(const std::string &filename) if (files[i] == filename) return i; } - files.push_back(filename); + files.emplace_back(filename); return files.size() - 1U; } +const std::string& simplecpp::TokenList::file(const Location& loc) const +{ + static const std::string s_emptyFileName; + return loc.fileIndex < files.size() ? 
files[loc.fileIndex] : s_emptyFileName; +} + namespace simplecpp { + class Macro; + using MacroMap = std::unordered_map; + class Macro { public: - explicit Macro(std::vector &f) : nameTokDef(NULL), variadic(false), valueToken(NULL), endToken(NULL), files(f), tokenListDefine(f), valueDefinedInCode_(false) {} + explicit Macro(std::vector &f) : nameTokDef(nullptr), valueToken(nullptr), endToken(nullptr), files(f), tokenListDefine(f), variadic(false), variadicOpt(false), valueDefinedInCode_(false) {} - Macro(const Token *tok, std::vector &f) : nameTokDef(NULL), files(f), tokenListDefine(f), valueDefinedInCode_(true) { - if (sameline(tok->previous, tok)) + /** + * @throws std::runtime_error thrown on bad macro syntax + */ + Macro(const Token *tok, std::vector &f) : nameTokDef(nullptr), files(f), tokenListDefine(f), valueDefinedInCode_(true) { + if (sameline(tok->previousSkipComments(), tok)) throw std::runtime_error("bad macro syntax"); if (tok->op != '#') throw std::runtime_error("bad macro syntax"); @@ -1226,28 +1519,45 @@ namespace simplecpp { throw std::runtime_error("bad macro syntax"); } - Macro(const std::string &name, const std::string &value, std::vector &f) : nameTokDef(NULL), files(f), tokenListDefine(f), valueDefinedInCode_(false) { + /** + * @throws std::runtime_error thrown on bad macro syntax + */ + Macro(const std::string &name, const std::string &value, std::vector &f) : nameTokDef(nullptr), files(f), tokenListDefine(f), valueDefinedInCode_(false) { const std::string def(name + ' ' + value); - std::istringstream istr(def); - tokenListDefine.readfile(istr); + StdCharBufStream stream(reinterpret_cast(def.data()), def.size()); + tokenListDefine.readfile(stream); if (!parseDefine(tokenListDefine.cfront())) throw std::runtime_error("bad macro syntax. 
macroname=" + name + " value=" + value); } - Macro(const Macro ¯o) : nameTokDef(NULL), files(macro.files), tokenListDefine(macro.files), valueDefinedInCode_(macro.valueDefinedInCode_) { - *this = macro; + Macro(const Macro &other) : nameTokDef(nullptr), files(other.files), tokenListDefine(other.files), valueDefinedInCode_(other.valueDefinedInCode_) { + // TODO: remove the try-catch - see #537 + // avoid bugprone-exception-escape clang-tidy warning + try { + *this = other; + } + catch (const Error&) {} // NOLINT(bugprone-empty-catch) + } + + ~Macro() { + delete optExpandValue; + delete optNoExpandValue; } - void operator=(const Macro ¯o) { - if (this != ¯o) { - valueDefinedInCode_ = macro.valueDefinedInCode_; - if (macro.tokenListDefine.empty()) - parseDefine(macro.nameTokDef); + Macro &operator=(const Macro &other) { + if (this != &other) { + files = other.files; + valueDefinedInCode_ = other.valueDefinedInCode_; + if (other.tokenListDefine.empty()) { + parseDefine(other.nameTokDef); + } else { - tokenListDefine = macro.tokenListDefine; + tokenListDefine = other.tokenListDefine; parseDefine(tokenListDefine.cfront()); } + usageList = other.usageList; } + return *this; } bool valueDefinedInCode() const { @@ -1261,23 +1571,29 @@ namespace simplecpp { * @param macros list of macros * @param inputFiles the input files * @return token after macro - * @throw Can throw wrongNumberOfParameters or invalidHashHash + * @throws Error thrown on missing or invalid preprocessor directives + * @throws wrongNumberOfParameters thrown on invalid number of parameters + * @throws invalidHashHash thrown on invalid ## usage */ - const Token * expand(TokenList * const output, + const Token * expand(TokenList & output, const Token * rawtok, - const std::map ¯os, + const MacroMap ¯os, std::vector &inputFiles) const { std::set expandedmacros; +#ifdef SIMPLECPP_DEBUG_MACRO_EXPANSION + std::cout << "expand " << name() << " " << locstring(rawtok->location) << std::endl; +#endif + TokenList 
output2(inputFiles); if (functionLike() && rawtok->next && rawtok->next->op == '(') { // Copy macro call to a new tokenlist with no linebreaks const Token * const rawtok1 = rawtok; TokenList rawtokens2(inputFiles); - rawtokens2.push_back(new Token(rawtok->str(), rawtok1->location)); + rawtokens2.push_back(new Token(rawtok->str(), rawtok1->location, rawtok->whitespaceahead)); rawtok = rawtok->next; - rawtokens2.push_back(new Token(rawtok->str(), rawtok1->location)); + rawtokens2.push_back(new Token(rawtok->str(), rawtok1->location, rawtok->whitespaceahead)); rawtok = rawtok->next; int par = 1; while (rawtok && par > 0) { @@ -1287,16 +1603,13 @@ namespace simplecpp { --par; else if (rawtok->op == '#' && !sameline(rawtok->previous, rawtok)) throw Error(rawtok->location, "it is invalid to use a preprocessor directive as macro parameter"); - rawtokens2.push_back(new Token(rawtok->str(), rawtok1->location)); + rawtokens2.push_back(new Token(rawtok->str(), rawtok1->location, rawtok->whitespaceahead)); rawtok = rawtok->next; } - bool first = true; - if (valueToken && valueToken->str() == rawtok1->str()) - first = false; - if (expand(&output2, rawtok1->location, rawtokens2.cfront(), macros, expandedmacros, first)) + if (expand(output2, rawtok1->location, rawtokens2.cfront(), macros, expandedmacros)) rawtok = rawtok1->next; } else { - rawtok = expand(&output2, rawtok->location, rawtok, macros, expandedmacros); + rawtok = expand(output2, rawtok->location, rawtok, macros, expandedmacros); } while (output2.cback() && rawtok) { unsigned int par = 0; @@ -1306,26 +1619,28 @@ namespace simplecpp { if (par==0) break; --par; - } else if (macro2tok->op == ')') + } else if (macro2tok->op == ')') { ++par; + } macro2tok = macro2tok->previous; } if (macro2tok) { // macro2tok->op == '(' macro2tok = macro2tok->previous; expandedmacros.insert(name()); - } else if (rawtok->op == '(') + } else if (rawtok->op == '(') { macro2tok = output2.back(); + } if (!macro2tok || !macro2tok->name) break; 
if (output2.cfront() != output2.cback() && macro2tok->str() == this->name()) break; - const std::map::const_iterator macro = macros.find(macro2tok->str()); + const MacroMap::const_iterator macro = macros.find(macro2tok->str()); if (macro == macros.end() || !macro->second.functionLike()) break; TokenList rawtokens2(inputFiles); const Location loc(macro2tok->location); while (macro2tok) { - Token *next = macro2tok->next; + Token * const next = macro2tok->next; rawtokens2.push_back(new Token(macro2tok->str(), loc)); output2.deleteToken(macro2tok); macro2tok = next; @@ -1334,8 +1649,9 @@ namespace simplecpp { const Token *rawtok2 = rawtok; for (; rawtok2; rawtok2 = rawtok2->next) { rawtokens2.push_back(new Token(rawtok2->str(), loc)); - if (rawtok2->op == '(') + if (rawtok2->op == '(') { ++par; + } else if (rawtok2->op == ')') { if (par <= 1U) break; @@ -1344,11 +1660,11 @@ namespace simplecpp { } if (!rawtok2 || par != 1U) break; - if (macro->second.expand(&output2, rawtok->location, rawtokens2.cfront(), macros, expandedmacros) != NULL) + if (macro->second.expand(output2, rawtok->location, rawtokens2.cfront(), macros, expandedmacros) != nullptr) break; rawtok = rawtok2->next; } - output->takeTokens(output2); + output.takeTokens(output2); return rawtok; } @@ -1378,8 +1694,8 @@ namespace simplecpp { /** base class for errors */ struct Error { Error(const Location &loc, const std::string &s) : location(loc), what(s) {} - Location location; - std::string what; + const Location location; + const std::string what; }; /** Struct that is thrown when macro is expanded with wrong number of parameters */ @@ -1389,22 +1705,53 @@ namespace simplecpp { /** Struct that is thrown when there is invalid ## usage */ struct invalidHashHash : public Error { - invalidHashHash(const Location &loc, const std::string ¯oName) : Error(loc, "Invalid ## usage when expanding \'" + macroName + "\'.") {} + static inline std::string format(const std::string ¯oName, const std::string &message) { + 
return "Invalid ## usage when expanding \'" + macroName + "\': " + message; + } + + invalidHashHash(const Location &loc, const std::string ¯oName, const std::string &message) + : Error(loc, format(macroName, message)) {} + + static inline invalidHashHash unexpectedToken(const Location &loc, const std::string ¯oName, const Token *tokenA) { + return {loc, macroName, "Unexpected token '"+ tokenA->str()+"'"}; + } + + static inline invalidHashHash cannotCombine(const Location &loc, const std::string ¯oName, const Token *tokenA, const Token *tokenB) { + return {loc, macroName, "Combining '"+ tokenA->str()+ "' and '"+ tokenB->str() + "' yields an invalid token."}; + } + + static inline invalidHashHash unexpectedNewline(const Location &loc, const std::string ¯oName) { + return {loc, macroName, "Unexpected newline"}; + } + + static inline invalidHashHash universalCharacterUB(const Location &loc, const std::string ¯oName, const Token* tokenA, const std::string& strAB) { + return {loc, macroName, "Combining '\\"+ tokenA->str()+ "' and '"+ strAB.substr(tokenA->str().size()) + "' yields universal character '\\" + strAB + "'. 
This is undefined behavior according to C standard chapter 5.1.1.2, paragraph 4."}; + } }; private: /** Create new token where Token::macro is set for replaced tokens */ - Token *newMacroToken(const TokenString &str, const Location &loc, bool replaced) const { - Token *tok = new Token(str,loc); + Token *newMacroToken(const TokenString &str, const Location &loc, bool replaced, const Token *expandedFromToken=nullptr) const { + auto *tok = new Token(str,loc); if (replaced) tok->macro = nameTokDef->str(); + if (expandedFromToken) + tok->setExpandedFrom(expandedFromToken, this); return tok; } + /** + * @throws Error thrown in case of __VA_OPT__ issues + */ bool parseDefine(const Token *nametoken) { nameTokDef = nametoken; variadic = false; + variadicOpt = false; + delete optExpandValue; + optExpandValue = nullptr; + delete optNoExpandValue; + optNoExpandValue = nullptr; if (!nameTokDef) { - valueToken = endToken = NULL; + valueToken = endToken = nullptr; args.clear(); return false; } @@ -1418,30 +1765,71 @@ namespace simplecpp { argtok->next && argtok->next->op == ')') { variadic = true; if (!argtok->previous->name) - args.push_back("__VA_ARGS__"); + args.emplace_back("__VA_ARGS__"); argtok = argtok->next; // goto ')' break; } if (argtok->op != ',') - args.push_back(argtok->str()); + args.emplace_back(argtok->str()); argtok = argtok->next; } if (!sameline(nametoken, argtok)) { endToken = argtok ? argtok->previous : argtok; - valueToken = NULL; + valueToken = nullptr; return false; } - valueToken = argtok ? argtok->next : NULL; + valueToken = argtok ? 
argtok->next : nullptr; } else { args.clear(); valueToken = nameTokDef->next; } if (!sameline(valueToken, nameTokDef)) - valueToken = NULL; + valueToken = nullptr; endToken = valueToken; - while (sameline(endToken, nameTokDef)) + while (sameline(endToken, nameTokDef)) { + if (variadic && endToken->str() == "__VA_OPT__") + variadicOpt = true; endToken = endToken->next; + } + + if (variadicOpt) { + TokenList expandValue(files); + TokenList noExpandValue(files); + for (const Token *tok = valueToken; tok && tok != endToken;) { + if (tok->str() == "__VA_OPT__") { + if (!sameline(tok, tok->next) || tok->next->op != '(') + throw Error(tok->location, "In definition of '" + nameTokDef->str() + "': Missing opening parenthesis for __VA_OPT__"); + tok = tok->next->next; + int par = 1; + while (tok && tok != endToken) { + if (tok->op == '(') + par++; + else if (tok->op == ')') + par--; + else if (tok->str() == "__VA_OPT__") + throw Error(tok->location, "In definition of '" + nameTokDef->str() + "': __VA_OPT__ cannot be nested"); + if (par == 0) { + tok = tok->next; + break; + } + expandValue.push_back(new Token(*tok)); + tok = tok->next; + } + if (par != 0) { + const Token *const lastTok = expandValue.back() ? 
expandValue.back() : valueToken->next; + throw Error(lastTok->location, "In definition of '" + nameTokDef->str() + "': Missing closing parenthesis for __VA_OPT__"); + } + } else { + expandValue.push_back(new Token(*tok)); + noExpandValue.push_back(new Token(*tok)); + tok = tok->next; + } + } + optExpandValue = new TokenList(std::move(expandValue)); + optNoExpandValue = new TokenList(std::move(noExpandValue)); + } + return true; } @@ -1457,62 +1845,54 @@ namespace simplecpp { std::vector getMacroParameters(const Token *nameTokInst, bool calledInDefine) const { if (!nameTokInst->next || nameTokInst->next->op != '(' || !functionLike()) - return std::vector(); + return {}; std::vector parametertokens; - parametertokens.push_back(nameTokInst->next); + parametertokens.emplace_back(nameTokInst->next); unsigned int par = 0U; - for (const Token *tok = nameTokInst->next->next; calledInDefine ? sameline(tok, nameTokInst) : (tok != NULL); tok = tok->next) { - if (tok->op == '(') + for (const Token *tok = nameTokInst->next->next; calledInDefine ? 
sameline(tok, nameTokInst) : (tok != nullptr); tok = tok->next) { + if (tok->op == '(') { ++par; + } else if (tok->op == ')') { if (par == 0U) { - parametertokens.push_back(tok); + parametertokens.emplace_back(tok); break; } --par; - } else if (par == 0U && tok->op == ',' && (!variadic || parametertokens.size() < args.size())) - parametertokens.push_back(tok); + } else if (par == 0U && tok->op == ',' && (!variadic || parametertokens.size() < args.size())) { + parametertokens.emplace_back(tok); + } } return parametertokens; } - const Token *appendTokens(TokenList *tokens, + const Token *appendTokens(TokenList &tokens, const Location &rawloc, const Token * const lpar, - const std::map ¯os, + const MacroMap ¯os, const std::set &expandedmacros, const std::vector ¶metertokens) const { if (!lpar || lpar->op != '(') - return NULL; + return nullptr; unsigned int par = 0; const Token *tok = lpar; while (sameline(lpar, tok)) { if (tok->op == '#' && sameline(tok,tok->next) && tok->next->op == '#' && sameline(tok,tok->next->next)) { // A##B => AB - tok = expandHashHash(tokens, rawloc, tok, macros, expandedmacros, parametertokens); + tok = expandHashHash(tokens, rawloc, tok, macros, expandedmacros, parametertokens, false); } else if (tok->op == '#' && sameline(tok, tok->next) && tok->next->op != '#') { - tok = expandHash(tokens, rawloc, tok, macros, expandedmacros, parametertokens); + tok = expandHash(tokens, rawloc, tok, expandedmacros, parametertokens); } else { if (!expandArg(tokens, tok, rawloc, macros, expandedmacros, parametertokens)) { - bool expanded = false; - const std::map::const_iterator it = macros.find(tok->str()); - if (it != macros.end() && expandedmacros.find(tok->str()) == expandedmacros.end()) { - const Macro &m = it->second; - if (!m.functionLike()) { - m.expand(tokens, rawloc, tok, macros, expandedmacros); - expanded = true; - } - } - if (!expanded) { - tokens->push_back(new Token(*tok)); - if (tok->macro.empty() && (par > 0 || tok->str() != "(")) - 
tokens->back()->macro = name(); - } + tokens.push_back(new Token(*tok)); + if (tok->macro.empty() && (par > 0 || tok->str() != "(")) + tokens.back()->macro = name(); } - if (tok->op == '(') + if (tok->op == '(') { ++par; + } else if (tok->op == ')') { --par; if (par == 0U) @@ -1521,28 +1901,30 @@ namespace simplecpp { tok = tok->next; } } - for (Token *tok2 = tokens->front(); tok2; tok2 = tok2->next) + for (Token *tok2 = tokens.front(); tok2; tok2 = tok2->next) tok2->location = lpar->location; - return sameline(lpar,tok) ? tok : NULL; + return sameline(lpar,tok) ? tok : nullptr; } - const Token * expand(TokenList * const output, const Location &loc, const Token * const nameTokInst, const std::map ¯os, std::set expandedmacros, bool first=false) const { + const Token * expand(TokenList & output, const Location &loc, const Token * const nameTokInst, const MacroMap ¯os, std::set expandedmacros) const { + expandedmacros.insert(nameTokInst->str()); - if (!first) - expandedmacros.insert(nameTokInst->str()); +#ifdef SIMPLECPP_DEBUG_MACRO_EXPANSION + std::cout << " expand " << name() << " " << locstring(defineLocation()) << std::endl; +#endif - usageList.push_back(loc); + usageList.emplace_back(loc); if (nameTokInst->str() == "__FILE__") { - output->push_back(new Token('\"'+loc.file()+'\"', loc)); + output.push_back(new Token('\"'+output.file(loc)+'\"', loc)); return nameTokInst->next; } if (nameTokInst->str() == "__LINE__") { - output->push_back(new Token(toString(loc.line), loc)); + output.push_back(new Token(toString(loc.line), loc)); return nameTokInst->next; } if (nameTokInst->str() == "__COUNTER__") { - output->push_back(new Token(toString(usageList.size()-1U), loc)); + output.push_back(new Token(toString(usageList.size()-1U), loc)); return nameTokInst->next; } @@ -1554,7 +1936,7 @@ namespace simplecpp { if (functionLike()) { // No arguments => not macro expansion if (nameTokInst->next && nameTokInst->next->op != '(') { - output->push_back(new 
Token(nameTokInst->str(), loc)); + output.push_back(new Token(nameTokInst->str(), loc)); return nameTokInst->next; } @@ -1581,21 +1963,22 @@ namespace simplecpp { } } - const std::map::const_iterator m = macros.find("__COUNTER__"); + const MacroMap::const_iterator m = macros.find("__COUNTER__"); - if (!counter || m == macros.end()) + if (!counter || m == macros.end()) { parametertokens2.swap(parametertokens1); + } else { const Macro &counterMacro = m->second; unsigned int par = 0; for (const Token *tok = parametertokens1[0]; tok && par < parametertokens1.size(); tok = tok->next) { if (tok->str() == "__COUNTER__") { tokensparams.push_back(new Token(toString(counterMacro.usageList.size()), tok->location)); - counterMacro.usageList.push_back(tok->location); + counterMacro.usageList.emplace_back(tok->location); } else { tokensparams.push_back(new Token(*tok)); if (tok == parametertokens1[par]) { - parametertokens2.push_back(tokensparams.cback()); + parametertokens2.emplace_back(tokensparams.cback()); par++; } } @@ -1603,23 +1986,46 @@ namespace simplecpp { } } - Token * const output_end_1 = output->back(); + // NOLINTNEXTLINE(misc-const-correctness) - technically correct but used to access non-const data + Token * const output_end_1 = output.back(); + + const Token *valueToken2; + const Token *endToken2; + + if (variadicOpt) { + if (parametertokens2.size() > args.size() && parametertokens2[args.size() - 1]->next->op != ')') + valueToken2 = optExpandValue->cfront(); + else + valueToken2 = optNoExpandValue->cfront(); + endToken2 = nullptr; + } else { + valueToken2 = valueToken; + endToken2 = endToken; + } // expand - for (const Token *tok = valueToken; tok != endToken;) { + for (const Token *tok = valueToken2; tok != endToken2;) { if (tok->op != '#') { // A##B => AB if (sameline(tok, tok->next) && tok->next && tok->next->op == '#' && tok->next->next && tok->next->next->op == '#') { if (!sameline(tok, tok->next->next->next)) - throw invalidHashHash(tok->location, name()); 
+ throw invalidHashHash::unexpectedNewline(tok->location, name()); + if (variadic && tok->op == ',' && tok->next->next->next->str() == args.back()) { + Token *const comma = newMacroToken(tok->str(), loc, isReplaced(expandedmacros), tok); + output.push_back(comma); + tok = expandToken(output, loc, tok->next->next->next, macros, expandedmacros, parametertokens2); + if (output.back() == comma) + output.deleteToken(comma); + continue; + } TokenList new_output(files); - if (!expandArg(&new_output, tok, parametertokens2)) - output->push_back(newMacroToken(tok->str(), loc, isReplaced(expandedmacros))); + if (!expandArg(new_output, tok, parametertokens2)) + output.push_back(newMacroToken(tok->str(), loc, isReplaced(expandedmacros), tok)); else if (new_output.empty()) // placemarker token - output->push_back(newMacroToken("", loc, isReplaced(expandedmacros))); + output.push_back(newMacroToken("", loc, isReplaced(expandedmacros))); else for (const Token *tok2 = new_output.cfront(); tok2; tok2 = tok2->next) - output->push_back(newMacroToken(tok2->str(), loc, isReplaced(expandedmacros))); + output.push_back(newMacroToken(tok2->str(), loc, isReplaced(expandedmacros), tok2)); tok = tok->next; } else { tok = expandToken(output, loc, tok, macros, expandedmacros, parametertokens2); @@ -1635,20 +2041,26 @@ namespace simplecpp { } if (numberOfHash == 4 && tok->next->location.col + 1 == tok->next->next->location.col) { // # ## # => ## - output->push_back(newMacroToken("##", loc, isReplaced(expandedmacros))); + output.push_back(newMacroToken("##", loc, isReplaced(expandedmacros))); tok = hashToken; continue; } if (numberOfHash >= 2 && tok->location.col + 1 < tok->next->location.col) { - output->push_back(new Token(*tok)); + output.push_back(new Token(*tok)); tok = tok->next; continue; } tok = tok->next; - if (tok == endToken) { - output->push_back(new Token(*tok->previous)); + if (tok == endToken2) { + if (tok) { + output.push_back(new Token(*tok->previous)); + } + else { + 
output.push_back(new Token(*nameTokInst)); + output.back()->setstr("\"\""); + } break; } if (tok->op == '#') { @@ -1656,12 +2068,12 @@ namespace simplecpp { tok = expandHashHash(output, loc, tok->previous, macros, expandedmacros, parametertokens2); } else { // #123 => "123" - tok = expandHash(output, loc, tok->previous, macros, expandedmacros, parametertokens2); + tok = expandHash(output, loc, tok->previous, expandedmacros, parametertokens2); } } if (!functionLike()) { - for (Token *tok = output_end_1 ? output_end_1->next : output->front(); tok; tok = tok->next) { + for (Token *tok = output_end_1 ? output_end_1->next : output.front(); tok; tok = tok->next) { tok->macro = nameTokInst->str(); } } @@ -1672,57 +2084,61 @@ namespace simplecpp { return functionLike() ? parametertokens2.back()->next : nameTokInst->next; } - const Token *recursiveExpandToken(TokenList *output, TokenList &temp, const Location &loc, const Token *tok, const std::map ¯os, const std::set &expandedmacros, const std::vector ¶metertokens) const { - if (!(temp.cback() && temp.cback()->name && tok->next && tok->next->op == '(')) { - output->takeTokens(temp); + const Token *recursiveExpandToken(TokenList &output, TokenList &temp, const Location &loc, const Token *tok, const MacroMap ¯os, const std::set &expandedmacros, const std::vector ¶metertokens) const { + if (!temp.cback() || !temp.cback()->name || !tok->next || tok->next->op != '(') { + output.takeTokens(temp); return tok->next; } if (!sameline(tok, tok->next)) { - output->takeTokens(temp); + output.takeTokens(temp); return tok->next; } - const std::map::const_iterator it = macros.find(temp.cback()->str()); + const MacroMap::const_iterator it = macros.find(temp.cback()->str()); if (it == macros.end() || expandedmacros.find(temp.cback()->str()) != expandedmacros.end()) { - output->takeTokens(temp); + output.takeTokens(temp); return tok->next; } const Macro &calledMacro = it->second; if (!calledMacro.functionLike()) { - output->takeTokens(temp); 
+ output.takeTokens(temp); return tok->next; } TokenList temp2(files); temp2.push_back(new Token(temp.cback()->str(), tok->location)); - const Token *tok2 = appendTokens(&temp2, loc, tok->next, macros, expandedmacros, parametertokens); + const Token * const tok2 = appendTokens(temp2, loc, tok->next, macros, expandedmacros, parametertokens); if (!tok2) return tok->next; - output->takeTokens(temp); - output->deleteToken(output->back()); + output.takeTokens(temp); + output.deleteToken(output.back()); calledMacro.expand(output, loc, temp2.cfront(), macros, expandedmacros); return tok2->next; } - const Token *expandToken(TokenList *output, const Location &loc, const Token *tok, const std::map ¯os, const std::set &expandedmacros, const std::vector ¶metertokens) const { + const Token *expandToken(TokenList &output, const Location &loc, const Token *tok, const MacroMap ¯os, const std::set &expandedmacros, const std::vector ¶metertokens) const { // Not name.. if (!tok->name) { - output->push_back(newMacroToken(tok->str(), loc, true)); + output.push_back(newMacroToken(tok->str(), loc, true, tok)); return tok->next; } // Macro parameter.. { TokenList temp(files); - if (expandArg(&temp, tok, loc, macros, expandedmacros, parametertokens)) + if (expandArg(temp, tok, loc, macros, expandedmacros, parametertokens)) { + if (tok->str() == "__VA_ARGS__" && temp.empty() && output.cback() && output.cback()->str() == "," && + tok->nextSkipComments() && tok->nextSkipComments()->str() == ")") + output.deleteToken(output.back()); return recursiveExpandToken(output, temp, loc, tok, macros, expandedmacros, parametertokens); + } } // Macro.. 
- const std::map::const_iterator it = macros.find(tok->str()); + const MacroMap::const_iterator it = macros.find(tok->str()); if (it != macros.end() && expandedmacros.find(tok->str()) == expandedmacros.end()) { std::set expandedmacros2(expandedmacros); expandedmacros2.insert(tok->str()); @@ -1730,31 +2146,39 @@ namespace simplecpp { const Macro &calledMacro = it->second; if (!calledMacro.functionLike()) { TokenList temp(files); - calledMacro.expand(&temp, loc, tok, macros, expandedmacros); + calledMacro.expand(temp, loc, tok, macros, expandedmacros); return recursiveExpandToken(output, temp, loc, tok, macros, expandedmacros2, parametertokens); } - if (!sameline(tok, tok->next) || tok->next->op != '(') { - output->push_back(newMacroToken(tok->str(), loc, true)); + if (!sameline(tok, tok->next)) { + output.push_back(newMacroToken(tok->str(), loc, true, tok)); return tok->next; } TokenList tokens(files); tokens.push_back(new Token(*tok)); - const Token *tok2 = appendTokens(&tokens, loc, tok->next, macros, expandedmacros, parametertokens); + const Token * tok2 = nullptr; + if (tok->next->op == '(') { + tok2 = appendTokens(tokens, loc, tok->next, macros, expandedmacros, parametertokens); + } + else if (expandArg(tokens, tok->next, loc, macros, expandedmacros, parametertokens)) { + tokens.front()->location = loc; + if (tokens.cfront()->next && tokens.cfront()->next->op == '(') + tok2 = tok->next; + } if (!tok2) { - output->push_back(newMacroToken(tok->str(), loc, true)); + output.push_back(newMacroToken(tok->str(), loc, true, tok)); return tok->next; } TokenList temp(files); - calledMacro.expand(&temp, loc, tokens.cfront(), macros, expandedmacros); - return recursiveExpandToken(output, temp, loc, tok2, macros, expandedmacros2, parametertokens); + calledMacro.expand(temp, loc, tokens.cfront(), macros, expandedmacros); + return recursiveExpandToken(output, temp, loc, tok2, macros, expandedmacros, parametertokens); } - else if (tok->str() == DEFINED) { - const Token *tok2 = 
tok->next; - const Token *tok3 = tok2 ? tok2->next : NULL; - const Token *tok4 = tok3 ? tok3->next : NULL; - const Token *defToken = NULL; - const Token *lastToken = NULL; + if (tok->str() == DEFINED) { + const Token * const tok2 = tok->next; + const Token * const tok3 = tok2 ? tok2->next : nullptr; + const Token * const tok4 = tok3 ? tok3->next : nullptr; + const Token *defToken = nullptr; + const Token *lastToken = nullptr; if (sameline(tok, tok4) && tok2->op == '(' && tok3->name && tok4->op == ')') { defToken = tok3; lastToken = tok4; @@ -1765,25 +2189,27 @@ namespace simplecpp { std::string macroName = defToken->str(); if (defToken->next && defToken->next->op == '#' && defToken->next->next && defToken->next->next->op == '#' && defToken->next->next->next && defToken->next->next->next->name && sameline(defToken,defToken->next->next->next)) { TokenList temp(files); - if (expandArg(&temp, defToken, parametertokens)) + if (expandArg(temp, defToken, parametertokens)) macroName = temp.cback()->str(); - if (expandArg(&temp, defToken->next->next->next, parametertokens)) - macroName += temp.cback()->str(); + if (expandArg(temp, defToken->next->next->next, parametertokens)) + macroName += temp.cback() ? temp.cback()->str() : ""; else macroName += defToken->next->next->next->str(); lastToken = defToken->next->next->next; } const bool def = (macros.find(macroName) != macros.end()); - output->push_back(newMacroToken(def ? "1" : "0", loc, true)); + output.push_back(newMacroToken(def ? 
"1" : "0", loc, true)); return lastToken->next; } } - output->push_back(newMacroToken(tok->str(), loc, true)); + output.push_back(newMacroToken(tok->str(), loc, true, tok)); + if (it != macros.end()) + output.back()->markExpandedFrom(&it->second); return tok->next; } - bool expandArg(TokenList *output, const Token *tok, const std::vector ¶metertokens) const { + bool expandArg(TokenList &output, const Token *tok, const std::vector ¶metertokens) const { if (!tok->name) return false; @@ -1796,12 +2222,12 @@ namespace simplecpp { return true; for (const Token *partok = parametertokens[argnr]->next; partok != parametertokens[argnr + 1U]; partok = partok->next) - output->push_back(new Token(*partok)); + output.push_back(new Token(*partok)); return true; } - bool expandArg(TokenList *output, const Token *tok, const Location &loc, const std::map ¯os, const std::set &expandedmacros, const std::vector ¶metertokens) const { + bool expandArg(TokenList &output, const Token *tok, const Location &loc, const MacroMap ¯os, const std::set &expandedmacros, const std::vector ¶metertokens) const { if (!tok->name) return false; const unsigned int argnr = getArgNum(tok->str()); @@ -1810,15 +2236,19 @@ namespace simplecpp { if (variadic && argnr + 1U >= parametertokens.size()) // empty variadic parameter return true; for (const Token *partok = parametertokens[argnr]->next; partok != parametertokens[argnr + 1U];) { - const std::map::const_iterator it = macros.find(partok->str()); - if (it != macros.end() && (partok->str() == name() || expandedmacros.find(partok->str()) == expandedmacros.end())) - partok = it->second.expand(output, loc, partok, macros, expandedmacros); - else { - output->push_back(newMacroToken(partok->str(), loc, isReplaced(expandedmacros))); - output->back()->macro = partok->macro; + const MacroMap::const_iterator it = macros.find(partok->str()); + if (it != macros.end() && !partok->isExpandedFrom(&it->second) && (partok->str() == name() || 
expandedmacros.find(partok->str()) == expandedmacros.end())) { + std::set expandedmacros2(expandedmacros); // temporary amnesia to allow reexpansion of currently expanding macros during argument evaluation + expandedmacros2.erase(name()); + partok = it->second.expand(output, loc, partok, macros, std::move(expandedmacros2)); + } else { + output.push_back(newMacroToken(partok->str(), loc, isReplaced(expandedmacros), partok)); + output.back()->macro = partok->macro; partok = partok->next; } } + if (tok->whitespaceahead && output.back()) + output.back()->whitespaceahead = true; return true; } @@ -1827,20 +2257,24 @@ namespace simplecpp { * @param output destination tokenlist * @param loc location for expanded token * @param tok The # token - * @param macros all macros * @param expandedmacros set with expanded macros, with this macro * @param parametertokens parameters given when expanding this macro * @return token after the X */ - const Token *expandHash(TokenList *output, const Location &loc, const Token *tok, const std::map ¯os, const std::set &expandedmacros, const std::vector ¶metertokens) const { + const Token *expandHash(TokenList &output, const Location &loc, const Token *tok, const std::set &expandedmacros, const std::vector ¶metertokens) const { TokenList tokenListHash(files); - tok = expandToken(&tokenListHash, loc, tok->next, macros, expandedmacros, parametertokens); + const MacroMap macros2; // temporarily bypass macro expansion + tok = expandToken(tokenListHash, loc, tok->next, macros2, expandedmacros, parametertokens); std::ostringstream ostr; ostr << '\"'; - for (const Token *hashtok = tokenListHash.cfront(); hashtok; hashtok = hashtok->next) + for (const Token *hashtok = tokenListHash.cfront(), *next; hashtok; hashtok = next) { + next = hashtok->next; ostr << hashtok->str(); + if (next && hashtok->whitespaceahead) + ostr << ' '; + } ostr << '\"'; - output->push_back(newMacroToken(escapeString(ostr.str()), loc, isReplaced(expandedmacros))); + 
output.push_back(newMacroToken(escapeString(ostr.str()), loc, isReplaced(expandedmacros))); return tok; } @@ -1853,76 +2287,117 @@ namespace simplecpp { * @param macros all macros * @param expandedmacros set with expanded macros, with this macro * @param parametertokens parameters given when expanding this macro + * @param expandResult expand ## result i.e. "AB"? * @return token after B */ - const Token *expandHashHash(TokenList *output, const Location &loc, const Token *tok, const std::map ¯os, const std::set &expandedmacros, const std::vector ¶metertokens) const { - Token *A = output->back(); + const Token *expandHashHash(TokenList &output, const Location &loc, const Token *tok, const MacroMap ¯os, const std::set &expandedmacros, const std::vector ¶metertokens, bool expandResult=true) const { + Token *A = output.back(); if (!A) - throw invalidHashHash(tok->location, name()); + throw invalidHashHash(tok->location, name(), "Missing first argument"); if (!sameline(tok, tok->next) || !sameline(tok, tok->next->next)) - throw invalidHashHash(tok->location, name()); + throw invalidHashHash::unexpectedNewline(tok->location, name()); - bool canBeConcatenatedWithEqual = A->isOneOf("+-*/%&|^") || A->str() == "<<" || A->str() == ">>"; - if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual) - throw invalidHashHash(tok->location, name()); + const bool canBeConcatenatedWithEqual = A->isOneOf("+-*/%&|^") || A->str() == "<<" || A->str() == ">>"; + const bool canBeConcatenatedStringOrChar = isStringLiteral(A->str()) || isCharLiteral(A->str()); + const bool unexpectedA = (!A->name && !A->number && !A->str().empty() && !canBeConcatenatedWithEqual && !canBeConcatenatedStringOrChar); - Token *B = tok->next->next; + const Token * const B = tok->next->next; if (!B->name && !B->number && B->op && !B->isOneOf("#=")) - throw invalidHashHash(tok->location, name()); + throw invalidHashHash::unexpectedToken(tok->location, name(), B); if 
((canBeConcatenatedWithEqual && B->op != '=') || (!canBeConcatenatedWithEqual && B->op == '=')) - throw invalidHashHash(tok->location, name()); + throw invalidHashHash::cannotCombine(tok->location, name(), A, B); - std::string strAB; - - const bool varargs = variadic && args.size() >= 1U && B->str() == args[args.size()-1U]; + // Superficial check; more in-depth would in theory be possible _after_ expandArg + if (canBeConcatenatedStringOrChar && (B->number || !B->name)) + throw invalidHashHash::cannotCombine(tok->location, name(), A, B); TokenList tokensB(files); - if (expandArg(&tokensB, B, parametertokens)) { - if (tokensB.empty()) - strAB = A->str(); - else if (varargs && A->op == ',') { - strAB = ","; + const Token *nextTok = B->next; + + if (canBeConcatenatedStringOrChar) { + if (unexpectedA) + throw invalidHashHash::unexpectedToken(tok->location, name(), A); + + // It seems clearer to handle this case separately even though the code is similar-ish, but we don't want to merge here. + // TODO The question is whether the ## or varargs may still apply, and how to provoke? 
+ if (expandArg(tokensB, B, parametertokens)) { + for (Token *b = tokensB.front(); b; b = b->next) + b->location = loc; } else { - strAB = A->str() + tokensB.cfront()->str(); - tokensB.deleteToken(tokensB.front()); + tokensB.push_back(new Token(*B)); + tokensB.back()->location = loc; } + output.takeTokens(tokensB); } else { - strAB = A->str() + B->str(); - } + std::string strAB; - const Token *nextTok = B->next; - if (varargs && tokensB.empty() && tok->previous->str() == ",") - output->deleteToken(A); - else if (strAB != "," && macros.find(strAB) == macros.end()) { - A->setstr(strAB); - for (Token *b = tokensB.front(); b; b = b->next) - b->location = loc; - output->takeTokens(tokensB); - } else if (nextTok->op == '#' && nextTok->next->op == '#') { - TokenList output2(files); - output2.push_back(new Token(strAB, tok->location)); - nextTok = expandHashHash(&output2, loc, nextTok, macros, expandedmacros, parametertokens); - output->deleteToken(A); - output->takeTokens(output2); - } else { - output->deleteToken(A); - TokenList tokens(files); - tokens.push_back(new Token(strAB, tok->location)); - // for function like macros, push the (...) 
- if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') { - const std::map::const_iterator it = macros.find(strAB); - if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) { - const Token *tok2 = appendTokens(&tokens, loc, B->next, macros, expandedmacros, parametertokens); - if (tok2) - nextTok = tok2->next; + const bool varargs = variadic && !args.empty() && B->str() == args[args.size()-1U]; + + if (expandArg(tokensB, B, parametertokens)) { + if (tokensB.empty()) { + strAB = A->str(); + } + else if (varargs && A->op == ',') { + strAB = ","; } + else if (varargs && unexpectedA) { + throw invalidHashHash::unexpectedToken(tok->location, name(), A); + } + else { + strAB = A->str() + tokensB.cfront()->str(); + tokensB.deleteToken(tokensB.front()); + } + } else { + if (unexpectedA) + throw invalidHashHash::unexpectedToken(tok->location, name(), A); + strAB = A->str() + B->str(); + } + + // producing universal character is undefined behavior + if (A->previous && A->previous->str() == "\\") { + if (strAB[0] == 'u' && strAB.size() == 5) + throw invalidHashHash::universalCharacterUB(tok->location, name(), A, strAB); + if (strAB[0] == 'U' && strAB.size() == 9) + throw invalidHashHash::universalCharacterUB(tok->location, name(), A, strAB); + } + + if (varargs && tokensB.empty() && tok->previous->str() == ",") { + output.deleteToken(A); + } + else if (strAB != "," && macros.find(strAB) == macros.end()) { + A->setstr(strAB); + for (Token *b = tokensB.front(); b; b = b->next) + b->location = loc; + output.takeTokens(tokensB); + } else if (sameline(B, nextTok) && sameline(B, nextTok->next) && nextTok->op == '#' && nextTok->next->op == '#') { + TokenList output2(files); + output2.push_back(new Token(strAB, tok->location)); + nextTok = expandHashHash(output2, loc, nextTok, macros, expandedmacros, parametertokens); + output.deleteToken(A); + output.takeTokens(output2); + } else { + output.deleteToken(A); + TokenList 
tokens(files); + tokens.push_back(new Token(strAB, tok->location)); + // for function like macros, push the (...) + if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') { + const MacroMap::const_iterator it = macros.find(strAB); + if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) { + const Token * const tok2 = appendTokens(tokens, loc, B->next, macros, expandedmacros, parametertokens); + if (tok2) + nextTok = tok2->next; + } + } + if (expandResult) + expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens); + else + output.takeTokens(tokens); + for (Token *b = tokensB.front(); b; b = b->next) + b->location = loc; + output.takeTokens(tokensB); } - expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens); - for (Token *b = tokensB.front(); b; b = b->next) - b->location = loc; - output->takeTokens(tokensB); } return nextTok; @@ -1930,11 +2405,11 @@ namespace simplecpp { static bool isReplaced(const std::set &expandedmacros) { // return true if size > 1 - std::set::const_iterator it = expandedmacros.begin(); - if (it == expandedmacros.end()) + auto it = expandedmacros.cbegin(); + if (it == expandedmacros.cend()) return false; ++it; - return (it != expandedmacros.end()); + return (it != expandedmacros.cend()); } /** name token in definition */ @@ -1943,9 +2418,6 @@ namespace simplecpp { /** arguments for macro */ std::vector args; - /** is macro variadic? */ - bool variadic; - /** first token in replacement string */ const Token *valueToken; @@ -1961,6 +2433,16 @@ namespace simplecpp { /** usage of this macro */ mutable std::list usageList; + /** is macro variadic? */ + bool variadic; + + /** does the macro expansion have __VA_OPT__? 
*/ + bool variadicOpt; + + /** Expansion value for varadic macros with __VA_OPT__ expanded and discarded respectively */ + const TokenList *optExpandValue{}; + const TokenList *optNoExpandValue{}; + /** was the value of this macro actually defined in the code? */ bool valueDefinedInCode_; }; @@ -1968,13 +2450,19 @@ namespace simplecpp { namespace simplecpp { +#ifdef __CYGWIN__ + static bool startsWith(const std::string &s, const std::string &p) + { + return (s.size() >= p.size()) && std::equal(p.begin(), p.end(), s.begin()); + } + std::string convertCygwinToWindowsPath(const std::string &cygwinPath) { std::string windowsPath; std::string::size_type pos = 0; if (cygwinPath.size() >= 11 && startsWith(cygwinPath, "/cygdrive/")) { - unsigned char driveLetter = cygwinPath[10]; + const unsigned char driveLetter = cygwinPath[10]; if (std::isalpha(driveLetter)) { if (cygwinPath.size() == 11) { windowsPath = toupper(driveLetter); @@ -1997,170 +2485,28 @@ namespace simplecpp { return windowsPath; } -} +#endif + bool isAbsolutePath(const std::string &path) + { #ifdef SIMPLECPP_WINDOWS - -class ScopedLock { -public: - explicit ScopedLock(CRITICAL_SECTION& criticalSection) - : m_criticalSection(criticalSection) { - EnterCriticalSection(&m_criticalSection); - } - - ~ScopedLock() { - LeaveCriticalSection(&m_criticalSection); - } - -private: - ScopedLock& operator=(const ScopedLock&); - ScopedLock(const ScopedLock&); - - CRITICAL_SECTION& m_criticalSection; -}; - -class RealFileNameMap { -public: - RealFileNameMap() { - InitializeCriticalSection(&m_criticalSection); - } - - ~RealFileNameMap() { - DeleteCriticalSection(&m_criticalSection); - } - - bool getCacheEntry(const std::string& path, std::string* returnPath) { - ScopedLock lock(m_criticalSection); - - std::map::iterator it = m_fileMap.find(path); - if (it != m_fileMap.end()) { - *returnPath = it->second; + // C:\\path\\file + // C:/path/file + if (path.length() >= 3 && std::isalpha(path[0]) && path[1] == ':' && (path[2] == 
'\\' || path[2] == '/')) return true; - } - return false; - } - - void addToCache(const std::string& path, const std::string& actualPath) { - ScopedLock lock(m_criticalSection); - m_fileMap[path] = actualPath; - } - -private: - std::map m_fileMap; - CRITICAL_SECTION m_criticalSection; -}; - -static RealFileNameMap realFileNameMap; -static bool realFileName(const std::string &f, std::string *result) -{ - // are there alpha characters in last subpath? - bool alpha = false; - for (std::string::size_type pos = 1; pos <= f.size(); ++pos) { - unsigned char c = f[f.size() - pos]; - if (c == '/' || c == '\\') - break; - if (std::isalpha(c)) { - alpha = true; - break; - } - } + // \\host\path\file + // //host/path/file + if (path.length() >= 2 && (path[0] == '\\' || path[0] == '/') && (path[1] == '\\' || path[1] == '/')) + return true; - // do not convert this path if there are no alpha characters (either pointless or cause wrong results for . and ..) - if (!alpha) return false; - - // Lookup filename or foldername on file system - if (!realFileNameMap.getCacheEntry(f, result)) { - - WIN32_FIND_DATAA FindFileData; - -#ifdef __CYGWIN__ - std::string fConverted = simplecpp::convertCygwinToWindowsPath(f); - HANDLE hFind = FindFirstFileExA(fConverted.c_str(), FindExInfoBasic, &FindFileData, FindExSearchNameMatch, NULL, 0); #else - HANDLE hFind = FindFirstFileExA(f.c_str(), FindExInfoBasic, &FindFileData, FindExSearchNameMatch, NULL, 0); + return !path.empty() && path[0] == '/'; #endif - - if (INVALID_HANDLE_VALUE == hFind) - return false; - *result = FindFileData.cFileName; - realFileNameMap.addToCache(f, *result); - FindClose(hFind); } - return true; } -static RealFileNameMap realFilePathMap; - -/** Change case in given path to match filesystem */ -static std::string realFilename(const std::string &f) -{ - std::string ret; - ret.reserve(f.size()); // this will be the final size - if (realFilePathMap.getCacheEntry(f, &ret)) - return ret; - - // Current subpath - std::string 
subpath; - - for (std::string::size_type pos = 0; pos < f.size(); ++pos) { - unsigned char c = f[pos]; - - // Separator.. add subpath and separator - if (c == '/' || c == '\\') { - // if subpath is empty just add separator - if (subpath.empty()) { - ret += c; - continue; - } - - bool isDriveSpecification = - (pos == 2 && subpath.size() == 2 && std::isalpha(subpath[0]) && subpath[1] == ':'); - - // Append real filename (proper case) - std::string f2; - if (!isDriveSpecification && realFileName(f.substr(0, pos), &f2)) - ret += f2; - else - ret += subpath; - - subpath.clear(); - - // Append separator - ret += c; - } else { - subpath += c; - } - } - - if (!subpath.empty()) { - std::string f2; - if (realFileName(f,&f2)) - ret += f2; - else - ret += subpath; - } - - realFilePathMap.addToCache(f, ret); - return ret; -} - -static bool isAbsolutePath(const std::string &path) -{ - if (path.length() >= 3 && path[0] > 0 && std::isalpha(path[0]) && path[1] == ':' && (path[2] == '\\' || path[2] == '/')) - return true; - return path.length() > 1U && (path[0] == '/' || path[0] == '\\'); -} -#else -#define realFilename(f) f - -static bool isAbsolutePath(const std::string &path) -{ - return path.length() > 1U && path[0] == '/'; -} -#endif - namespace simplecpp { /** * perform path simplifications for . and .. @@ -2205,14 +2551,19 @@ namespace simplecpp { continue; } // get previous subpath - const std::string::size_type pos1 = path.rfind('/', pos - 1U) + 1U; - const std::string previousSubPath = path.substr(pos1, pos-pos1); + std::string::size_type pos1 = path.rfind('/', pos - 1U); + if (pos1 == std::string::npos) { + pos1 = 0; + } else { + pos1 += 1U; + } + const std::string previousSubPath = path.substr(pos1, pos - pos1); if (previousSubPath == "..") { // don't simplify ++pos; } else { // remove previous subpath and ".." 
- path.erase(pos1,pos-pos1+4); + path.erase(pos1, pos - pos1 + 4); if (path.empty()) path = "."; // update pos @@ -2227,21 +2578,23 @@ namespace simplecpp { if (unc) path = '/' + path; - return path.find_first_of("*?") == std::string::npos ? realFilename(path) : path; + return path; } } -/** Evaluate sizeof(type) */ +/** Evaluate sizeof(type) + * @throws std::runtime_error thrown on missing arguments or invalid expression + */ static void simplifySizeof(simplecpp::TokenList &expr, const std::map &sizeOfType) { for (simplecpp::Token *tok = expr.front(); tok; tok = tok->next) { if (tok->str() != "sizeof") continue; - simplecpp::Token *tok1 = tok->next; + const simplecpp::Token *tok1 = tok->next; if (!tok1) { throw std::runtime_error("missing sizeof argument"); } - simplecpp::Token *tok2 = tok1->next; + const simplecpp::Token *tok2 = tok1->next; if (!tok2) { throw std::runtime_error("missing sizeof argument"); } @@ -2256,7 +2609,7 @@ static void simplifySizeof(simplecpp::TokenList &expr, const std::mapnext) { + for (const simplecpp::Token *typeToken = tok1; typeToken != tok2; typeToken = typeToken->next) { if ((typeToken->str() == "unsigned" || typeToken->str() == "signed") && typeToken->next->name) continue; if (typeToken->str() == "*" && type.find('*') != std::string::npos) @@ -2278,12 +2631,95 @@ static void simplifySizeof(simplecpp::TokenList &expr, const std::map altop(&altopData[0], &altopData[8]); +static bool isCpp17OrLater(const simplecpp::DUI &dui) +{ + const std::string std_ver = simplecpp::getCppStdString(dui.std); + return std_ver.empty() || (std_ver >= "201703L"); +} + +static bool isGnu(const simplecpp::DUI &dui) +{ + return dui.std.rfind("gnu", 0) != std::string::npos; +} + +static std::string dirPath(const std::string& path, bool withTrailingSlash=true) +{ + const std::size_t lastSlash = path.find_last_of("\\/"); + if (lastSlash == std::string::npos) { + return ""; + } + return path.substr(0, lastSlash + (withTrailingSlash ? 
1U : 0U)); +} + +static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader); + +/** Evaluate __has_include(include) + * @throws std::runtime_error thrown on missing arguments or invalid expression + */ +static void simplifyHasInclude(simplecpp::TokenList &expr, const simplecpp::DUI &dui) +{ + if (!isCpp17OrLater(dui) && !isGnu(dui)) + return; + + for (simplecpp::Token *tok = expr.front(); tok; tok = tok->next) { + if (tok->str() != HAS_INCLUDE) + continue; + const simplecpp::Token *tok1 = tok->next; + if (!tok1) { + throw std::runtime_error("missing __has_include argument"); + } + const simplecpp::Token *tok2 = tok1->next; + if (!tok2) { + throw std::runtime_error("missing __has_include argument"); + } + if (tok1->op == '(') { + tok1 = tok1->next; + while (tok2->op != ')') { + tok2 = tok2->next; + if (!tok2) { + throw std::runtime_error("invalid __has_include expression"); + } + } + } + + const std::string &sourcefile = expr.file(tok->location); + const bool systemheader = (tok1 && tok1->op == '<'); + std::string header; + if (systemheader) { + const simplecpp::Token *tok3 = tok1->next; + if (!tok3) { + throw std::runtime_error("missing __has_include closing angular bracket"); + } + while (tok3->op != '>') { + tok3 = tok3->next; + if (!tok3) { + throw std::runtime_error("invalid __has_include expression"); + } + } + + for (const simplecpp::Token *headerToken = tok1->next; headerToken != tok3; headerToken = headerToken->next) + header += headerToken->str(); + } else { + header = tok1->str().substr(1U, tok1->str().size() - 2U); + } + std::ifstream f; + const std::string header2 = openHeader(f,dui,sourcefile,header,systemheader); + tok->setstr(header2.empty() ? 
"0" : "1"); + + tok2 = tok2->next; + while (tok->next != tok2) + expr.deleteToken(tok->next); + } +} + +/** Evaluate name + * @throws std::runtime_error thrown on undefined function-like macro + */ static void simplifyName(simplecpp::TokenList &expr) { for (simplecpp::Token *tok = expr.front(); tok; tok = tok->next) { if (tok->name) { + static const std::set altop{"and","or","bitand","bitor","compl","not","not_eq","xor"}; if (altop.find(tok->str()) != altop.end()) { bool alt; if (tok->str() == "not" || tok->str() == "compl") { @@ -2294,6 +2730,8 @@ static void simplifyName(simplecpp::TokenList &expr) if (alt) continue; } + if (tok->next && tok->next->str() == "(") + throw std::runtime_error("undefined function-like macro invocation: " + tok->str() + "( ... )"); tok->setstr("0"); } } @@ -2305,7 +2743,7 @@ static void simplifyName(simplecpp::TokenList &expr) * unsigned long long value, updating pos to point to the first * unused element of s. * Returns ULLONG_MAX if the result is not representable and - * throws if the above requirements were not possible to satisfy. + * @throws std::runtime_error thrown if the above requirements were not possible to satisfy. */ static unsigned long long stringToULLbounded( const std::string& s, @@ -2313,46 +2751,18 @@ static unsigned long long stringToULLbounded( int base = 0, std::ptrdiff_t minlen = 1, std::size_t maxlen = std::string::npos -) + ) { - std::string sub = s.substr(pos, maxlen); - const char* start = sub.c_str(); + const std::string sub = s.substr(pos, maxlen); + const char * const start = sub.c_str(); char* end; - unsigned long long value = std::strtoull(start, &end, base); + const unsigned long long value = std::strtoull(start, &end, base); pos += end - start; if (end - start < minlen) throw std::runtime_error("expected digit"); return value; } -/* Converts character literal (including prefix, but not ud-suffix) - * to long long value. 
- * - * Assumes ASCII-compatible single-byte encoded str for narrow literals - * and UTF-8 otherwise. - * - * For target assumes - * - execution character set encoding matching str - * - UTF-32 execution wide-character set encoding - * - requirements for __STDC_UTF_16__, __STDC_UTF_32__ and __STDC_ISO_10646__ satisfied - * - char16_t is 16bit wide - * - char32_t is 32bit wide - * - wchar_t is 32bit wide and unsigned - * - matching char signedness to host - * - matching sizeof(int) to host - * - * For host assumes - * - ASCII-compatible execution character set - * - * For host and target assumes - * - CHAR_BIT == 8 - * - two's complement - * - * Implements multi-character narrow literals according to GCC's behavior, - * except multi code unit universal character names are not supported. - * Multi-character wide literals are not supported. - * Limited support of universal character names for non-UTF-8 execution character set encodings. - */ long long simplecpp::characterLiteralToLL(const std::string& str) { // default is wide/utf32 @@ -2362,7 +2772,7 @@ long long simplecpp::characterLiteralToLL(const std::string& str) std::size_t pos; - if (str.size() >= 1 && str[0] == '\'') { + if (!str.empty() && str[0] == '\'') { narrow = true; pos = 1; } else if (str.size() >= 2 && str[0] == 'u' && str[1] == '\'') { @@ -2373,8 +2783,9 @@ long long simplecpp::characterLiteralToLL(const std::string& str) pos = 3; } else if (str.size() >= 2 && (str[0] == 'L' || str[0] == 'U') && str[1] == '\'') { pos = 2; - } else + } else { throw std::runtime_error("expected a character literal"); + } unsigned long long multivalue = 0; @@ -2391,7 +2802,7 @@ long long simplecpp::characterLiteralToLL(const std::string& str) if (str[pos] == '\\') { pos++; - char escape = str[pos++]; + const char escape = str[pos++]; if (pos >= str.size()) throw std::runtime_error("unexpected end of character literal"); @@ -2458,7 +2869,7 @@ long long simplecpp::characterLiteralToLL(const std::string& str) case 'u': 
case 'U': { // universal character names have exactly 4 or 8 digits - std::size_t ndigits = (escape == 'u' ? 4 : 8); + const std::size_t ndigits = (escape == 'u' ? 4 : 8); value = stringToULLbounded(str, pos, 16, ndigits, ndigits); // UTF-8 encodes code points above 0x7f in multiple code units @@ -2486,7 +2897,7 @@ long long simplecpp::characterLiteralToLL(const std::string& str) int additional_bytes; if (value >= 0xf5) // higher values would result in code points above 0x10ffff throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid"); - else if (value >= 0xf0) + if (value >= 0xf0) additional_bytes = 3; else if (value >= 0xe0) additional_bytes = 2; @@ -2501,7 +2912,7 @@ long long simplecpp::characterLiteralToLL(const std::string& str) if (pos + 1 >= str.size()) throw std::runtime_error("assumed UTF-8 encoded source, but character literal ends unexpectedly"); - unsigned char c = str[pos++]; + const unsigned char c = str[pos++]; if (((c >> 6) != 2) // ensure c has form 0xb10xxxxxx || (!value && additional_bytes == 1 && c < 0xa0) // overlong 3-bytes encoding @@ -2546,6 +2957,9 @@ long long simplecpp::characterLiteralToLL(const std::string& str) return multivalue; } +/** + * @throws std::runtime_error thrown on invalid literal + */ static void simplifyNumbers(simplecpp::TokenList &expr) { for (simplecpp::Token *tok = expr.front(); tok; tok = tok->next) { @@ -2553,14 +2967,31 @@ static void simplifyNumbers(simplecpp::TokenList &expr) continue; if (tok->str().compare(0,2,"0x") == 0) tok->setstr(toString(stringToULL(tok->str()))); - else if (!tok->number && tok->str().find('\'') != tok->str().npos) + else if (!tok->number && tok->str().find('\'') != std::string::npos) tok->setstr(toString(simplecpp::characterLiteralToLL(tok->str()))); } } -static long long evaluate(simplecpp::TokenList &expr, const std::map &sizeOfType) +static void simplifyComments(simplecpp::TokenList &expr) { + for (simplecpp::Token *tok = expr.front(); tok;) { + 
simplecpp::Token * const d = tok; + tok = tok->next; + if (d->comment) + expr.deleteToken(d); + } +} + +/** + * @throws std::runtime_error thrown on invalid literals, missing sizeof arguments or invalid expressions, + * missing __has_include() arguments or expressions, undefined function-like macros, invalid number literals + * @throws std::overflow_error thrown on overflow or division by zero + */ +static long long evaluate(simplecpp::TokenList &expr, const simplecpp::DUI &dui, const std::map &sizeOfType) +{ + simplifyComments(expr); simplifySizeof(expr, sizeOfType); + simplifyHasInclude(expr, dui); simplifyName(expr); simplifyNumbers(expr); expr.constFold(); @@ -2581,166 +3012,224 @@ static const simplecpp::Token *gotoNextLine(const simplecpp::Token *tok) class NonExistingFilesCache { public: - NonExistingFilesCache() { - InitializeCriticalSection(&m_criticalSection); - } - - ~NonExistingFilesCache() { - DeleteCriticalSection(&m_criticalSection); - } + NonExistingFilesCache() {} bool contains(const std::string& path) { - ScopedLock lock(m_criticalSection); + std::lock_guard lock(m_mutex); return (m_pathSet.find(path) != m_pathSet.end()); } void add(const std::string& path) { - ScopedLock lock(m_criticalSection); + std::lock_guard lock(m_mutex); m_pathSet.insert(path); } + void clear() { + std::lock_guard lock(m_mutex); + m_pathSet.clear(); + } + private: std::set m_pathSet; - CRITICAL_SECTION m_criticalSection; + std::mutex m_mutex; }; static NonExistingFilesCache nonExistingFilesCache; #endif -static std::string _openHeader(std::ifstream &f, const std::string &path) +static std::string openHeaderDirect(std::ifstream &f, const std::string &path) { #ifdef SIMPLECPP_WINDOWS - std::string simplePath = simplecpp::simplifyPath(path); - if (nonExistingFilesCache.contains(simplePath)) + if (nonExistingFilesCache.contains(path)) return ""; // file is known not to exist, skip expensive file open call - - f.open(simplePath.c_str()); - if (f.is_open()) - return simplePath; 
- else { - nonExistingFilesCache.add(simplePath); - return ""; - } -#else +#endif f.open(path.c_str()); - return f.is_open() ? simplecpp::simplifyPath(path) : ""; + if (f.is_open()) + return path; +#ifdef SIMPLECPP_WINDOWS + nonExistingFilesCache.add(path); #endif + return ""; } -static std::string getRelativeFileName(const std::string &sourcefile, const std::string &header) -{ - if (sourcefile.find_first_of("\\/") != std::string::npos) - return simplecpp::simplifyPath(sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header); - return simplecpp::simplifyPath(header); -} - -static std::string openHeaderRelative(std::ifstream &f, const std::string &sourcefile, const std::string &header) +static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader) { - return _openHeader(f, getRelativeFileName(sourcefile, header)); -} + if (simplecpp::isAbsolutePath(header)) + return openHeaderDirect(f, simplecpp::simplifyPath(header)); -static std::string getIncludePathFileName(const std::string &includePath, const std::string &header) -{ - std::string path = includePath; - if (!path.empty() && path[path.size()-1U]!='/' && path[path.size()-1U]!='\\') - path += '/'; - return path + header; -} + // prefer first to search the header relatively to source file if found, when not a system header + if (!systemheader) { + std::string path = openHeaderDirect(f, simplecpp::simplifyPath(dirPath(sourcefile) + header)); + if (!path.empty()) { + return path; + } + } -static std::string openHeaderIncludePath(std::ifstream &f, const simplecpp::DUI &dui, const std::string &header) -{ - for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - std::string simplePath = _openHeader(f, getIncludePathFileName(*it, header)); - if (!simplePath.empty()) - return simplePath; + // search the header on the include paths (provided by the flags "-I...") + for (const 
auto &includePath : dui.includePaths) { + std::string path = openHeaderDirect(f, simplecpp::simplifyPath(includePath + "/" + header)); + if (!path.empty()) + return path; } return ""; } -static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader) +std::pair simplecpp::FileDataCache::tryload(FileDataCache::name_map_type::iterator &name_it, const simplecpp::DUI &dui, std::vector &filenames, simplecpp::OutputList *outputList) { - if (isAbsolutePath(header)) - return _openHeader(f, header); + const std::string &path = name_it->first; + FileID fileId; - std::string ret; + if (!getFileId(path, fileId)) + return {nullptr, false}; - if (systemheader) { - ret = openHeaderIncludePath(f, dui, header); - return ret.empty() ? openHeaderRelative(f, sourcefile, header) : ret; + const auto id_it = mIdMap.find(fileId); + if (id_it != mIdMap.end()) { + name_it->second = id_it->second; + return {id_it->second, false}; } - ret = openHeaderRelative(f, sourcefile, header); - return ret.empty() ? openHeaderIncludePath(f, dui, header) : ret; + auto *const data = new FileData {path, TokenList(path, filenames, outputList)}; + + if (dui.removeComments) + data->tokens.removeComments(); + + name_it->second = data; + mIdMap.emplace(fileId, data); + mData.emplace_back(data); + + return {data, true}; } -static std::string getFileName(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) +std::pair simplecpp::FileDataCache::get(const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader, std::vector &filenames, simplecpp::OutputList *outputList) { - if (filedata.empty()) { - return ""; - } if (isAbsolutePath(header)) { - return (filedata.find(header) != filedata.end()) ? 
simplecpp::simplifyPath(header) : ""; + auto ins = mNameMap.emplace(simplecpp::simplifyPath(header), nullptr); + + if (ins.second) { + const auto ret = tryload(ins.first, dui, filenames, outputList); + if (ret.first != nullptr) { + return ret; + } + } else { + return {ins.first->second, false}; + } + + return {nullptr, false}; } - const std::string relativeFilename = getRelativeFileName(sourcefile, header); - if (!systemheader && filedata.find(relativeFilename) != filedata.end()) - return relativeFilename; + if (!systemheader) { + auto ins = mNameMap.emplace(simplecpp::simplifyPath(dirPath(sourcefile) + header), nullptr); - for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { - std::string s = simplecpp::simplifyPath(getIncludePathFileName(*it, header)); - if (filedata.find(s) != filedata.end()) - return s; + if (ins.second) { + const auto ret = tryload(ins.first, dui, filenames, outputList); + if (ret.first != nullptr) { + return ret; + } + } else if (ins.first->second != nullptr) { + return {ins.first->second, false}; + } } - if (filedata.find(relativeFilename) != filedata.end()) - return relativeFilename; + for (const auto &includePath : dui.includePaths) { + auto ins = mNameMap.emplace(simplecpp::simplifyPath(includePath + "/" + header), nullptr); - return ""; + if (ins.second) { + const auto ret = tryload(ins.first, dui, filenames, outputList); + if (ret.first != nullptr) { + return ret; + } + } else if (ins.first->second != nullptr) { + return {ins.first->second, false}; + } + } + + return {nullptr, false}; } -static bool hasFile(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) +bool simplecpp::FileDataCache::getFileId(const std::string &path, FileID &id) { - return !getFileName(filedata, sourcefile, header, dui, systemheader).empty(); +#ifdef _WIN32 + HANDLE hFile = CreateFileA(path.c_str(), 0, FILE_SHARE_READ | FILE_SHARE_WRITE, 
nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + + if (hFile == INVALID_HANDLE_VALUE) + return false; + + BOOL ret = GetFileInformationByHandleEx(hFile, FileIdInfo, &id.fileIdInfo, sizeof(id.fileIdInfo)); + if (!ret) { + const DWORD err = GetLastError(); + if (err == ERROR_INVALID_PARAMETER || // encountered when using a non-NTFS filesystem e.g. exFAT + err == ERROR_NOT_SUPPORTED) // encountered on Windows Server Core (used as a Docker container) + { + BY_HANDLE_FILE_INFORMATION fileInfo; + ret = GetFileInformationByHandle(hFile, &fileInfo); + if (ret) { + id.fileIdInfo.VolumeSerialNumber = static_cast(fileInfo.dwVolumeSerialNumber); + id.fileIdInfo.FileId.IdentifierHi = static_cast(fileInfo.nFileIndexHigh); + id.fileIdInfo.FileId.IdentifierLo = static_cast(fileInfo.nFileIndexLow); + } + } + } + + CloseHandle(hFile); + + return ret == TRUE; +#else + struct stat statbuf; + + if (stat(path.c_str(), &statbuf) != 0) + return false; + + id.dev = statbuf.st_dev; + id.ino = statbuf.st_ino; + + return true; +#endif } -std::map simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &fileNumbers, const simplecpp::DUI &dui, simplecpp::OutputList *outputList) +simplecpp::FileDataCache simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &filenames, const simplecpp::DUI &dui, simplecpp::OutputList *outputList, FileDataCache cache) { - std::map ret; +#ifdef SIMPLECPP_WINDOWS + if (dui.clearIncludeCache) + nonExistingFilesCache.clear(); +#endif std::list filelist; // -include files - for (std::list::const_iterator it = dui.includes.begin(); it != dui.includes.end(); ++it) { - const std::string &filename = realFilename(*it); + for (auto it = dui.includes.cbegin(); it != dui.includes.cend(); ++it) { + const std::string &filename = *it; - if (ret.find(filename) != ret.end()) - continue; + const auto loadResult = cache.get("", filename, dui, false, filenames, outputList); + const bool loaded = loadResult.second; + FileData *const filedata = 
loadResult.first; - std::ifstream fin(filename.c_str()); - if (!fin.is_open()) { + if (filedata == nullptr) { if (outputList) { - simplecpp::Output err(fileNumbers); - err.type = simplecpp::Output::EXPLICIT_INCLUDE_NOT_FOUND; - err.location = Location(fileNumbers); - err.msg = "Can not open include file '" + filename + "' that is explicitly included."; - outputList->push_back(err); + simplecpp::Output err{ + simplecpp::Output::EXPLICIT_INCLUDE_NOT_FOUND, + {}, + "Can not open include file '" + filename + "' that is explicitly included." + }; + outputList->emplace_back(std::move(err)); } continue; } - TokenList *tokenlist = new TokenList(fin, fileNumbers, filename, outputList); - if (!tokenlist->front()) { - delete tokenlist; + if (!loaded) + continue; + + if (!filedata->tokens.front()) continue; - } - ret[filename] = tokenlist; - filelist.push_back(tokenlist->front()); + if (dui.removeComments) + filedata->tokens.removeComments(); + + filelist.emplace_back(filedata->tokens.front()); } - for (const Token *rawtok = rawtokens.cfront(); rawtok || !filelist.empty(); rawtok = rawtok ? rawtok->next : NULL) { - if (rawtok == NULL) { + for (const Token *rawtok = rawtokens.cfront(); rawtok || !filelist.empty(); rawtok = rawtok ? 
rawtok->next : nullptr) { + if (rawtok == nullptr) { rawtok = filelist.back(); filelist.pop_back(); } @@ -2752,46 +3241,51 @@ std::map simplecpp::load(const simplecpp::To if (!rawtok || rawtok->str() != INCLUDE) continue; - const std::string &sourcefile = rawtok->location.file(); + const std::string &sourcefile = rawtokens.file(rawtok->location); - const Token *htok = rawtok->nextSkipComments(); + const Token * const htok = rawtok->nextSkipComments(); if (!sameline(rawtok, htok)) continue; - bool systemheader = (htok->str()[0] == '<'); - const std::string header(realFilename(htok->str().substr(1U, htok->str().size() - 2U))); - if (hasFile(ret, sourcefile, header, dui, systemheader)) + const bool systemheader = (htok->str()[0] == '<'); + const std::string header(htok->str().substr(1U, htok->str().size() - 2U)); + + const auto loadResult = cache.get(sourcefile, header, dui, systemheader, filenames, outputList); + const bool loaded = loadResult.second; + + if (!loaded) continue; - std::ifstream f; - const std::string header2 = openHeader(f,dui,sourcefile,header,systemheader); - if (!f.is_open()) + FileData *const filedata = loadResult.first; + + if (!filedata->tokens.front()) continue; - TokenList *tokens = new TokenList(f, fileNumbers, header2, outputList); - ret[header2] = tokens; - if (tokens->front()) - filelist.push_back(tokens->front()); + if (dui.removeComments) + filedata->tokens.removeComments(); + + filelist.emplace_back(filedata->tokens.front()); } - return ret; + return cache; } -static bool preprocessToken(simplecpp::TokenList &output, const simplecpp::Token **tok1, std::map ¯os, std::vector &files, simplecpp::OutputList *outputList) +static bool preprocessToken(simplecpp::TokenList &output, const simplecpp::Token *&tok1, simplecpp::MacroMap ¯os, std::vector &files, simplecpp::OutputList *outputList) { - const simplecpp::Token *tok = *tok1; - const std::map::const_iterator it = macros.find(tok->str()); + const simplecpp::Token * const tok = tok1; + const 
simplecpp::MacroMap::const_iterator it = tok->name ? macros.find(tok->str()) : macros.end(); if (it != macros.end()) { simplecpp::TokenList value(files); try { - *tok1 = it->second.expand(&value, tok, macros, files); - } catch (simplecpp::Macro::Error &err) { + tok1 = it->second.expand(value, tok, macros, files); + } catch (const simplecpp::Macro::Error &err) { if (outputList) { - simplecpp::Output out(files); - out.type = simplecpp::Output::SYNTAX_ERROR; - out.location = err.location; - out.msg = "failed to expand \'" + tok->str() + "\', " + err.what; - outputList->push_back(out); + simplecpp::Output out{ + simplecpp::Output::SYNTAX_ERROR, + err.location, + "failed to expand \'" + tok->str() + "\', " + err.what + }; + outputList->emplace_back(std::move(out)); } return false; } @@ -2799,13 +3293,44 @@ static bool preprocessToken(simplecpp::TokenList &output, const simplecpp::Token } else { if (!tok->comment) output.push_back(new simplecpp::Token(*tok)); - *tok1 = tok->next; + tok1 = tok->next; } return true; } -void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenList &rawtokens, std::vector &files, std::map &filedata, const simplecpp::DUI &dui, simplecpp::OutputList *outputList, std::list *macroUsage, std::list *ifCond) +static void getLocaltime(struct tm <ime) +{ + time_t t; + time(&t); +#ifndef _WIN32 + // NOLINTNEXTLINE(misc-include-cleaner) - false positive + localtime_r(&t, <ime); +#else + localtime_s(<ime, &t); +#endif +} + +static std::string getDateDefine(const struct tm *timep) +{ + char buf[] = "??? ?? 
????"; + strftime(buf, sizeof(buf), "%b %d %Y", timep); + return std::string("\"").append(buf).append("\""); +} + +static std::string getTimeDefine(const struct tm *timep) { + char buf[] = "??:??:??"; + strftime(buf, sizeof(buf), "%H:%M:%S", timep); + return std::string("\"").append(buf).append("\""); +} + +void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenList &rawtokens, std::vector &files, simplecpp::FileDataCache &cache, const simplecpp::DUI &dui, simplecpp::OutputList *outputList, std::list *macroUsage, std::list *ifCond) +{ +#ifdef SIMPLECPP_WINDOWS + if (dui.clearIncludeCache) + nonExistingFilesCache.clear(); +#endif + std::map sizeOfType(rawtokens.sizeOfType); sizeOfType.insert(std::make_pair("char", sizeof(char))); sizeOfType.insert(std::make_pair("short", sizeof(short))); @@ -2828,60 +3353,118 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL sizeOfType.insert(std::make_pair("double *", sizeof(double *))); sizeOfType.insert(std::make_pair("long double *", sizeof(long double *))); - const bool hasInclude = (dui.std.size() == 5 && dui.std.compare(0,3,"c++") == 0 && dui.std >= "c++17"); - std::map macros; - for (std::list::const_iterator it = dui.defines.begin(); it != dui.defines.end(); ++it) { + // use a dummy vector for the macros because as this is not part of the file and would add an empty entry - e.g. 
/usr/include/poll.h + std::vector dummy; + + const bool hasInclude = isCpp17OrLater(dui) || isGnu(dui); + MacroMap macros; + bool strictAnsiDefined = false; + for (auto it = dui.defines.cbegin(); it != dui.defines.cend(); ++it) { const std::string ¯ostr = *it; const std::string::size_type eq = macrostr.find('='); const std::string::size_type par = macrostr.find('('); const std::string macroname = macrostr.substr(0, std::min(eq,par)); + if (macroname == "__STRICT_ANSI__") + strictAnsiDefined = true; if (dui.undefined.find(macroname) != dui.undefined.end()) continue; const std::string lhs(macrostr.substr(0,eq)); const std::string rhs(eq==std::string::npos ? std::string("1") : macrostr.substr(eq+1)); - const Macro macro(lhs, rhs, files); - macros.insert(std::pair(macro.name(), macro)); - } - - macros.insert(std::make_pair("__FILE__", Macro("__FILE__", "__FILE__", files))); - macros.insert(std::make_pair("__LINE__", Macro("__LINE__", "__LINE__", files))); - macros.insert(std::make_pair("__COUNTER__", Macro("__COUNTER__", "__COUNTER__", files))); - - if (dui.std == "c++11") - macros.insert(std::make_pair("__cplusplus", Macro("__cplusplus", "201103L", files))); - else if (dui.std == "c++14") - macros.insert(std::make_pair("__cplusplus", Macro("__cplusplus", "201402L", files))); - else if (dui.std == "c++17") - macros.insert(std::make_pair("__cplusplus", Macro("__cplusplus", "201703L", files))); - else if (dui.std == "c++20") - macros.insert(std::make_pair("__cplusplus", Macro("__cplusplus", "202002L", files))); - - // TRUE => code in current #if block should be kept - // ELSE_IS_TRUE => code in current #if block should be dropped. the code in the #else should be kept. 
- // ALWAYS_FALSE => drop all code in #if and #else - enum IfState { TRUE, ELSE_IS_TRUE, ALWAYS_FALSE }; + try { + const Macro macro(lhs, rhs, dummy); + macros.insert(std::pair(macro.name(), macro)); + } catch (const std::runtime_error& e) { + if (outputList) { + simplecpp::Output err{ + Output::DUI_ERROR, + {}, + e.what() + }; + outputList->emplace_back(std::move(err)); + } + output.clear(); + return; + } catch (const simplecpp::Macro::Error& e) { + if (outputList) { + simplecpp::Output err{ + Output::DUI_ERROR, + {}, + e.what + }; + outputList->emplace_back(std::move(err)); + } + output.clear(); + return; + } + } + + const bool strictAnsiUndefined = dui.undefined.find("__STRICT_ANSI__") != dui.undefined.cend(); + if (!isGnu(dui) && !strictAnsiDefined && !strictAnsiUndefined) + macros.insert(std::pair("__STRICT_ANSI__", Macro("__STRICT_ANSI__", "1", dummy))); + + macros.insert(std::make_pair("__FILE__", Macro("__FILE__", "__FILE__", dummy))); + macros.insert(std::make_pair("__LINE__", Macro("__LINE__", "__LINE__", dummy))); + macros.insert(std::make_pair("__COUNTER__", Macro("__COUNTER__", "__COUNTER__", dummy))); + struct tm ltime {}; + getLocaltime(ltime); + macros.insert(std::make_pair("__DATE__", Macro("__DATE__", getDateDefine(<ime), dummy))); + macros.insert(std::make_pair("__TIME__", Macro("__TIME__", getTimeDefine(<ime), dummy))); + + if (!dui.std.empty()) { + const cstd_t c_std = simplecpp::getCStd(dui.std); + if (c_std != CUnknown) { + const std::string std_def = simplecpp::getCStdString(c_std); + if (!std_def.empty()) + macros.insert(std::make_pair("__STDC_VERSION__", Macro("__STDC_VERSION__", std_def, dummy))); + } else { + const cppstd_t cpp_std = simplecpp::getCppStd(dui.std); + if (cpp_std == CPPUnknown) { + if (outputList) { + simplecpp::Output err{ + Output::DUI_ERROR, + {}, + "unknown standard specified: '" + dui.std + "'" + }; + outputList->emplace_back(std::move(err)); + } + output.clear(); + return; + } + const std::string std_def = 
simplecpp::getCppStdString(cpp_std); + if (!std_def.empty()) + macros.insert(std::make_pair("__cplusplus", Macro("__cplusplus", std_def, dummy))); + } + } + + // True => code in current #if block should be kept + // ElseIsTrue => code in current #if block should be dropped. the code in the #else should be kept. + // AlwaysFalse => drop all code in #if and #else + enum IfState : std::uint8_t { True, ElseIsTrue, AlwaysFalse }; std::stack ifstates; - ifstates.push(TRUE); + std::stack iftokens; + ifstates.push(True); std::stack includetokenstack; std::set pragmaOnce; includetokenstack.push(rawtokens.cfront()); - for (std::list::const_iterator it = dui.includes.begin(); it != dui.includes.end(); ++it) { - const std::map::const_iterator f = filedata.find(*it); - if (f != filedata.end()) - includetokenstack.push(f->second->cfront()); + for (auto it = dui.includes.cbegin(); it != dui.includes.cend(); ++it) { + const FileData *const filedata = cache.get("", *it, dui, false, files, outputList).first; + if (filedata != nullptr && filedata->tokens.cfront() != nullptr) + includetokenstack.push(filedata->tokens.cfront()); } - for (const Token *rawtok = NULL; rawtok || !includetokenstack.empty();) { - if (rawtok == NULL) { + std::map> maybeUsedMacros; + + for (const Token *rawtok = nullptr; rawtok || !includetokenstack.empty();) { + if (rawtok == nullptr) { rawtok = includetokenstack.top(); includetokenstack.pop(); continue; } - if (rawtok->op == '#' && !sameline(rawtok->previous, rawtok)) { + if (rawtok->op == '#' && !sameline(rawtok->previousSkipComments(), rawtok)) { if (!sameline(rawtok, rawtok->next)) { rawtok = rawtok->next; continue; @@ -2894,28 +3477,33 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL if (ifstates.size() <= 1U && (rawtok->str() == ELIF || rawtok->str() == ELSE || rawtok->str() == ENDIF)) { if (outputList) { - simplecpp::Output err(files); - err.type = Output::SYNTAX_ERROR; - err.location = rawtok->location; - err.msg = 
"#" + rawtok->str() + " without #if"; - outputList->push_back(err); + simplecpp::Output err{ + Output::SYNTAX_ERROR, + rawtok->location, + "#" + rawtok->str() + " without #if" + }; + outputList->emplace_back(std::move(err)); } output.clear(); return; } - if (ifstates.top() == TRUE && (rawtok->str() == ERROR || rawtok->str() == WARNING)) { + if (ifstates.top() == True && (rawtok->str() == ERROR || rawtok->str() == WARNING)) { if (outputList) { - simplecpp::Output err(rawtok->location.files); - err.type = rawtok->str() == ERROR ? Output::ERROR : Output::WARNING; - err.location = rawtok->location; + std::string msg; for (const Token *tok = rawtok->next; tok && sameline(rawtok,tok); tok = tok->next) { - if (!err.msg.empty() && isNameChar(tok->str()[0])) - err.msg += ' '; - err.msg += tok->str(); + if (!msg.empty() && isNameChar(tok->str()[0])) + msg += ' '; + msg += tok->str(); } - err.msg = '#' + rawtok->str() + ' ' + err.msg; - outputList->push_back(err); + msg = '#' + rawtok->str() + ' ' + msg; + simplecpp::Output err{ + rawtok->str() == ERROR ? 
Output::ERROR : Output::WARNING, + rawtok->location, + std::move(msg) + }; + + outputList->emplace_back(std::move(err)); } if (rawtok->str() == ERROR) { output.clear(); @@ -2924,29 +3512,41 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL } if (rawtok->str() == DEFINE) { - if (ifstates.top() != TRUE) + if (ifstates.top() != True) continue; try { const Macro ¯o = Macro(rawtok->previous, files); if (dui.undefined.find(macro.name()) == dui.undefined.end()) { - std::map::iterator it = macros.find(macro.name()); + const MacroMap::iterator it = macros.find(macro.name()); if (it == macros.end()) macros.insert(std::pair(macro.name(), macro)); else it->second = macro; } - } catch (const std::runtime_error &) { + } catch (const std::runtime_error &err) { + if (outputList) { + simplecpp::Output out{ + Output::SYNTAX_ERROR, + rawtok->location, + std::string("Failed to parse #define, ") + err.what() + }; + outputList->emplace_back(std::move(out)); + } + output.clear(); + return; + } catch (const simplecpp::Macro::Error &err) { if (outputList) { - simplecpp::Output err(files); - err.type = Output::SYNTAX_ERROR; - err.location = rawtok->location; - err.msg = "Failed to parse #define"; - outputList->push_back(err); + simplecpp::Output out{ + simplecpp::Output::SYNTAX_ERROR, + err.location, + "Failed to parse #define, " + err.what + }; + outputList->emplace_back(std::move(out)); } output.clear(); return; } - } else if (ifstates.top() == TRUE && rawtok->str() == INCLUDE) { + } else if (ifstates.top() == True && rawtok->str() == INCLUDE) { TokenList inc1(files); for (const Token *inctok = rawtok->next; sameline(rawtok,inctok); inctok = inctok->next) { if (!inctok->comment) @@ -2955,7 +3555,7 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL TokenList inc2(files); if (!inc1.empty() && inc1.cfront()->name) { const Token *inctok = inc1.cfront(); - if (!preprocessToken(inc2, &inctok, macros, files, outputList)) { + if 
(!preprocessToken(inc2, inctok, macros, files, outputList)) { output.clear(); return; } @@ -2963,7 +3563,7 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL inc2.takeTokens(inc1); } - if (!inc2.empty() && inc2.cfront()->op == '<' && inc2.cback()->op == '>') { + if (!inc1.empty() && !inc2.empty() && inc2.cfront()->op == '<' && inc2.cback()->op == '>') { TokenString hdr; // TODO: Sometimes spaces must be added in the string // Somehow preprocessToken etc must be told that the location should be source location not destination location @@ -2977,73 +3577,70 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL if (inc2.empty() || inc2.cfront()->str().size() <= 2U) { if (outputList) { - simplecpp::Output err(files); - err.type = Output::SYNTAX_ERROR; - err.location = rawtok->location; - err.msg = "No header in #include"; - outputList->push_back(err); + simplecpp::Output err{ + Output::SYNTAX_ERROR, + rawtok->location, + "No header in #include" + }; + outputList->emplace_back(std::move(err)); } output.clear(); return; } - const Token *inctok = inc2.cfront(); + const Token * const inctok = inc2.cfront(); - const bool systemheader = (inctok->op == '<'); - const std::string header(realFilename(inctok->str().substr(1U, inctok->str().size() - 2U))); - std::string header2 = getFileName(filedata, rawtok->location.file(), header, dui, systemheader); - if (header2.empty()) { - // try to load file.. 
- std::ifstream f; - header2 = openHeader(f, dui, rawtok->location.file(), header, systemheader); - if (f.is_open()) { - TokenList *tokens = new TokenList(f, files, header2, outputList); - filedata[header2] = tokens; - } - } - if (header2.empty()) { + const bool systemheader = (inctok->str()[0] == '<'); + const std::string header(inctok->str().substr(1U, inctok->str().size() - 2U)); + const FileData *const filedata = cache.get(rawtokens.file(rawtok->location), header, dui, systemheader, files, outputList).first; + if (filedata == nullptr) { if (outputList) { - simplecpp::Output out(files); - out.type = Output::MISSING_HEADER; - out.location = rawtok->location; - out.msg = "Header not found: " + inctok->str(); - outputList->push_back(out); + simplecpp::Output out{ + simplecpp::Output::MISSING_HEADER, + rawtok->location, + "Header not found: " + inctok->str() + }; + outputList->emplace_back(std::move(out)); } } else if (includetokenstack.size() >= 400) { if (outputList) { - simplecpp::Output out(files); - out.type = Output::INCLUDE_NESTED_TOO_DEEPLY; - out.location = rawtok->location; - out.msg = "#include nested too deeply"; - outputList->push_back(out); + simplecpp::Output out{ + simplecpp::Output::INCLUDE_NESTED_TOO_DEEPLY, + rawtok->location, + "#include nested too deeply" + }; + outputList->emplace_back(std::move(out)); } - } else if (pragmaOnce.find(header2) == pragmaOnce.end()) { + } else if (pragmaOnce.find(filedata->filename) == pragmaOnce.end()) { includetokenstack.push(gotoNextLine(rawtok)); - const TokenList *includetokens = filedata.find(header2)->second; - rawtok = includetokens ? 
includetokens->cfront() : NULL; + rawtok = filedata->tokens.cfront(); continue; } } else if (rawtok->str() == IF || rawtok->str() == IFDEF || rawtok->str() == IFNDEF || rawtok->str() == ELIF) { if (!sameline(rawtok,rawtok->next)) { if (outputList) { - simplecpp::Output out(files); - out.type = Output::SYNTAX_ERROR; - out.location = rawtok->location; - out.msg = "Syntax error in #" + rawtok->str(); - outputList->push_back(out); + simplecpp::Output out{ + simplecpp::Output::SYNTAX_ERROR, + rawtok->location, + "Syntax error in #" + rawtok->str() + }; + outputList->emplace_back(std::move(out)); } output.clear(); return; } bool conditionIsTrue; - if (ifstates.top() == ALWAYS_FALSE || (ifstates.top() == ELSE_IS_TRUE && rawtok->str() != ELIF)) + if (ifstates.top() == AlwaysFalse || (ifstates.top() == ElseIsTrue && rawtok->str() != ELIF)) { conditionIsTrue = false; - else if (rawtok->str() == IFDEF) + } + else if (rawtok->str() == IFDEF) { conditionIsTrue = (macros.find(rawtok->next->str()) != macros.end() || (hasInclude && rawtok->next->str() == HAS_INCLUDE)); - else if (rawtok->str() == IFNDEF) + maybeUsedMacros[rawtok->next->str()].emplace_back(rawtok->next->location); + } else if (rawtok->str() == IFNDEF) { conditionIsTrue = (macros.find(rawtok->next->str()) == macros.end() && !(hasInclude && rawtok->next->str() == HAS_INCLUDE)); - else { /*if (rawtok->str() == IF || rawtok->str() == ELIF)*/ + maybeUsedMacros[rawtok->next->str()].emplace_back(rawtok->next->location); + } else { /*if (rawtok->str() == IF || rawtok->str() == ELIF)*/ TokenList expr(files); for (const Token *tok = rawtok->next; tok && tok->location.sameline(rawtok->location); tok = tok->next) { if (!tok->name) { @@ -3056,6 +3653,7 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL const bool par = (tok && tok->op == '('); if (par) tok = tok->next; + maybeUsedMacros[rawtok->next->str()].emplace_back(rawtok->next->location); if (tok) { if (macros.find(tok->str()) != 
macros.end()) expr.push_back(new Token("1", tok->location)); @@ -3065,14 +3663,15 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL expr.push_back(new Token("0", tok->location)); } if (par) - tok = tok ? tok->next : NULL; + tok = tok ? tok->next : nullptr; if (!tok || !sameline(rawtok,tok) || (par && tok->op != ')')) { if (outputList) { - Output out(rawtok->location.files); - out.type = Output::SYNTAX_ERROR; - out.location = rawtok->location; - out.msg = "failed to evaluate " + std::string(rawtok->str() == IF ? "#if" : "#elif") + " condition"; - outputList->push_back(out); + Output out{ + Output::SYNTAX_ERROR, + rawtok->location, + "failed to evaluate " + std::string(rawtok->str() == IF ? "#if" : "#elif") + " condition" + }; + outputList->emplace_back(std::move(out)); } output.clear(); return; @@ -3085,23 +3684,37 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL const bool par = (tok && tok->op == '('); if (par) tok = tok->next; + bool closingAngularBracket = false; if (tok) { - const std::string &sourcefile = rawtok->location.file(); - const bool systemheader = (tok->str()[0] == '<'); - const std::string header(realFilename(tok->str().substr(1U, tok->str().size() - 2U))); - std::ifstream f; - const std::string header2 = openHeader(f,dui,sourcefile,header,systemheader); - expr.push_back(new Token(header2.empty() ? "0" : "1", tok->location)); + const std::string &sourcefile = rawtokens.file(rawtok->location); + const bool systemheader = (tok && tok->op == '<'); + std::string header; + + if (systemheader) { + while ((tok = tok->next) && tok->op != '>') + header += tok->str(); + if (tok && tok->op == '>') + closingAngularBracket = true; + } else { + header = tok->str().substr(1U, tok->str().size() - 2U); + closingAngularBracket = true; + } + if (tok) { + std::ifstream f; + const std::string header2 = openHeader(f,dui,sourcefile,header,systemheader); + expr.push_back(new Token(header2.empty() ? 
"0" : "1", tok->location)); + } } if (par) - tok = tok ? tok->next : NULL; - if (!tok || !sameline(rawtok,tok) || (par && tok->op != ')')) { + tok = tok ? tok->next : nullptr; + if (!tok || !sameline(rawtok,tok) || (par && tok->op != ')') || (!closingAngularBracket)) { if (outputList) { - Output out(rawtok->location.files); - out.type = Output::SYNTAX_ERROR; - out.location = rawtok->location; - out.msg = "failed to evaluate " + std::string(rawtok->str() == IF ? "#if" : "#elif") + " condition"; - outputList->push_back(out); + Output out{ + Output::SYNTAX_ERROR, + rawtok->location, + "failed to evaluate " + std::string(rawtok->str() == IF ? "#if" : "#elif") + " condition" + }; + outputList->emplace_back(std::move(out)); } output.clear(); return; @@ -3109,8 +3722,10 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL continue; } + maybeUsedMacros[rawtok->next->str()].emplace_back(rawtok->next->location); + const Token *tmp = tok; - if (!preprocessToken(expr, &tmp, macros, files, outputList)) { + if (!preprocessToken(expr, tmp, macros, files, outputList)) { output.clear(); return; } @@ -3123,22 +3738,24 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL std::string E; for (const simplecpp::Token *tok = expr.cfront(); tok; tok = tok->next) E += (E.empty() ? 
"" : " ") + tok->str(); - const long long result = evaluate(expr, sizeOfType); + const long long result = evaluate(expr, dui, sizeOfType); conditionIsTrue = (result != 0); - ifCond->push_back(IfCond(rawtok->location, E, result)); + ifCond->emplace_back(rawtok->location, E, result); } else { - const long long result = evaluate(expr, sizeOfType); + const long long result = evaluate(expr, dui, sizeOfType); conditionIsTrue = (result != 0); } - } catch (const std::exception &e) { + } catch (const std::runtime_error &e) { if (outputList) { - Output out(rawtok->location.files); - out.type = Output::SYNTAX_ERROR; - out.location = rawtok->location; - out.msg = "failed to evaluate " + std::string(rawtok->str() == IF ? "#if" : "#elif") + " condition"; + std::string msg = "failed to evaluate " + std::string(rawtok->str() == IF ? "#if" : "#elif") + " condition"; if (e.what() && *e.what()) - out.msg += std::string(", ") + e.what(); - outputList->push_back(out); + msg += std::string(", ") + e.what(); + Output out{ + Output::SYNTAX_ERROR, + rawtok->location, + std::move(msg) + }; + outputList->emplace_back(std::move(out)); } output.clear(); return; @@ -3147,35 +3764,46 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL if (rawtok->str() != ELIF) { // push a new ifstate.. - if (ifstates.top() != TRUE) - ifstates.push(ALWAYS_FALSE); + if (ifstates.top() != True) + ifstates.push(AlwaysFalse); else - ifstates.push(conditionIsTrue ? TRUE : ELSE_IS_TRUE); - } else if (ifstates.top() == TRUE) { - ifstates.top() = ALWAYS_FALSE; - } else if (ifstates.top() == ELSE_IS_TRUE && conditionIsTrue) { - ifstates.top() = TRUE; + ifstates.push(conditionIsTrue ? 
True : ElseIsTrue); + iftokens.push(rawtok); + } else { + if (ifstates.top() == True) + ifstates.top() = AlwaysFalse; + else if (ifstates.top() == ElseIsTrue && conditionIsTrue) + ifstates.top() = True; + iftokens.top()->nextcond = rawtok; + iftokens.top() = rawtok; } } else if (rawtok->str() == ELSE) { - ifstates.top() = (ifstates.top() == ELSE_IS_TRUE) ? TRUE : ALWAYS_FALSE; + ifstates.top() = (ifstates.top() == ElseIsTrue) ? True : AlwaysFalse; + iftokens.top()->nextcond = rawtok; + iftokens.top() = rawtok; } else if (rawtok->str() == ENDIF) { ifstates.pop(); + iftokens.top()->nextcond = rawtok; + iftokens.pop(); } else if (rawtok->str() == UNDEF) { - if (ifstates.top() == TRUE) { + if (ifstates.top() == True) { const Token *tok = rawtok->next; while (sameline(rawtok,tok) && tok->comment) tok = tok->next; if (sameline(rawtok, tok)) macros.erase(tok->str()); } - } else if (ifstates.top() == TRUE && rawtok->str() == PRAGMA && rawtok->next && rawtok->next->str() == ONCE && sameline(rawtok,rawtok->next)) { - pragmaOnce.insert(rawtok->location.file()); + } else if (ifstates.top() == True && rawtok->str() == PRAGMA && rawtok->next && rawtok->next->str() == ONCE && sameline(rawtok,rawtok->next)) { + pragmaOnce.insert(rawtokens.file(rawtok->location)); } - rawtok = gotoNextLine(rawtok); + if (ifstates.top() != True && rawtok->nextcond) + rawtok = rawtok->nextcond->previous; + else + rawtok = gotoNextLine(rawtok); continue; } - if (ifstates.top() != TRUE) { + if (ifstates.top() != True) { // drop code rawtok = gotoNextLine(rawtok); continue; @@ -3195,7 +3823,7 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL const Location loc(rawtok->location); TokenList tokens(files); - if (!preprocessToken(tokens, &rawtok, macros, files, outputList)) { + if (!preprocessToken(tokens, rawtok, macros, files, outputList)) { output.clear(); return; } @@ -3216,23 +3844,126 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL 
} if (macroUsage) { - for (std::map::const_iterator macroIt = macros.begin(); macroIt != macros.end(); ++macroIt) { + for (simplecpp::MacroMap::const_iterator macroIt = macros.begin(); macroIt != macros.end(); ++macroIt) { const Macro ¯o = macroIt->second; - const std::list &usage = macro.usage(); + std::list usage = macro.usage(); + const std::list& temp = maybeUsedMacros[macro.name()]; + usage.insert(usage.end(), temp.begin(), temp.end()); for (std::list::const_iterator usageIt = usage.begin(); usageIt != usage.end(); ++usageIt) { - MacroUsage mu(usageIt->files, macro.valueDefinedInCode()); + MacroUsage mu(macro.valueDefinedInCode()); mu.macroName = macro.name(); mu.macroLocation = macro.defineLocation(); mu.useLocation = *usageIt; - macroUsage->push_back(mu); + macroUsage->emplace_back(std::move(mu)); } } } } -void simplecpp::cleanup(std::map &filedata) +void simplecpp::cleanup(FileDataCache &cache) +{ + cache.clear(); +} + +simplecpp::cstd_t simplecpp::getCStd(const std::string &std) +{ + if (std == "c90" || std == "c89" || std == "iso9899:1990" || std == "iso9899:199409" || std == "gnu90" || std == "gnu89") + return C89; + if (std == "c99" || std == "c9x" || std == "iso9899:1999" || std == "iso9899:199x" || std == "gnu99" || std == "gnu9x") + return C99; + if (std == "c11" || std == "c1x" || std == "iso9899:2011" || std == "gnu11" || std == "gnu1x") + return C11; + if (std == "c17" || std == "c18" || std == "iso9899:2017" || std == "iso9899:2018" || std == "gnu17" || std == "gnu18") + return C17; + if (std == "c23" || std == "gnu23" || std == "c2x" || std == "gnu2x") + return C23; + if (std == "c2y" || std == "gnu2y") + return C2Y; + return CUnknown; +} + +std::string simplecpp::getCStdString(cstd_t std) +{ + switch (std) { + case C89: + // __STDC_VERSION__ is not set for C90 although the macro was added in the 1994 amendments + return ""; + case C99: + return "199901L"; + case C11: + return "201112L"; + case C17: + return "201710L"; + case C23: + // supported 
by GCC 9+ and Clang 9+ + // Clang 9, 10, 11, 12, 13 return "201710L" + // Clang 14, 15, 16, 17 return "202000L" + // Clang 9, 10, 11, 12, 13, 14, 15, 16, 17 do not support "c23" and "gnu23" + return "202311L"; + case C2Y: + // supported by GCC 15+ and Clang 19+ + // Clang 19, 20, 21, 22 return "202400L" + return "202500L"; + case CUnknown: + return ""; + } + return ""; +} + +std::string simplecpp::getCStdString(const std::string &std) +{ + return getCStdString(getCStd(std)); +} + +simplecpp::cppstd_t simplecpp::getCppStd(const std::string &std) +{ + if (std == "c++98" || std == "c++03" || std == "gnu++98" || std == "gnu++03") + return CPP03; + if (std == "c++11" || std == "gnu++11" || std == "c++0x" || std == "gnu++0x") + return CPP11; + if (std == "c++14" || std == "c++1y" || std == "gnu++14" || std == "gnu++1y") + return CPP14; + if (std == "c++17" || std == "c++1z" || std == "gnu++17" || std == "gnu++1z") + return CPP17; + if (std == "c++20" || std == "c++2a" || std == "gnu++20" || std == "gnu++2a") + return CPP20; + if (std == "c++23" || std == "c++2b" || std == "gnu++23" || std == "gnu++2b") + return CPP23; + if (std == "c++26" || std == "c++2c" || std == "gnu++26" || std == "gnu++2c") + return CPP26; + return CPPUnknown; +} + +std::string simplecpp::getCppStdString(cppstd_t std) +{ + switch (std) { + case CPP03: + return "199711L"; + case CPP11: + return "201103L"; + case CPP14: + return "201402L"; + case CPP17: + return "201703L"; + case CPP20: + // GCC 10 returns "201703L" - correct in 11+ + return "202002L"; + case CPP23: + // supported by GCC 11+ and Clang 12+ + // GCC 11, 12, 13 return "202100L" + // Clang 12, 13, 14, 15, 16 do not support "c++23" and "gnu++23" and return "202101L" + // Clang 17, 18 return "202302L" + return "202302L"; + case CPP26: + // supported by GCC 14+ and Clang 17+ + return "202400L"; + case CPPUnknown: + return ""; + } + return ""; +} + +std::string simplecpp::getCppStdString(const std::string &std) { - for (std::map::iterator it 
= filedata.begin(); it != filedata.end(); ++it) - delete it->second; - filedata.clear(); + return getCppStdString(getCppStd(std)); } diff --git a/simplecpp.h b/simplecpp.h old mode 100755 new mode 100644 index 82adad40..f29166ff --- a/simplecpp.h +++ b/simplecpp.h @@ -1,32 +1,33 @@ -/* +/* -*- C++ -*- * simplecpp - A simple and high-fidelity C/C++ preprocessor library - * Copyright (C) 2016 Daniel Marjamäki. - * - * This library is free software: you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation, either - * version 3 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . + * Copyright (C) 2016-2023 simplecpp team */ #ifndef simplecppH #define simplecppH #include -#include -#include +#include +#include +#include #include #include +#include #include #include +#include +#include #include +#if __cplusplus >= 202002L +# include +#endif + +#if defined(__cpp_lib_string_view) && !defined(__cpp_lib_span) +#include +#endif +#ifdef __cpp_lib_span +#include +#endif #ifdef _WIN32 # ifdef SIMPLECPP_EXPORT @@ -40,28 +41,91 @@ # define SIMPLECPP_LIB #endif +#ifndef _WIN32 +# include +#endif + +#if defined(_MSC_VER) +# pragma warning(push) +// suppress warnings about "conversion from 'type1' to 'type2', possible loss of data" +# pragma warning(disable : 4267) +# pragma warning(disable : 4244) +#endif + +// provide legacy (i.e. 
raw pointer) API for TokenList +// note: std::istream has an overhead compared to raw pointers +#ifndef SIMPLECPP_TOKENLIST_ALLOW_PTR +// still provide the legacy API in case we lack the performant wrappers +# if !defined(__cpp_lib_string_view) && !defined(__cpp_lib_span) +# define SIMPLECPP_TOKENLIST_ALLOW_PTR 1 +# else +# define SIMPLECPP_TOKENLIST_ALLOW_PTR 0 +# endif +#endif namespace simplecpp { + /** C code standard */ + enum cstd_t : std::int8_t { CUnknown=-1, C89, C99, C11, C17, C23, C2Y }; + + /** C++ code standard */ + enum cppstd_t : std::int8_t { CPPUnknown=-1, CPP03, CPP11, CPP14, CPP17, CPP20, CPP23, CPP26 }; - typedef std::string TokenString; + using TokenString = std::string; + +#if defined(__cpp_lib_string_view) && !defined(__cpp_lib_span) + using View = std::string_view; +#else + struct View + { + // cppcheck-suppress noExplicitConstructor + View(const char* data) + : mData(data) + , mSize(strlen(data)) + {} + + // only provide when std::span is not available so using untyped initialization won't use View +#if !defined(__cpp_lib_span) + View(const char* data, std::size_t size) + : mData(data) + , mSize(size) + {} + + // cppcheck-suppress noExplicitConstructor + View(const std::string& str) + : mData(str.data()) + , mSize(str.size()) + {} +#endif // !defined(__cpp_lib_span) + + const char* data() const { + return mData; + } + + std::size_t size() const { + return mSize; + } + + private: + const char* mData; + std::size_t mSize; + }; +#endif // defined(__cpp_lib_string_view) && !defined(__cpp_lib_span) + + class Macro; /** * Location in source code */ - class SIMPLECPP_LIB Location { - public: - explicit Location(const std::vector &f) : files(f), fileIndex(0), line(1U), col(0U) {} - - Location(const Location &loc) : files(loc.files), fileIndex(loc.fileIndex), line(loc.line), col(loc.col) {} + struct SIMPLECPP_LIB Location { + Location() = default; + Location(unsigned int fileIndex, unsigned int line, unsigned int col) + : fileIndex(fileIndex) + , 
line(line) + , col(col) + {} - Location &operator=(const Location &other) { - if (this != &other) { - fileIndex = other.fileIndex; - line = other.line; - col = other.col; - } - return *this; - } + Location(const Location &loc) = default; + Location &operator=(const Location &other) = default; /** increment this location by string */ void adjust(const std::string &str); @@ -78,16 +142,9 @@ namespace simplecpp { return fileIndex == other.fileIndex && line == other.line; } - const std::string& file() const { - return fileIndex < files.size() ? files[fileIndex] : emptyFileName; - } - - const std::vector &files; - unsigned int fileIndex; - unsigned int line; - unsigned int col; - private: - static const std::string emptyFileName; + unsigned int fileIndex{}; + unsigned int line{}; + unsigned int col{}; }; /** @@ -96,23 +153,15 @@ namespace simplecpp { */ class SIMPLECPP_LIB Token { public: - Token(const TokenString &s, const Location &loc) : - location(loc), previous(NULL), next(NULL), string(s) { + Token(const TokenString &s, const Location &loc, bool wsahead = false) : + whitespaceahead(wsahead), location(loc), string(s) { flags(); } Token(const Token &tok) : - macro(tok.macro), location(tok.location), previous(NULL), next(NULL), string(tok.string) { - flags(); - } + macro(tok.macro), op(tok.op), comment(tok.comment), name(tok.name), number(tok.number), whitespaceahead(tok.whitespaceahead), location(tok.location), string(tok.string), mExpandedFrom(tok.mExpandedFrom) {} - void flags() { - name = (std::isalpha((unsigned char)string[0]) || string[0] == '_' || string[0] == '$') - && (string.find('\'') == string.npos); - comment = string.size() > 1U && string[0] == '/' && (string[1] == '/' || string[1] == '*'); - number = std::isdigit((unsigned char)string[0]) || (string.size() > 1U && string[0] == '-' && std::isdigit((unsigned char)string[1])); - op = (string.size() == 1U) ? 
string[0] : '\0'; - } + Token &operator=(const Token &tok) = delete; const TokenString& str() const { return string; @@ -125,15 +174,21 @@ namespace simplecpp { bool isOneOf(const char ops[]) const; bool startsWithOneOf(const char c[]) const; bool endsWithOneOf(const char c[]) const; + static bool isNumberLike(const std::string& s) { + return std::isdigit(static_cast(s[0])) || + (s.size() > 1U && (s[0] == '-' || s[0] == '+') && std::isdigit(static_cast(s[1]))); + } TokenString macro; char op; bool comment; bool name; bool number; + bool whitespaceahead; Location location; - Token *previous; - Token *next; + Token *previous{}; + Token *next{}; + mutable const Token *nextcond{}; const Token *previousSkipComments() const { const Token *tok = this->previous; @@ -149,19 +204,38 @@ namespace simplecpp { return tok; } + void setExpandedFrom(const Token *tok, const Macro* m) { + mExpandedFrom = tok->mExpandedFrom; + mExpandedFrom.insert(m); + if (tok->whitespaceahead) + whitespaceahead = true; + } + bool isExpandedFrom(const Macro* m) const { + return mExpandedFrom.find(m) != mExpandedFrom.end(); + } + void markExpandedFrom(const Macro* m) { + mExpandedFrom.insert(m); + } + void printAll() const; void printOut() const; private: + void flags() { + name = (std::isalpha(static_cast(string[0])) || string[0] == '_' || string[0] == '$') + && (std::memchr(string.c_str(), '\'', string.size()) == nullptr); + comment = string.size() > 1U && string[0] == '/' && (string[1] == '/' || string[1] == '*'); + number = isNumberLike(string); + op = (string.size() == 1U && !name && !comment && !number) ? 
string[0] : '\0'; + } + TokenString string; - // Not implemented - prevent assignment - Token &operator=(const Token &tok); + std::set mExpandedFrom; }; /** Output from preprocessor */ struct SIMPLECPP_LIB Output { - explicit Output(const std::vector &files) : type(ERROR), location(files) {} - enum Type { + enum Type : std::uint8_t { ERROR, /* #error */ WARNING, /* #warning */ MISSING_HEADER, @@ -169,28 +243,68 @@ namespace simplecpp { SYNTAX_ERROR, PORTABILITY_BACKSLASH, UNHANDLED_CHAR_ERROR, - EXPLICIT_INCLUDE_NOT_FOUND + EXPLICIT_INCLUDE_NOT_FOUND, + FILE_NOT_FOUND, + DUI_ERROR } type; + Output(Type type, const Location& loc, std::string msg) : type(type), location(loc), msg(std::move(msg)) {} Location location; std::string msg; }; - typedef std::list OutputList; + using OutputList = std::list; /** List of tokens. */ class SIMPLECPP_LIB TokenList { public: + class Stream; + explicit TokenList(std::vector &filenames); - TokenList(std::istream &istr, std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = NULL); + /** generates a token list from the given std::istream parameter */ + TokenList(std::istream &istr, std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr); + /** generates a token list from the given buffer */ + template + TokenList(const char (&data)[size], std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(reinterpret_cast(data), size-1, filenames, filename, outputList, 0) + {} + /** generates a token list from the given buffer */ + template + TokenList(const unsigned char (&data)[size], std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(data, size-1, filenames, filename, outputList, 0) + {} +#if SIMPLECPP_TOKENLIST_ALLOW_PTR + /** generates a token list from the given buffer */ + TokenList(const unsigned char* data, std::size_t size, std::vector 
&filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(data, size, filenames, filename, outputList, 0) + {} + /** generates a token list from the given buffer */ + TokenList(const char* data, std::size_t size, std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(reinterpret_cast(data), size, filenames, filename, outputList, 0) + {} +#endif // SIMPLECPP_TOKENLIST_ALLOW_PTR + /** generates a token list from the given buffer */ + TokenList(View data, std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(reinterpret_cast(data.data()), data.size(), filenames, filename, outputList, 0) + {} +#ifdef __cpp_lib_span + /** generates a token list from the given buffer */ + TokenList(std::span data, std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(reinterpret_cast(data.data()), data.size(), filenames, filename, outputList, 0) + {} + + /** generates a token list from the given buffer */ + TokenList(std::span data, std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr) + : TokenList(data.data(), data.size(), filenames, filename, outputList, 0) + {} +#endif // __cpp_lib_span + + /** generates a token list from the given filename parameter */ + TokenList(const std::string &filename, std::vector &filenames, OutputList *outputList = nullptr); TokenList(const TokenList &other); -#if __cplusplus >= 201103L TokenList(TokenList &&other); -#endif ~TokenList(); TokenList &operator=(const TokenList &other); -#if __cplusplus >= 201103L TokenList &operator=(TokenList &&other); -#endif void clear(); bool empty() const { @@ -198,10 +312,14 @@ namespace simplecpp { } void push_back(Token *tok); - void dump() const; - std::string stringify() const; + void dump(bool linenrs = false) const; + std::string stringify(bool 
linenrs = false) const; - void readfile(std::istream &istr, const std::string &filename=std::string(), OutputList *outputList = NULL); + void readfile(Stream &stream, const std::string &filename=std::string(), OutputList *outputList = nullptr); + /** + * @throws std::overflow_error thrown on overflow or division by zero + * @throws std::runtime_error thrown on invalid expressions + */ void constFold(); void removeComments(); @@ -225,8 +343,8 @@ namespace simplecpp { void deleteToken(Token *tok) { if (!tok) return; - Token *prev = tok->previous; - Token *next = tok->next; + Token * const prev = tok->previous; + Token * const next = tok->next; if (prev) prev->next = next; if (next) @@ -248,28 +366,43 @@ namespace simplecpp { other.frontToken->previous = backToken; } backToken = other.backToken; - other.frontToken = other.backToken = NULL; + other.frontToken = other.backToken = nullptr; } /** sizeof(T) */ std::map sizeOfType; + const std::vector& getFiles() const { + return files; + } + + const std::string& file(const Location& loc) const; + private: + TokenList(const unsigned char* data, std::size_t size, std::vector &filenames, const std::string &filename, OutputList *outputList, int /*unused*/); + void combineOperators(); void constFoldUnaryNotPosNeg(Token *tok); + /** + * @throws std::overflow_error thrown on overflow or division by zero + */ void constFoldMulDivRem(Token *tok); void constFoldAddSub(Token *tok); void constFoldShift(Token *tok); void constFoldComparison(Token *tok); void constFoldBitwise(Token *tok); void constFoldLogicalOp(Token *tok); - void constFoldQuestionOp(Token **tok1); + /** + * @throws std::runtime_error thrown on invalid expressions + */ + void constFoldQuestionOp(Token *&tok1); - std::string readUntil(std::istream &istr, const Location &location, char start, char end, OutputList *outputList, unsigned int bom); - void lineDirective(unsigned int fileIndex, unsigned int line, Location *location); + std::string readUntil(Stream &stream, 
const Location &location, char start, char end, OutputList *outputList); + void lineDirective(unsigned int fileIndex_, unsigned int line, Location &location); - std::string lastLine(int maxsize=100000) const; + const Token* lastLineTok(int maxsize=1000) const; + const Token* isLastLinePreprocessor(int maxsize=1000) const; unsigned int fileIndex(const std::string &filename); @@ -280,11 +413,11 @@ namespace simplecpp { /** Tracking how macros are used */ struct SIMPLECPP_LIB MacroUsage { - explicit MacroUsage(const std::vector &f, bool macroValueKnown_) : macroLocation(f), useLocation(f), macroValueKnown(macroValueKnown_) {} + explicit MacroUsage(bool macroValueKnown_) : macroValueKnown(macroValueKnown_) {} std::string macroName; - Location macroLocation; - Location useLocation; - bool macroValueKnown; + Location macroLocation; + Location useLocation; + bool macroValueKnown; }; /** Tracking #if/#elif expressions */ @@ -300,17 +433,157 @@ namespace simplecpp { * On the command line these are configured by -D, -U, -I, --include, -std */ struct SIMPLECPP_LIB DUI { - DUI() {} + DUI() = default; std::list defines; std::set undefined; std::list includePaths; std::list includes; std::string std; + bool clearIncludeCache{}; + bool removeComments{}; /** remove comment tokens from included files */ + }; + + struct SIMPLECPP_LIB FileData { + /** The canonical filename associated with this data */ + std::string filename; + /** The tokens associated with this file */ + TokenList tokens; + }; + + class SIMPLECPP_LIB FileDataCache { + public: + FileDataCache() = default; + + FileDataCache(const FileDataCache &) = delete; + FileDataCache(FileDataCache &&) = default; + + FileDataCache &operator=(const FileDataCache &) = delete; + FileDataCache &operator=(FileDataCache &&) = default; + + /** Get the cached data for a file, or load and then return it if it isn't cached. + * returns the file data and true if the file was loaded, false if it was cached. 
*/ + std::pair get(const std::string &sourcefile, const std::string &header, const DUI &dui, bool systemheader, std::vector &filenames, OutputList *outputList); + + void insert(FileData data) { + // NOLINTNEXTLINE(misc-const-correctness) - FP + auto *const newdata = new FileData(std::move(data)); + + mData.emplace_back(newdata); + mNameMap.emplace(newdata->filename, newdata); + } + + void clear() { + mNameMap.clear(); + mIdMap.clear(); + mData.clear(); + } + + using container_type = std::vector>; + using iterator = container_type::iterator; + using const_iterator = container_type::const_iterator; + using size_type = container_type::size_type; + + size_type size() const { + return mData.size(); + } + iterator begin() { + return mData.begin(); + } + iterator end() { + return mData.end(); + } + const_iterator begin() const { + return mData.begin(); + } + const_iterator end() const { + return mData.end(); + } + const_iterator cbegin() const { + return mData.cbegin(); + } + const_iterator cend() const { + return mData.cend(); + } + + private: + struct FileID { +#ifdef _WIN32 + struct { + std::uint64_t VolumeSerialNumber; + struct { + std::uint64_t IdentifierHi; + std::uint64_t IdentifierLo; + } FileId; + } fileIdInfo; + + bool operator==(const FileID &that) const noexcept { + return fileIdInfo.VolumeSerialNumber == that.fileIdInfo.VolumeSerialNumber && + fileIdInfo.FileId.IdentifierHi == that.fileIdInfo.FileId.IdentifierHi && + fileIdInfo.FileId.IdentifierLo == that.fileIdInfo.FileId.IdentifierLo; + } +#else + dev_t dev; + ino_t ino; + + bool operator==(const FileID& that) const noexcept { + return dev == that.dev && ino == that.ino; + } +#endif + struct Hasher { + std::size_t operator()(const FileID &id) const { +#ifdef _WIN32 + return static_cast(id.fileIdInfo.FileId.IdentifierHi ^ id.fileIdInfo.FileId.IdentifierLo ^ + id.fileIdInfo.VolumeSerialNumber); +#else + return static_cast(id.dev) ^ static_cast(id.ino); +#endif + } + }; + }; + + using name_map_type = 
std::unordered_map; + using id_map_type = std::unordered_map; + + static bool getFileId(const std::string &path, FileID &id); + + std::pair tryload(name_map_type::iterator &name_it, const DUI &dui, std::vector &filenames, OutputList *outputList); + + container_type mData; + name_map_type mNameMap; + id_map_type mIdMap; }; + /** Converts character literal (including prefix, but not ud-suffix) to long long value. + * + * Assumes ASCII-compatible single-byte encoded str for narrow literals + * and UTF-8 otherwise. + * + * For target assumes + * - execution character set encoding matching str + * - UTF-32 execution wide-character set encoding + * - requirements for __STDC_UTF_16__, __STDC_UTF_32__ and __STDC_ISO_10646__ satisfied + * - char16_t is 16bit wide + * - char32_t is 32bit wide + * - wchar_t is 32bit wide and unsigned + * - matching char signedness to host + * - matching sizeof(int) to host + * + * For host assumes + * - ASCII-compatible execution character set + * + * For host and target assumes + * - CHAR_BIT == 8 + * - two's complement + * + * Implements multi-character narrow literals according to GCC's behavior, + * except multi code unit universal character names are not supported. + * Multi-character wide literals are not supported. + * Limited support of universal character names for non-UTF-8 execution character set encodings. 
+ * @throws std::runtime_error thrown on invalid literal + */ SIMPLECPP_LIB long long characterLiteralToLL(const std::string& str); - SIMPLECPP_LIB std::map load(const TokenList &rawtokens, std::vector &filenames, const DUI &dui, OutputList *outputList = NULL); + SIMPLECPP_LIB FileDataCache load(const TokenList &rawtokens, std::vector &filenames, const DUI &dui, OutputList *outputList = nullptr, FileDataCache cache = {}); /** * Preprocess @@ -318,24 +591,49 @@ namespace simplecpp { * @param output TokenList that receives the preprocessing output * @param rawtokens Raw tokenlist for top sourcefile * @param files internal data of simplecpp - * @param filedata output from simplecpp::load() + * @param cache output from simplecpp::load() * @param dui defines, undefs, and include paths * @param outputList output: list that will receive output messages * @param macroUsage output: macro usage * @param ifCond output: #if/#elif expressions */ - SIMPLECPP_LIB void preprocess(TokenList &output, const TokenList &rawtokens, std::vector &files, std::map &filedata, const DUI &dui, OutputList *outputList = NULL, std::list *macroUsage = NULL, std::list *ifCond = NULL); + SIMPLECPP_LIB void preprocess(TokenList &output, const TokenList &rawtokens, std::vector &files, FileDataCache &cache, const DUI &dui, OutputList *outputList = nullptr, std::list *macroUsage = nullptr, std::list *ifCond = nullptr); /** * Deallocate data */ - SIMPLECPP_LIB void cleanup(std::map &filedata); + SIMPLECPP_LIB void cleanup(FileDataCache &cache); /** Simplify path */ SIMPLECPP_LIB std::string simplifyPath(std::string path); /** Convert Cygwin path to Windows path */ SIMPLECPP_LIB std::string convertCygwinToWindowsPath(const std::string &cygwinPath); + + /** Returns the C version for a given standard */ + SIMPLECPP_LIB cstd_t getCStd(const std::string &std); + + /** Returns the C++ version for a given standard */ + SIMPLECPP_LIB cppstd_t getCppStd(const std::string &std); + + /** Returns the __STDC_VERSION__ value
for a given standard */ + SIMPLECPP_LIB std::string getCStdString(const std::string &std); + SIMPLECPP_LIB std::string getCStdString(cstd_t std); + + /** Returns the __cplusplus value for a given standard */ + SIMPLECPP_LIB std::string getCppStdString(const std::string &std); + SIMPLECPP_LIB std::string getCppStdString(cppstd_t std); + + /** Checks if given path is absolute */ + SIMPLECPP_LIB bool isAbsolutePath(const std::string &path); } +#undef SIMPLECPP_TOKENLIST_ALLOW_PTR + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#undef SIMPLECPP_LIB + #endif diff --git a/test.cpp b/test.cpp index 9404e655..b654e159 100644 --- a/test.cpp +++ b/test.cpp @@ -1,28 +1,79 @@ +/* + * simplecpp - A simple and high-fidelity C/C++ preprocessor library + * Copyright (C) 2016-2023 simplecpp team + */ +#include "simplecpp.h" + +#include +#include +#include +#include +#include #include -#include +#include #include +#include +#include +#include #include -#include "simplecpp.h" +#ifndef SIMPLECPP_TEST_SOURCE_DIR +#error "SIMPLECPP_TEST_SOURCE_DIR is not defined." 
+#endif + +#define STRINGIZE_(x) #x +#define STRINGIZE(x) STRINGIZE_(x) + +static const std::string testSourceDir = SIMPLECPP_TEST_SOURCE_DIR; + +namespace { + enum class Input : std::uint8_t { + Stringstream, + CharBuffer + }; +} + +static Input USE_INPUT = Input::Stringstream; static int numberOfFailedAssertions = 0; #define ASSERT_EQUALS(expected, actual) (assertEquals((expected), (actual), __LINE__)) -#define ASSERT_THROW(stmt, e) try { stmt; assertThrowFailed(__LINE__); } catch (const e&) {} +#define ASSERT_THROW_EQUALS(stmt, e, expected) do { try { stmt; assertThrowFailed(__LINE__); } catch (const e& ex) { assertEquals((expected), (ex.what()), __LINE__); } } while (false) + +static std::string pprint(const std::string &in) +{ + std::string ret; + for (std::string::size_type i = 0; i < in.size(); ++i) { + if (in[i] == '\n') + ret += "\\n"; + ret += in[i]; + } + return ret; +} + +static const char* inputString(Input input) { + switch (input) { + case Input::Stringstream: + return "Stringstream"; + case Input::CharBuffer: + return "CharBuffer"; + } + return ""; // unreachable - needed for GCC and Visual Studio +} static int assertEquals(const std::string &expected, const std::string &actual, int line) { if (expected != actual) { numberOfFailedAssertions++; - std::cerr << "------ assertion failed ---------" << std::endl; - std::cerr << "line " << line << std::endl; - std::cerr << "expected:" << expected << std::endl; - std::cerr << "actual:" << actual << std::endl; + std::cerr << "------ assertion failed (" << inputString(USE_INPUT) << ")---------" << std::endl; + std::cerr << "line test.cpp:" << line << std::endl; + std::cerr << "expected:" << pprint(expected) << std::endl; + std::cerr << "actual:" << pprint(actual) << std::endl; } return (expected == actual); } -static int assertEquals(const unsigned int &expected, const unsigned int &actual, int line) +static int assertEquals(const long long &expected, const long long &actual, int line) { return 
assertEquals(std::to_string(expected), std::to_string(actual), line); } @@ -35,10 +86,11 @@ static void assertThrowFailed(int line) std::cerr << "exception not thrown" << std::endl; } -static void testcase(const std::string &name, void (*f)(), int argc, char **argv) +static void testcase(const std::string &name, void (*f)(), int argc, char * const *argv) { - if (argc == 1) + if (argc == 1) { f(); + } else { for (int i = 1; i < argc; i++) { if (name == argv[i]) @@ -49,30 +101,78 @@ static void testcase(const std::string &name, void (*f)(), int argc, char **argv #define TEST_CASE(F) (testcase(#F, F, argc, argv)) +static simplecpp::TokenList makeTokenList(const char code[], std::size_t size, std::vector &filenames, const std::string &filename=std::string(), simplecpp::OutputList *outputList=nullptr) +{ + switch (USE_INPUT) { + case Input::Stringstream: { + std::istringstream istr(std::string(code, size)); + return {istr,filenames,filename,outputList}; + } + case Input::CharBuffer: + return {{code, size}, filenames, filename, outputList}; + } + + return simplecpp::TokenList{filenames}; // unreachable - needed for GCC and Visual Studio +} + +static simplecpp::TokenList makeTokenList(const char code[], std::vector &filenames, const std::string &filename=std::string(), simplecpp::OutputList *outputList=nullptr) +{ + return makeTokenList(code, strlen(code), filenames, filename, outputList); +} +static std::string readfile(const char code[], simplecpp::OutputList *outputList=nullptr) +{ + std::vector files; + return makeTokenList(code,files,std::string(),outputList).stringify(); +} -static std::string readfile(const char code[], int sz=-1, simplecpp::OutputList *outputList=nullptr) +static std::string readfile(const char code[], std::size_t size, simplecpp::OutputList *outputList=nullptr) { - std::istringstream istr(sz == -1 ? 
std::string(code) : std::string(code,sz)); std::vector files; - return simplecpp::TokenList(istr,files,std::string(),outputList).stringify(); + return makeTokenList(code,size,files,std::string(),outputList).stringify(); } -static std::string preprocess(const char code[], const simplecpp::DUI &dui, simplecpp::OutputList *outputList = NULL) +static std::string preprocess(const char code[], const simplecpp::DUI &dui, simplecpp::OutputList *outputList, std::list *macroUsage = nullptr, std::list *ifCond = nullptr, const std::string &file = std::string()) { - std::istringstream istr(code); std::vector files; - std::map filedata; - simplecpp::TokenList tokens(istr,files); - tokens.removeComments(); + simplecpp::FileDataCache cache; + simplecpp::TokenList tokens = makeTokenList(code,files, file); + if (dui.removeComments) + tokens.removeComments(); simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, tokens, files, filedata, dui, outputList); + simplecpp::preprocess(tokens2, tokens, files, cache, dui, outputList, macroUsage, ifCond); + simplecpp::cleanup(cache); return tokens2.stringify(); } static std::string preprocess(const char code[]) { - return preprocess(code, simplecpp::DUI()); + return preprocess(code, simplecpp::DUI(), nullptr); +} + +static std::string preprocess(const char code[], const std::string &file) +{ + return preprocess(code, simplecpp::DUI(), nullptr, nullptr, nullptr, file); +} + +static std::string preprocess(const char code[], const simplecpp::DUI &dui) +{ + return preprocess(code, dui, nullptr); +} + +static std::string preprocess(const char code[], simplecpp::OutputList *outputList) +{ + return preprocess(code, simplecpp::DUI(), outputList); +} + +static std::string preprocess(const char code[], std::list *ifCond) +{ + return preprocess(code, simplecpp::DUI(), nullptr, nullptr, ifCond); +} + +static std::string preprocess(const char code[], std::list *macroUsage) +{ + return preprocess(code, simplecpp::DUI(), nullptr, macroUsage); 
} static std::string toString(const simplecpp::OutputList &outputList) @@ -105,6 +205,13 @@ static std::string toString(const simplecpp::OutputList &outputList) break; case simplecpp::Output::Type::EXPLICIT_INCLUDE_NOT_FOUND: ostr << "explicit_include_not_found,"; + break; + case simplecpp::Output::Type::FILE_NOT_FOUND: + ostr << "file_not_found,"; + break; + case simplecpp::Output::Type::DUI_ERROR: + ostr << "dui_error,"; + break; } ostr << output.msg << '\n'; @@ -117,21 +224,21 @@ static void backslash() // preprocessed differently simplecpp::OutputList outputList; - readfile("//123 \\\n456", -1, &outputList); + readfile("//123 \\\n456", &outputList); ASSERT_EQUALS("", toString(outputList)); - readfile("//123 \\ \n456", -1, &outputList); + readfile("//123 \\ \n456", &outputList); ASSERT_EQUALS("file0,1,portability_backslash,Combination 'backslash space newline' is not portable.\n", toString(outputList)); outputList.clear(); - readfile("#define A \\\n123", -1, &outputList); + readfile("#define A \\\n123", &outputList); ASSERT_EQUALS("", toString(outputList)); - readfile("#define A \\ \n123", -1, &outputList); + readfile("#define A \\ \n123", &outputList); ASSERT_EQUALS("file0,1,portability_backslash,Combination 'backslash space newline' is not portable.\n", toString(outputList)); } static void builtin() { - ASSERT_EQUALS("\"\" 1 0", preprocess("__FILE__ __LINE__ __COUNTER__")); + ASSERT_EQUALS("\"test.c\" 1 0", preprocess("__FILE__ __LINE__ __COUNTER__", "test.c")); ASSERT_EQUALS("\n\n3", preprocess("\n\n__LINE__")); ASSERT_EQUALS("\n\n0", preprocess("\n\n__COUNTER__")); ASSERT_EQUALS("\n\n0 1", preprocess("\n\n__COUNTER__ __COUNTER__")); @@ -145,15 +252,14 @@ static void builtin() static std::string testConstFold(const char code[]) { - std::istringstream istr(code); - std::vector files; - simplecpp::TokenList expr(istr, files); try { + std::vector files; + simplecpp::TokenList expr = makeTokenList(code, files); expr.constFold(); + return expr.stringify(); } catch 
(std::exception &) { return "exception"; } - return expr.stringify(); } static void characterLiteral() @@ -195,10 +301,10 @@ static void characterLiteral() ASSERT_EQUALS('\u0012', simplecpp::characterLiteralToLL("'\\u0012'")); ASSERT_EQUALS('\U00000012', simplecpp::characterLiteralToLL("'\\U00000012'")); - ASSERT_EQUALS(((unsigned int)(unsigned char)'b' << 8) | (unsigned char)'c', simplecpp::characterLiteralToLL("'bc'")); - ASSERT_EQUALS(((unsigned int)(unsigned char)'\x23' << 8) | (unsigned char)'\x45', simplecpp::characterLiteralToLL("'\\x23\\x45'")); - ASSERT_EQUALS(((unsigned int)(unsigned char)'\11' << 8) | (unsigned char)'\222', simplecpp::characterLiteralToLL("'\\11\\222'")); - ASSERT_EQUALS(((unsigned int)(unsigned char)'\a' << 8) | (unsigned char)'\b', simplecpp::characterLiteralToLL("'\\a\\b'")); + ASSERT_EQUALS((static_cast(static_cast('b')) << 8) | static_cast('c'), simplecpp::characterLiteralToLL("'bc'")); + ASSERT_EQUALS((static_cast(static_cast('\x23')) << 8) | static_cast('\x45'), simplecpp::characterLiteralToLL("'\\x23\\x45'")); + ASSERT_EQUALS((static_cast(static_cast('\11')) << 8) | static_cast('\222'), simplecpp::characterLiteralToLL("'\\11\\222'")); + ASSERT_EQUALS((static_cast(static_cast('\a')) << 8) | static_cast('\b'), simplecpp::characterLiteralToLL("'\\a\\b'")); if (sizeof(int) <= 4) ASSERT_EQUALS(-1, simplecpp::characterLiteralToLL("'\\xff\\xff\\xff\\xff'")); else @@ -225,14 +331,14 @@ static void characterLiteral() #ifdef __GNUC__ // BEGIN Implementation-specific results - ASSERT_EQUALS((int)('AB'), simplecpp::characterLiteralToLL("'AB'")); - ASSERT_EQUALS((int)('ABC'), simplecpp::characterLiteralToLL("'ABC'")); - ASSERT_EQUALS((int)('ABCD'), simplecpp::characterLiteralToLL("'ABCD'")); + ASSERT_EQUALS('AB', simplecpp::characterLiteralToLL("'AB'")); + ASSERT_EQUALS('ABC', simplecpp::characterLiteralToLL("'ABC'")); + ASSERT_EQUALS('ABCD', simplecpp::characterLiteralToLL("'ABCD'")); ASSERT_EQUALS('\134t', 
simplecpp::characterLiteralToLL("'\\134t'")); // cppcheck ticket #7452 // END Implementation-specific results #endif - ASSERT_THROW(simplecpp::characterLiteralToLL("'\\9'"), std::runtime_error); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("'\\9'"), std::runtime_error, "invalid escape sequence"); // Input is manually encoded to (escaped) UTF-8 byte sequences // to avoid dependence on source encoding used for this file @@ -252,18 +358,20 @@ static void characterLiteral() ASSERT_EQUALS(0x157, simplecpp::characterLiteralToLL("u'\305\227'")); ASSERT_EQUALS(0xff0f, simplecpp::characterLiteralToLL("u'\357\274\217'")); ASSERT_EQUALS(0x3042, simplecpp::characterLiteralToLL("u'\343\201\202'")); - ASSERT_THROW(simplecpp::characterLiteralToLL("u'\360\223\200\200'"), std::runtime_error); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u'\360\223\200\200'"), std::runtime_error, "code point too large"); + + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u8'\302\265'"), std::runtime_error, "code point too large"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u8'\305\227'"), std::runtime_error, "code point too large"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u8'\357\274\217'"), std::runtime_error, "code point too large"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u8'\343\201\202'"), std::runtime_error, "code point too large"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u8'\360\223\200\200'"), std::runtime_error, "code point too large"); - ASSERT_THROW(simplecpp::characterLiteralToLL("u8'\302\265'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("u8'\305\227'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("u8'\357\274\217'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("u8'\343\201\202'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("u8'\360\223\200\200'"), std::runtime_error); + 
ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("'\\U11111111"), std::runtime_error, "code point too large"); ASSERT_EQUALS('\x89', simplecpp::characterLiteralToLL("'\x89'")); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\x89'"), std::runtime_error); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\x89'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xf4\x90\x80\x80'"), std::runtime_error); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xf4\x90\x80\x80'"), std::runtime_error, "code point too large"); // following examples based on https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt ASSERT_EQUALS(0x80, simplecpp::characterLiteralToLL("U'\xc2\x80'")); @@ -279,38 +387,65 @@ static void characterLiteral() ASSERT_EQUALS(0xfffd, simplecpp::characterLiteralToLL("U'\xef\xbf\xbd'")); ASSERT_EQUALS(0x10ffff, simplecpp::characterLiteralToLL("U'\xf4\x8f\xbf\xbf'")); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\x80'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\x80\x8f'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\x80\x8f\x8f'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\x80\x8f\x8f\x8f'"), std::runtime_error); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\x80'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\x80\x8f'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\x80\x8f\x8f'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\x80\x8f\x8f\x8f'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xbf'"), 
std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xbf\x8f'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xbf\x8f\x8f'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xbf\x8f\x8f\x8f'"), std::runtime_error); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xbf'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xbf\x8f'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xbf\x8f\x8f'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xbf\x8f\x8f\x8f'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xc0'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xc0 '"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xe0\x8f'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xe0\x8f '"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xf0\x8f\x8f'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xf0\x8f\x8f '"), std::runtime_error); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xc0'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xc0 '"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xe0\x8f'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xe0\x8f '"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + 
ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xf0\x8f\x8f'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xf0\x8f\x8f '"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xf8'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xff'"), std::runtime_error); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xf8'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xff'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xc0\xaf'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xe0\x80\xaf'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xf0\x80\x80\xaf'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xc1\xbf'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xe0\x9f\xbf'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xf0\x8f\xbf\xbf'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xc0\x80'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xe0\x80\x80'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xf0\x80\x80\x80'"), std::runtime_error); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xc0\xaf'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xe0\x80\xaf'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xf0\x80\x80\xaf'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); 
+ ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xc1\xbf'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xe0\x9f\xbf'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xf0\x8f\xbf\xbf'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xc0\x80'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xe0\x80\x80'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xf0\x80\x80\x80'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xed\xa0\x80'"), std::runtime_error); - ASSERT_THROW(simplecpp::characterLiteralToLL("U'\xed\xbf\xbf'"), std::runtime_error); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xed\xa0\x80'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U'\xed\xbf\xbf'"), std::runtime_error, "assumed UTF-8 encoded source, but sequence is invalid"); + + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL(""), std::runtime_error, "expected a character literal"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("LU"), std::runtime_error, "expected a character literal"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL(";\n"), std::runtime_error, "expected a character literal"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u8U"), std::runtime_error, "expected a character literal"); + + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("'\n\n"), std::runtime_error, "raw single quotes and newlines not allowed in character 
literals"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("''&"), std::runtime_error, "raw single quotes and newlines not allowed in character literals"); + + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("L'\fff"), std::runtime_error, "multiple characters only supported in narrow character literals"); + + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("'\\\n"), std::runtime_error, "unexpected end of character literal"); + + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("'"), std::runtime_error, "missing closing quote in character literal"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u'"), std::runtime_error, "missing closing quote in character literal"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("L'"), std::runtime_error, "missing closing quote in character literal"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("'a"), std::runtime_error, "missing closing quote in character literal"); + + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("''"), std::runtime_error, "empty character literal"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("L''"), std::runtime_error, "empty character literal"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("U''"), std::runtime_error, "empty character literal"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u''"), std::runtime_error, "empty character literal"); + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u8''"), std::runtime_error, "empty character literal"); + + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("'\\555"), std::runtime_error, "numeric escape sequence too large"); + + ASSERT_THROW_EQUALS(simplecpp::characterLiteralToLL("u'Ó"), std::runtime_error, "assumed UTF-8 encoded source, but character literal ends unexpectedly"); } static void combineOperators_floatliteral() @@ -336,6 +471,9 @@ static void combineOperators_floatliteral() ASSERT_EQUALS("0x1p+3f", preprocess("0x1p+3f")); ASSERT_EQUALS("0x1p+3L", preprocess("0x1p+3L")); 
ASSERT_EQUALS("1p + 3", preprocess("1p+3")); + ASSERT_EQUALS("1.0_a . b", preprocess("1.0_a.b")); + ASSERT_EQUALS("1_a . b", preprocess("1_a.b")); + ASSERT_EQUALS("bool x = d != 0. and b ;", preprocess("bool x = d != 0. and b;")); } static void combineOperators_increment() @@ -375,10 +513,39 @@ static void comment() static void comment_multiline() { + simplecpp::DUI dui; + dui.removeComments = true; + const char code[] = "#define ABC {// \\\n" "}\n" "void f() ABC\n"; - ASSERT_EQUALS("\n\nvoid f ( ) { }", preprocess(code)); + ASSERT_EQUALS("\n\nvoid f ( ) {", preprocess(code, dui)); + + const char code1[] = "#define ABC {// \\\r\n" + "}\n" + "void f() ABC\n"; + ASSERT_EQUALS("\n\nvoid f ( ) {", preprocess(code1, dui)); + + const char code2[] = "#define A 1// \\\r" + "\r" + "2\r" + "A\r"; + ASSERT_EQUALS("\n\n2\n1", preprocess(code2, dui)); + + const char code3[] = "void f() {// \\ \n}\n"; + ASSERT_EQUALS("void f ( ) {", preprocess(code3, dui)); + + const char code4[] = "void f() {// \\\\\\\t\t\n}\n"; + ASSERT_EQUALS("void f ( ) {", preprocess(code4, dui)); + + const char code5[] = "void f() {// \\\\\\a\n}\n"; + ASSERT_EQUALS("void f ( ) {\n}", preprocess(code5, dui)); + + const char code6[] = "void f() {// \\\n\n\n}\n"; + ASSERT_EQUALS("void f ( ) {\n\n\n}", preprocess(code6, dui)); + + // #471 ensure there is newline in comment so that line-splicing can be detected by tools + ASSERT_EQUALS("// abc\ndef", readfile("// abc\\\ndef")); } @@ -398,6 +565,7 @@ static void constFold() ASSERT_EQUALS("exception", testConstFold("?2:3")); } +#ifdef __CYGWIN__ static void convertCygwinPath() { // absolute paths @@ -415,6 +583,7 @@ static void convertCygwinPath() ASSERT_EQUALS("\\cygdrive", simplecpp::convertCygwinToWindowsPath("/cygdrive")); ASSERT_EQUALS("\\cygdrive\\", simplecpp::convertCygwinToWindowsPath("/cygdrive/")); } +#endif static void define1() { @@ -480,9 +649,12 @@ static void define6() static void define7() { + simplecpp::DUI dui; + dui.removeComments = true; + 
const char code[] = "#define A(X) X+1\n" "A(1 /*23*/)"; - ASSERT_EQUALS("\n1 + 1", preprocess(code)); + ASSERT_EQUALS("\n1 + 1", preprocess(code, dui)); } static void define8() // 6.10.3.10 @@ -518,27 +690,51 @@ static void define11() // location of expanded argument ASSERT_EQUALS("\n#line 10 \"cppcheck.cpp\"\n1 ;", preprocess(code)); } +static void define12() +{ + const char code[] = "struct foo x = {\n" + " #define V 0\n" + " .x = V,\n" + "};\n"; + ASSERT_EQUALS("struct foo x = {\n" + "# define V 0\n" + ". x = V ,\n" + "} ;", readfile(code)); + ASSERT_EQUALS("struct foo x = {\n" + "\n" + ". x = 0 ,\n" + "} ;", preprocess(code)); +} + +static void define13() +{ + const char code[] = "#define M 180.\n" + "extern void g();\n" + "void f(double d) {\n" + " if (d > M) {}\n" + "}\n"; + ASSERT_EQUALS("\nextern void g ( ) ;\n" + "void f ( double d ) {\n" + "if ( d > 180. ) { }\n" + "}", preprocess(code)); +} + + static void define_invalid_1() { - std::istringstream istr("#define A(\nB\n"); - std::vector files; - std::map filedata; + const char code[] = "#define A(\nB\n"; simplecpp::OutputList outputList; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files,"test.c"), files, filedata, simplecpp::DUI(), &outputList); - ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define\n", toString(outputList)); + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, bad macro syntax\n", toString(outputList)); } static void define_invalid_2() { - std::istringstream istr("#define\nhas#"); - std::vector files; - std::map filedata; + const char code[] = "#define\nhas#"; simplecpp::OutputList outputList; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files,"test.c"), files, filedata, simplecpp::DUI(), &outputList); - ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define\n", toString(outputList)); + ASSERT_EQUALS("", 
preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, bad macro syntax\n", toString(outputList)); } static void define_define_1() @@ -640,6 +836,24 @@ static void define_define_11() ASSERT_EQUALS("\n\n\n\nP2DIR ;", preprocess(code)); } +static void define_define_11a() +{ + const char code[] = "#define A_B_C 0x1\n" + "#define A_ADDRESS 0x00001000U\n" + "#define A ((uint32_t ) A_ADDRESS)\n" + "#define CONCAT(x, y, z) x ## _ ## y ## _ ## z\n" + "#define TEST_MACRO CONCAT(A, B, C)\n" + "TEST_MACRO\n"; + ASSERT_EQUALS("\n\n\n\n\n0x1", preprocess(code)); + + const char code2[] = "#define ADDER_S(a, b) a + b\n" // #374 + "#define ADDER(x) ADDER_S(x)\n" + "#define ARGUMENTS 1, 2\n" + "#define RUN ADDER(ARGUMENTS)\n" + "void f() { RUN; }\n"; + ASSERT_EQUALS("\n\n\n\nvoid f ( ) { 1 + 2 ; }", preprocess(code2)); +} + static void define_define_12() { const char code[] = "#define XY(Z) Z\n" @@ -691,6 +905,85 @@ static void define_define_17() ASSERT_EQUALS("\n\n1 ;", preprocess(code)); } +static void define_define_18() +{ + const char code[] = "#define FOO(v) BAR(v, 0)\n" + "#define BAR(v, x) (v)\n" + "#define var (p->var)\n" + "FOO(var);"; + ASSERT_EQUALS("\n\n\n( ( p -> var ) ) ;", preprocess(code)); +} + +static void define_define_19() // #292 +{ + const char code[] = "#define X 1,2,3\n" + "#define Foo(A, B) A\n" + "#define Bar Foo(X, 0)\n" + "Bar\n"; + ASSERT_EQUALS("\n\n\n1 , 2 , 3", preprocess(code)); +} + +static void define_define_20() // #384 arg contains comma +{ + const char code[] = "#define Z_IS_ENABLED1(config_macro) Z_IS_ENABLED2(_XXXX##config_macro)\n" + "#define _XXXX1 _YYYY,\n" + "#define Z_IS_ENABLED2(one_or_two_args) Z_IS_ENABLED3(one_or_two_args 1, 0)\n" + "#define Z_IS_ENABLED3(ignore_this, val, ...) 
val\n" + "#define IS_ENABLED(config_macro) Z_IS_ENABLED1(config_macro)\n" + "#define FEATURE 1\n" + "a = IS_ENABLED(FEATURE)\n"; + ASSERT_EQUALS("\n\n\n\n\n\na = 1", preprocess(code)); +} + +static void define_define_21() // #397 DEBRACKET macro +{ + const char code1[] = "#define A(val) B val\n" + "#define B(val) val\n" + "A((2))\n"; + ASSERT_EQUALS("\n\n2", preprocess(code1)); + + const char code2[] = "#define x (2)\n" + "#define A B x\n" + "#define B(val) val\n" + "A\n"; + ASSERT_EQUALS("\n\n\nB ( 2 )", preprocess(code2)); + + const char code3[] = "#define __GET_ARG2_DEBRACKET(ignore_this, val, ...) __DEBRACKET val\n" + "#define __DEBRACKET(...) __VA_ARGS__\n" + "#5 \"a.c\"\n" + "__GET_ARG2_DEBRACKET(432 (33), (B))\n"; + ASSERT_EQUALS("\n#line 5 \"a.c\"\nB", preprocess(code3)); +} + +static void define_define_22() // #400 inner macro not expanded after hash hash +{ + const char code[] = "#define FOO(a) CAT(DO, STUFF)(1,2)\n" + "#define DOSTUFF(a, b) CAT(3, 4)\n" + "#define CAT(a, b) a##b\n" + "FOO(1)\n"; + ASSERT_EQUALS("\n\n\n34", preprocess(code)); +} + +static void define_define_23() // #403 crash (infinite recursion) +{ + const char code[] = "#define C_(x, y) x ## y\n" + "#define C(x, y) C_(x, y)\n" + "#define X(func) C(Y, C(func, Z))\n" + "#define die X(die)\n" + "die(void);\n"; + ASSERT_EQUALS("\n\n\n\nYdieZ ( void ) ;", preprocess(code)); +} + +static void define_define_24() // #590 +{ + const char code[] = "#define B A\n" + "#define A x(B)\n" + "#define C(s) s\n" + "#define D(s) C(s)\n" + "D(A)\n"; + ASSERT_EQUALS("\n\n\n\nx ( A )", preprocess(code)); +} + static void define_va_args_1() { const char code[] = "#define A(fmt...) dostuff(fmt)\n" @@ -712,6 +1005,153 @@ static void define_va_args_3() // min number of arguments ASSERT_EQUALS("\n1", preprocess(code)); } +static void define_va_args_4() // cppcheck trac #9754 +{ + const char code[] = "#define A(x, y, ...) 
printf(x, y, __VA_ARGS__)\n" + "A(1, 2)\n"; + ASSERT_EQUALS("\nprintf ( 1 , 2 )", preprocess(code)); +} + +static void define_va_opt_1() +{ + const char code[] = "#define p1(fmt, args...) printf(fmt __VA_OPT__(,) args)\n" + "p1(\"hello\");\n" + "p1(\"%s\", \"hello\");\n"; + + ASSERT_EQUALS("\nprintf ( \"hello\" ) ;\n" + "printf ( \"%s\" , \"hello\" ) ;", + preprocess(code)); +} + +static void define_va_opt_2() +{ + const char code[] = "#define err(...)\\\n" + "__VA_OPT__(\\\n" + "printf(__VA_ARGS__);\\\n" + ")\n" + "#define err2(something, ...) __VA_OPT__(err(__VA_ARGS__))\n" + "err2(test)\n" + "err2(test, \"%d\", 2)\n"; + + ASSERT_EQUALS("\n\n\n\n\n\nprintf ( \"%d\" , 2 ) ;", preprocess(code)); +} + +static void define_va_opt_3() +{ + // non-escaped newline without closing parenthesis + const char code1[] = "#define err(...) __VA_OPT__(printf( __VA_ARGS__);\n" + ")\n" + "err()"; + + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code1, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing closing parenthesis for __VA_OPT__\n", + toString(outputList)); + + outputList.clear(); + + // non-escaped newline without open parenthesis + const char code2[] = "#define err(...) __VA_OPT__\n" + "(something)\n" + "err()"; + + ASSERT_EQUALS("", preprocess(code2, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing opening parenthesis for __VA_OPT__\n", + toString(outputList)); +} + +static void define_va_opt_4() +{ + // missing parenthesis + const char code1[] = "#define err(...) 
__VA_OPT__ something\n" + "err()"; + + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code1, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing opening parenthesis for __VA_OPT__\n", + toString(outputList)); + + outputList.clear(); + + // missing open parenthesis + const char code2[] = "#define err(...) __VA_OPT__ something)\n" + "err()"; + + ASSERT_EQUALS("", preprocess(code2, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing opening parenthesis for __VA_OPT__\n", + toString(outputList)); +} + +static void define_va_opt_5() +{ + // parenthesis not directly proceeding __VA_OPT__ + const char code[] = "#define err(...) __VA_OPT__ something (something)\n" + "err()"; + + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing opening parenthesis for __VA_OPT__\n", + toString(outputList)); +} + +static void define_va_opt_6() +{ + // nested __VA_OPT__ + const char code[] = "#define err(...) __VA_OPT__(__VA_OPT__(something))\n" + "err()"; + + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': __VA_OPT__ cannot be nested\n", + toString(outputList)); +} + +static void define_va_opt_7() +{ + // eof in __VA_OPT__ + const char code1[] = "#define err(...) __VA_OPT__"; + + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code1, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing opening parenthesis for __VA_OPT__\n", + toString(outputList)); + + outputList.clear(); + + const char code2[] = "#define err(...) 
__VA_OPT__("; + + ASSERT_EQUALS("", preprocess(code2, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing closing parenthesis for __VA_OPT__\n", + toString(outputList)); + + outputList.clear(); + + const char code3[] = "#define err(...) __VA_OPT__(x"; + + ASSERT_EQUALS("", preprocess(code3, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,Failed to parse #define, In definition of 'err': Missing closing parenthesis for __VA_OPT__\n", + toString(outputList)); +} + +static void define_va_opt_8() +{ + const char code[] = "#define f(...) #__VA_OPT__(x)\n" + "const char* v1 = f();"; + + simplecpp::OutputList outputList; + ASSERT_EQUALS("\nconst char * v1 = \"\" ;", preprocess(code, &outputList)); + ASSERT_EQUALS("", toString(outputList)); +} + +static void define_va_opt_9() +{ + simplecpp::DUI dui; + dui.defines.emplace_back("f(...)=__VA_OPT__"); + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess("", dui, &outputList)); + ASSERT_EQUALS("file0,0,dui_error,In definition of 'f': Missing opening parenthesis for __VA_OPT__\n", toString(outputList)); +} + static void define_ifdef() { const char code[] = "#define A(X) X\n" @@ -720,13 +1160,27 @@ static void define_ifdef() "#endif\n" ")\n"; - const simplecpp::DUI dui; simplecpp::OutputList outputList; - preprocess(code, dui, &outputList); + ASSERT_EQUALS("", preprocess(code, &outputList)); ASSERT_EQUALS("file0,3,syntax_error,failed to expand 'A', it is invalid to use a preprocessor directive as macro parameter\n", toString(outputList)); } +static void pragma_backslash() +{ + const char code[] = "#pragma comment (longstring, \\\n" + "\"HEADER\\\n" + "This is a very long string that is\\\n" + "a multi-line string.\\\n" + "How much more do I have to say?\\\n" + "Well, be prepared, because the\\\n" + "story is just beginning. This is a test\\\n" + "string for demonstration purposes. 
\")\n"; + + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); +} + static void dollar() { ASSERT_EQUALS("$ab", readfile("$ab")); @@ -735,93 +1189,87 @@ static void dollar() static void error1() { - std::istringstream istr("#error hello world!\n"); - std::vector files; - std::map filedata; + const char code[] = "#error hello world!\n"; simplecpp::OutputList outputList; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files,"test.c"), files, filedata, simplecpp::DUI(), &outputList); + ASSERT_EQUALS("", preprocess(code, &outputList)); ASSERT_EQUALS("file0,1,#error,#error hello world!\n", toString(outputList)); } static void error2() { - std::istringstream istr("#error it's an error\n"); - std::vector files; - std::map filedata; + const char code[] = "#error it's an error\n"; simplecpp::OutputList outputList; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files,"test.c"), files, filedata, simplecpp::DUI(), &outputList); + ASSERT_EQUALS("", preprocess(code, &outputList)); ASSERT_EQUALS("file0,1,#error,#error it's an error\n", toString(outputList)); } static void error3() { - std::istringstream istr("#error \"bla bla\\\n" - " bla bla.\"\n"); + const char code[] = "#error \"bla bla\\\n" + " bla bla.\"\n"; std::vector files; simplecpp::OutputList outputList; - simplecpp::TokenList rawtokens(istr, files, "test.c", &outputList); + const simplecpp::TokenList rawtokens = makeTokenList(code, files, "test.c", &outputList); ASSERT_EQUALS("", toString(outputList)); } static void error4() { // "#error x\n1" - std::istringstream istr(std::string("\xFE\xFF\x00\x23\x00\x65\x00\x72\x00\x72\x00\x6f\x00\x72\x00\x20\x00\x78\x00\x0a\x00\x31", 22)); + const char code[] = "\xFE\xFF\x00\x23\x00\x65\x00\x72\x00\x72\x00\x6f\x00\x72\x00\x20\x00\x78\x00\x0a\x00\x31"; std::vector files; - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::OutputList 
outputList; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files,"test.c"), files, filedata, simplecpp::DUI(), &outputList); + const simplecpp::TokenList rawtoken = makeTokenList(code, sizeof(code),files,"test.c"); + simplecpp::preprocess(tokens2, rawtoken, files, cache, simplecpp::DUI(), &outputList); ASSERT_EQUALS("file0,1,#error,#error x\n", toString(outputList)); } static void error5() { // "#error x\n1" - std::istringstream istr(std::string("\xFF\xFE\x23\x00\x65\x00\x72\x00\x72\x00\x6f\x00\x72\x00\x20\x00\x78\x00\x0a\x00\x78\x00\x31\x00", 22)); + const char code[] = "\xFF\xFE\x23\x00\x65\x00\x72\x00\x72\x00\x6f\x00\x72\x00\x20\x00\x78\x00\x0a\x00\x78\x00\x31\x00"; std::vector files; - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::OutputList outputList; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files,"test.c"), files, filedata, simplecpp::DUI(), &outputList); + const simplecpp::TokenList rawtokens = makeTokenList(code, sizeof(code),files,"test.c"); + simplecpp::preprocess(tokens2, rawtokens, files, cache, simplecpp::DUI(), &outputList); ASSERT_EQUALS("file0,1,#error,#error x\n", toString(outputList)); } static void garbage() { - const simplecpp::DUI dui; simplecpp::OutputList outputList; outputList.clear(); - preprocess("#ifdef\n", dui, &outputList); + ASSERT_EQUALS("", preprocess("#ifdef\n", &outputList)); ASSERT_EQUALS("file0,1,syntax_error,Syntax error in #ifdef\n", toString(outputList)); outputList.clear(); - preprocess("#define TEST2() A ##\nTEST2()\n", dui, &outputList); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'TEST2', Invalid ## usage when expanding 'TEST2'.\n", toString(outputList)); + ASSERT_EQUALS("", preprocess("#define TEST2() A ##\nTEST2()\n", &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'TEST2', Invalid ## usage when expanding 'TEST2': Unexpected newline\n", toString(outputList)); 
outputList.clear(); - preprocess("#define CON(a,b) a##b##\nCON(1,2)\n", dui, &outputList); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'CON', Invalid ## usage when expanding 'CON'.\n", toString(outputList)); + ASSERT_EQUALS("", preprocess("#define CON(a,b) a##b##\nCON(1,2)\n", &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'CON', Invalid ## usage when expanding 'CON': Unexpected newline\n", toString(outputList)); } static void garbage_endif() { - const simplecpp::DUI dui; simplecpp::OutputList outputList; outputList.clear(); - preprocess("#elif A<0\n", dui, &outputList); + ASSERT_EQUALS("", preprocess("#elif A<0\n", &outputList)); ASSERT_EQUALS("file0,1,syntax_error,#elif without #if\n", toString(outputList)); outputList.clear(); - preprocess("#else\n", dui, &outputList); + ASSERT_EQUALS("", preprocess("#else\n", &outputList)); ASSERT_EQUALS("file0,1,syntax_error,#else without #if\n", toString(outputList)); outputList.clear(); - preprocess("#endif\n", dui, &outputList); + ASSERT_EQUALS("", preprocess("#endif\n", &outputList)); ASSERT_EQUALS("file0,1,syntax_error,#endif without #if\n", toString(outputList)); } @@ -842,6 +1290,17 @@ static void hash() preprocess("#define A(x) (x)\n" "#define B(x) A(#x)\n" "B(123)")); + + ASSERT_EQUALS("\n\nprintf ( \"bar(3)\" \"\\n\" ) ;", + preprocess("#define bar(x) x % 2\n" + "#define foo(x) printf(#x \"\\n\")\n" + "foo(bar(3));")); + + ASSERT_EQUALS("\n\n\n\"Y Y\"", + preprocess("#define X(x,y) x y\n" + "#define STR_(x) #x\n" + "#define STR(x) STR_(x)\n" + "STR(X(Y,Y))")); } static void hashhash1() // #4703 @@ -881,6 +1340,16 @@ static void hashhash4() // nonstandard gcc/clang extension for empty varargs ASSERT_EQUALS("\n\na ( 1 ) ;", preprocess(code)); } +static void hashhash4a() +{ + const char code[] = "#define GETMYID(a) ((a))+1\n" + "#define FIGHT_FOO(c, ...) foo(c, ##__VA_ARGS__)\n" + "#define FIGHT_BAR(c, args...) 
bar(c, ##args)\n" + "FIGHT_FOO(1, GETMYID(a));\n" + "FIGHT_BAR(1, GETMYID(b));"; + ASSERT_EQUALS("\n\n\nfoo ( 1 , ( ( a ) ) + 1 ) ;\nbar ( 1 , ( ( b ) ) + 1 ) ;", preprocess(code)); +} + static void hashhash5() { ASSERT_EQUALS("x1", preprocess("x##__LINE__")); @@ -943,26 +1412,25 @@ static void hashhash9() "void operator >>= ( void ) { x = x >> 1 ; } ;"; ASSERT_EQUALS(expected, preprocess(code)); - const simplecpp::DUI dui; simplecpp::OutputList outputList; code = "#define A +##x\n" "A"; outputList.clear(); - preprocess(code, dui, &outputList); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'A', Invalid ## usage when expanding 'A'.\n", toString(outputList)); + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'A', Invalid ## usage when expanding 'A': Combining '+' and 'x' yields an invalid token.\n", toString(outputList)); code = "#define A 2##=\n" "A"; outputList.clear(); - preprocess(code, dui, &outputList); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'A', Invalid ## usage when expanding 'A'.\n", toString(outputList)); + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'A', Invalid ## usage when expanding 'A': Combining '2' and '=' yields an invalid token.\n", toString(outputList)); code = "#define A <<##x\n" "A"; outputList.clear(); - preprocess(code, dui, &outputList); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'A', Invalid ## usage when expanding 'A'.\n", toString(outputList)); + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'A', Invalid ## usage when expanding 'A': Combining '<<' and 'x' yields an invalid token.\n", toString(outputList)); } static void hashhash10() @@ -1027,56 +1495,342 @@ static void hashhash13() ASSERT_EQUALS("\n& ab", preprocess(code2)); } +static void hashhash_string_literal() +{ + const char code[] = + "#define UL(x) x##_ul\n" + 
"\"ABC\"_ul;\n" + "UL(\"ABC\");"; + + ASSERT_EQUALS("\n\"ABC\" _ul ;\n\"ABC\" _ul ;", preprocess(code)); +} + +static void hashhash_string_wrapped() +{ + const char code[] = + "#define CONCAT(a,b) a##b\n" + "#define STR(x) CONCAT(x,s)\n" + "STR(\"ABC\");"; + + ASSERT_EQUALS("\n\n\"ABC\" s ;", preprocess(code)); +} + +static void hashhash_char_literal() +{ + const char code[] = + "#define CH(x) x##_ch\n" + "CH('a');"; + + ASSERT_EQUALS("\n'a' _ch ;", preprocess(code)); +} + +static void hashhash_multichar_literal() +{ + const char code[] = + "#define CH(x) x##_ch\n" + "CH('abcd');"; + + ASSERT_EQUALS("\n'abcd' _ch ;", preprocess(code)); +} + +static void hashhash_char_escaped() +{ + const char code[] = + "#define CH(x) x##_ch\n" + "CH('\\'');"; + + ASSERT_EQUALS("\n'\\'' _ch ;", preprocess(code)); +} + +static void hashhash_string_nothing() +{ + const char code[] = + "#define CONCAT(a,b) a##b\n" + "CONCAT(\"ABC\",);"; + + ASSERT_EQUALS("\n\"ABC\" ;", preprocess(code)); +} + +static void hashhash_string_char() +{ + const char code[] = + "#define CONCAT(a,b) a##b\n" + "CONCAT(\"ABC\", 'c');"; + + // This works, but maybe shouldn't since the result isn't useful. + ASSERT_EQUALS("\n\"ABC\" 'c' ;", preprocess(code)); +} + +static void hashhash_string_name() +{ + const char code[] = + "#define CONCAT(a,b) a##b\n" + "#define LIT _literal\n" + "CONCAT(\"string\", LIT);"; + + // TODO is this correct? 
clang fails because that's not really a valid thing but gcc seems to accept it + // see https://gist.github.com/patrickdowling/877a25294f069bf059f3b07f9b5b7039 + + ASSERT_EQUALS("\n\n\"string\" LIT ;", preprocess(code)); +} + +static void hashhashhash_int_literal() +{ + const char code[] = + "#define CONCAT(a,b,c) a##b##c\n" + "#define PASTER(a,b,c) CONCAT(a,b,c)\n" + "PASTER(\"123\",_i,ul);"; + + ASSERT_EQUALS("\n\n\"123\" _iul ;", preprocess(code)); +} + +static void hashhash_int_literal() +{ + const char code[] = + "#define PASTE(a,b) a##b\n" + "PASTE(123,_i);\n" + "1234_i;\n"; + + ASSERT_EQUALS("\n123_i ;\n1234_i ;", preprocess(code)); +} + static void hashhash_invalid_1() { - std::istringstream istr("#define f(a) (##x)\nf(1)"); - std::vector files; - std::map filedata; + const char code[] = "#define f(a) (##x)\nf(1)"; simplecpp::OutputList outputList; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files,"test.c"), files, filedata, simplecpp::DUI(), &outputList); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'f', Invalid ## usage when expanding 'f'.\n", toString(outputList)); + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'f', Invalid ## usage when expanding 'f': Unexpected token '('\n", toString(outputList)); } static void hashhash_invalid_2() { - std::istringstream istr("#define f(a) (x##)\nf(1)"); - std::vector files; - std::map filedata; + const char code[] = "#define f(a) (x##)\nf(1)"; simplecpp::OutputList outputList; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files,"test.c"), files, filedata, simplecpp::DUI(), &outputList); - ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'f', Invalid ## usage when expanding 'f'.\n", toString(outputList)); + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'f', Invalid ## usage when 
expanding 'f': Unexpected token ')'\n", toString(outputList)); +} + +static void hashhash_invalid_string_number() +{ + const char code[] = + "#define BAD(x) x##12345\nBAD(\"ABC\")"; + + simplecpp::OutputList outputList; + preprocess(code, simplecpp::DUI(), &outputList); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'BAD', Invalid ## usage when expanding 'BAD': Combining '\"ABC\"' and '12345' yields an invalid token.\n", toString(outputList)); +} + +static void hashhash_invalid_missing_args() +{ + const char code[] = + "#define BAD(x) ##x\nBAD()"; + + simplecpp::OutputList outputList; + preprocess(code, simplecpp::DUI(), &outputList); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'BAD', Invalid ## usage when expanding 'BAD': Missing first argument\n", toString(outputList)); +} + +static void hashhash_null_stmt() +{ + const char code[] = + "# define B(x) C ## x\n" + "#\n" + "# define C0 1\n" + "\n" + "B(0);\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("\n\n\n\n1 ;", preprocess(code, &outputList)); +} + +static void hashhash_empty_va_args() +{ + // #395 hash hash with an empty __VA_ARGS__ in a macro + const char code[] = + "#define CAT(a, ...) a##__VA_ARGS__\n" + "#define X(a, ...) CAT(a)\n" + "#define LEVEL_2 (2)\n" + "X(LEVEL_2)\n"; + ASSERT_EQUALS("\n\n\n( 2 )", preprocess(code)); +} + +static void hashhash_universal_character() +{ + const char code[] = + "#define A(x,y) x##y\nint A(\\u01,04);"; + simplecpp::OutputList outputList; + preprocess(code, simplecpp::DUI(), &outputList); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'A', Invalid ## usage when expanding 'A': Combining '\\u01' and '04' yields universal character '\\u0104'. 
This is undefined behavior according to C standard chapter 5.1.1.2, paragraph 4.\n", toString(outputList)); } static void has_include_1() { const char code[] = "#ifdef __has_include\n" - " #ifdef __has_include(\"simplecpp.h\")\n" + " #if __has_include(\"simplecpp.h\")\n" " A\n" " #else\n" " B\n" " #endif\n" "#endif"; simplecpp::DUI dui; + dui.includePaths.emplace_back(testSourceDir); + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); // we default to latest standard internally + dui.std = "c++14"; + ASSERT_EQUALS("", preprocess(code, dui)); dui.std = "c++17"; ASSERT_EQUALS("\n\nA", preprocess(code, dui)); - ASSERT_EQUALS("", preprocess(code)); + dui.std = "c++20"; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); } static void has_include_2() { const char code[] = "#if defined( __has_include)\n" - " #ifdef __has_include(\"simplecpp.h\")\n" + " #if /*comment*/ __has_include /*comment*/(\"simplecpp.h\") // comment\n" " A\n" " #else\n" " B\n" " #endif\n" "#endif"; simplecpp::DUI dui; + dui.removeComments = true; // TODO: remove this + dui.includePaths.emplace_back(testSourceDir); + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); // we default to latest standard internally + dui.std = "c++14"; + ASSERT_EQUALS("", preprocess(code, dui)); dui.std = "c++17"; ASSERT_EQUALS("\n\nA", preprocess(code, dui)); - ASSERT_EQUALS("", preprocess(code)); + dui.std = "c++20"; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); +} + +static void has_include_3() +{ + const char code[] = "#ifdef __has_include\n" + " #if __has_include()\n" + " A\n" + " #else\n" + " B\n" + " #endif\n" + "#endif"; + simplecpp::DUI dui; + + // Test file not found... + ASSERT_EQUALS("\n\n\n\nB", preprocess(code, dui)); // we default to latest standard internally + dui.std = "c++14"; + ASSERT_EQUALS("", preprocess(code, dui)); + dui.std = "c++17"; + ASSERT_EQUALS("\n\n\n\nB", preprocess(code, dui)); + + // Unless -I is set (preferably, we should differentiate -I and -isystem...) 
+ dui.includePaths.emplace_back(testSourceDir + "/testsuite"); + dui.std = ""; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); // we default to latest standard internally + dui.std = "c++14"; + ASSERT_EQUALS("", preprocess(code, dui)); + dui.std = "c++17"; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); + dui.std = "c++20"; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); +} + +static void has_include_4() +{ + const char code[] = "#ifdef __has_include\n" + " #if __has_include(\"testsuite/realFileName1.cpp\")\n" + " A\n" + " #else\n" + " B\n" + " #endif\n" + "#endif"; + simplecpp::DUI dui; + dui.includePaths.emplace_back(testSourceDir); // we default to latest standard internally + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); + dui.std = "c++14"; + ASSERT_EQUALS("", preprocess(code, dui)); + dui.std = "c++17"; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); + dui.std = "c++20"; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); +} + +static void has_include_5() +{ + const char code[] = "#if defined( __has_include)\n" + " #if !__has_include()\n" + " A\n" + " #else\n" + " B\n" + " #endif\n" + "#endif"; + simplecpp::DUI dui; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); // we default to latest standard internally + dui.includePaths.emplace_back(testSourceDir); + dui.std = "c++14"; + ASSERT_EQUALS("", preprocess(code, dui)); + dui.std = "c++17"; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); + dui.std = "c++20"; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); +} + +static void has_include_6() +{ + const char code[] = "#if defined( __has_include)\n" + " #if !__has_include()\n" + " A\n" + " #else\n" + " B\n" + " #endif\n" + "#endif"; + simplecpp::DUI dui; + dui.includePaths.emplace_back(testSourceDir); + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); // we default to latest standard internally + dui.std = "c++99"; + ASSERT_EQUALS("", preprocess(code, dui)); + dui.std = "gnu99"; + ASSERT_EQUALS("\n\nA", preprocess(code, dui)); +} + +static void strict_ansi_1() +{ 
+ const char code[] = "#if __STRICT_ANSI__\n" + " A\n" + "#endif"; + simplecpp::DUI dui; + dui.std = "gnu99"; + ASSERT_EQUALS("", preprocess(code, dui)); +} + +static void strict_ansi_2() +{ + const char code[] = "#if __STRICT_ANSI__\n" + " A\n" + "#endif"; + simplecpp::DUI dui; + dui.std = "c99"; + ASSERT_EQUALS("\nA", preprocess(code, dui)); +} + +static void strict_ansi_3() +{ + const char code[] = "#if __STRICT_ANSI__\n" + " A\n" + "#endif"; + simplecpp::DUI dui; + dui.std = "c99"; + dui.undefined.insert("__STRICT_ANSI__"); + ASSERT_EQUALS("", preprocess(code, dui)); +} + +static void strict_ansi_4() +{ + const char code[] = "#if __STRICT_ANSI__\n" + " A\n" + "#endif"; + simplecpp::DUI dui; + dui.std = "gnu99"; + dui.defines.emplace_back("__STRICT_ANSI__"); + ASSERT_EQUALS("\nA", preprocess(code, dui)); } static void ifdef1() @@ -1122,7 +1876,7 @@ static void ifA() ASSERT_EQUALS("", preprocess(code)); simplecpp::DUI dui; - dui.defines.push_back("A=1"); + dui.defines.emplace_back("A=1"); ASSERT_EQUALS("\nX", preprocess(code, dui)); } @@ -1141,7 +1895,7 @@ static void ifDefined() "#endif"; simplecpp::DUI dui; ASSERT_EQUALS("", preprocess(code, dui)); - dui.defines.push_back("A=1"); + dui.defines.emplace_back("A=1"); ASSERT_EQUALS("\nX", preprocess(code, dui)); } @@ -1152,7 +1906,7 @@ static void ifDefinedNoPar() "#endif"; simplecpp::DUI dui; ASSERT_EQUALS("", preprocess(code, dui)); - dui.defines.push_back("A=1"); + dui.defines.emplace_back("A=1"); ASSERT_EQUALS("\nX", preprocess(code, dui)); } @@ -1164,7 +1918,7 @@ static void ifDefinedNested() "#endif"; simplecpp::DUI dui; ASSERT_EQUALS("", preprocess(code, dui)); - dui.defines.push_back("FOO=1"); + dui.defines.emplace_back("FOO=1"); ASSERT_EQUALS("\n\nX", preprocess(code, dui)); } @@ -1176,33 +1930,23 @@ static void ifDefinedNestedNoPar() "#endif"; simplecpp::DUI dui; ASSERT_EQUALS("", preprocess(code, dui)); - dui.defines.push_back("FOO=1"); + dui.defines.emplace_back("FOO=1"); ASSERT_EQUALS("\n\nX", 
preprocess(code, dui)); } static void ifDefinedInvalid1() // #50 - invalid unterminated defined { const char code[] = "#if defined(A"; - simplecpp::DUI dui; simplecpp::OutputList outputList; - std::vector files; - simplecpp::TokenList tokens2(files); - std::istringstream istr(code); - std::map filedata; - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files), files, filedata, dui, &outputList); + ASSERT_EQUALS("", preprocess(code, &outputList)); ASSERT_EQUALS("file0,1,syntax_error,failed to evaluate #if condition\n", toString(outputList)); } static void ifDefinedInvalid2() { const char code[] = "#if defined"; - simplecpp::DUI dui; simplecpp::OutputList outputList; - std::vector files; - simplecpp::TokenList tokens2(files); - std::istringstream istr(code); - std::map filedata; - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files), files, filedata, dui, &outputList); + ASSERT_EQUALS("", preprocess(code, &outputList)); ASSERT_EQUALS("file0,1,syntax_error,failed to evaluate #if condition\n", toString(outputList)); } @@ -1215,16 +1959,22 @@ static void ifDefinedHashHash() "#else\n" "#error FOO is not enabled\n" "#endif\n"; - simplecpp::DUI dui; simplecpp::OutputList outputList; - std::vector files; - simplecpp::TokenList tokens2(files); - std::istringstream istr(code); - std::map filedata; - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files), files, filedata, dui, &outputList); + ASSERT_EQUALS("", preprocess(code, &outputList)); ASSERT_EQUALS("file0,4,#error,#error FOO is enabled\n", toString(outputList)); } +static void ifDefinedHashHash2() +{ + // #409 + // do not crash when expanding P() (as ## rhs is "null") + // note: gcc outputs "defined E" + const char code[] = "#define P(p)defined E##p\n" + "P()\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("\n0", preprocess(code, &outputList)); +} + static void ifLogical() { const char code[] = "#if defined(A) || defined(B)\n" @@ -1233,10 +1983,10 @@ static void ifLogical() 
simplecpp::DUI dui; ASSERT_EQUALS("", preprocess(code, dui)); dui.defines.clear(); - dui.defines.push_back("A=1"); + dui.defines.emplace_back("A=1"); ASSERT_EQUALS("\nX", preprocess(code, dui)); dui.defines.clear(); - dui.defines.push_back("B=1"); + dui.defines.emplace_back("B=1"); ASSERT_EQUALS("\nX", preprocess(code, dui)); } @@ -1338,19 +2088,28 @@ static void ifalt() // using "and", "or", etc static void ifexpr() { - const char *code = "#define MACRO() (1)\n" - "#if ~MACRO() & 8\n" - "1\n" - "#endif"; + const char code[] = "#define MACRO() (1)\n" + "#if ~MACRO() & 8\n" + "1\n" + "#endif"; ASSERT_EQUALS("\n\n1", preprocess(code)); } +static void ifUndefFuncStyleMacro() +{ + const char code[] = "#if A()\n" + "#endif\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,failed to evaluate #if condition, undefined function-like macro invocation: A( ... )\n", toString(outputList)); +} + static void location1() { const char *code; code = "# 1 \"main.c\"\n\n\n" - "x"; + "x"; ASSERT_EQUALS("\n#line 3 \"main.c\"\nx", preprocess(code)); } @@ -1394,71 +2153,212 @@ static void location4() ASSERT_EQUALS("\n#line 1 \"abc\\def.g\"\na", preprocess(code)); } +static void location5() +{ + // https://sourceforge.net/p/cppcheck/discussion/general/thread/eccf020a13/ + const char *code; + code = "#line 10 \"/a/Attribute/parser/FilterParser.y\" // lalr1.cc:377\n" + "int x;\n"; + ASSERT_EQUALS("\n#line 10 \"/a/Attribute/parser/FilterParser.y\"\n" + "int x ;", preprocess(code)); +} + +static void location6() +{ + const char code[] = + "#line 3\n" + "__LINE__ __FILE__\n"; + ASSERT_EQUALS("\n" + "\n" + "3 \"\"", + preprocess(code)); +} + +static void location7() +{ + const char code[] = + "#line 3 \"file.c\"\n" + "__LINE__ __FILE__\n"; + ASSERT_EQUALS("\n" + "#line 3 \"file.c\"\n" + "3 \"file.c\"", + preprocess(code)); +} + +static void location8() +{ + const char code[] = + "# 3\n" + "__LINE__ __FILE__\n"; + 
ASSERT_EQUALS("\n" + "2 \"\"", // TODO: should say 3 + preprocess(code)); +} + +static void location9() +{ + const char code[] = + "# 3 \"file.c\"\n" + "__LINE__ __FILE__\n"; + ASSERT_EQUALS("\n" + "#line 3 \"file.c\"\n" + "3 \"file.c\"", + preprocess(code)); +} + +static void location10() +{ + const char code[] = + "#line 3\n" + "__LINE__ __FILE__\n"; + ASSERT_EQUALS("\n" + "\n" // TODO: should this have the #line marker? + "3 \"\"", + preprocess(code)); +} + +static void location11() +{ + const char code[] = + "#line 3 \"file.c\"\n" + "__LINE__ __FILE__\n" + "#line 33 \"file2.c\"\n" + "__LINE__ __FILE__\n"; + ASSERT_EQUALS("\n" + "#line 3 \"file.c\"\n" + "3 \"file.c\"\n" + "#line 33 \"file2.c\"\n" + "33 \"file2.c\"", + preprocess(code)); +} + +// TODO: test #file/#endfile + static void missingHeader1() { - const simplecpp::DUI dui; - std::istringstream istr("#include \"notexist.h\"\n"); - std::vector files; - std::map filedata; + const char code[] = "#include \"notexist.h\"\n"; simplecpp::OutputList outputList; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files), files, filedata, dui, &outputList); + ASSERT_EQUALS("", preprocess(code, &outputList)); ASSERT_EQUALS("file0,1,missing_header,Header not found: \"notexist.h\"\n", toString(outputList)); } static void missingHeader2() { - const simplecpp::DUI dui; - std::istringstream istr("#include \"foo.h\"\n"); // this file exists + const char code[] = "#include \"foo.h\"\n"; // this file exists std::vector files; - std::map filedata; - filedata["foo.h"] = NULL; + simplecpp::FileDataCache cache; + cache.insert({"foo.h", simplecpp::TokenList(files)}); simplecpp::OutputList outputList; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files), files, filedata, dui, &outputList); + const simplecpp::TokenList rawtokens = makeTokenList(code,files); + simplecpp::DUI dui; + dui.includePaths.emplace_back("."); + 
simplecpp::preprocess(tokens2, rawtokens, files, cache, dui, &outputList); ASSERT_EQUALS("", toString(outputList)); } static void missingHeader3() { - const simplecpp::DUI dui; - std::istringstream istr("#ifdef UNDEFINED\n#include \"notexist.h\"\n#endif\n"); // this file is not included - std::vector files; - std::map filedata; + const char code[] = "#ifdef UNDEFINED\n#include \"notexist.h\"\n#endif\n"; // this file is not included simplecpp::OutputList outputList; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files), files, filedata, dui, &outputList); + ASSERT_EQUALS("", preprocess(code, &outputList)); ASSERT_EQUALS("", toString(outputList)); } +static void missingHeader4() +{ + const char code[] = "#/**/include <>\n"; + simplecpp::OutputList outputList; + simplecpp::DUI dui; + dui.removeComments = true; // TODO: remove this + ASSERT_EQUALS("", preprocess(code, dui, &outputList)); + ASSERT_EQUALS("file0,1,syntax_error,No header in #include\n", toString(outputList)); +} + static void nestedInclude() { - std::istringstream istr("#include \"test.h\"\n"); + const char code[] = "#include \"test.h\"\n"; std::vector files; - simplecpp::TokenList rawtokens(istr,files,"test.h"); - std::map filedata; - filedata["test.h"] = &rawtokens; + const simplecpp::TokenList rawtokens = makeTokenList(code,files,"test.h"); + simplecpp::FileDataCache cache; + cache.insert({"test.h", rawtokens}); - const simplecpp::DUI dui; simplecpp::OutputList outputList; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, dui, &outputList); + simplecpp::DUI dui; + dui.includePaths.emplace_back("."); + simplecpp::preprocess(tokens2, rawtokens, files, cache, dui, &outputList); ASSERT_EQUALS("file0,1,include_nested_too_deeply,#include nested too deeply\n", toString(outputList)); } +static void systemInclude() +{ + const char code[] = "#include \n"; + std::vector files; + const simplecpp::TokenList rawtokens 
= makeTokenList(code,files,"local/limits.h"); + simplecpp::FileDataCache cache; + cache.insert({"include/limits.h", simplecpp::TokenList(files)}); + cache.insert({"local/limits.h", rawtokens}); + + simplecpp::OutputList outputList; + simplecpp::TokenList tokens2(files); + simplecpp::DUI dui; + dui.includePaths.emplace_back("include"); + simplecpp::preprocess(tokens2, rawtokens, files, cache, dui, &outputList); + + ASSERT_EQUALS("", toString(outputList)); +} + +static void circularInclude() +{ + std::vector files; + simplecpp::FileDataCache cache; + + { + const char *const path = "test.h"; + const char code[] = + "#ifndef TEST_H\n" + "#define TEST_H\n" + "#include \"a/a.h\"\n" + "#endif\n" + ; + cache.insert({path, makeTokenList(code, files, path)}); + } + + { + const char *const path = "a/a.h"; + const char code[] = + "#ifndef A_H\n" + "#define A_H\n" + "#include \"../test.h\"\n" + "#endif\n" + ; + cache.insert({path, makeTokenList(code, files, path)}); + } + + simplecpp::OutputList outputList; + simplecpp::TokenList tokens2(files); + { + std::vector filenames; + const simplecpp::DUI dui; + + const char code[] = "#include \"test.h\"\n"; + const simplecpp::TokenList rawtokens = makeTokenList(code, files, "test.cpp"); + + cache = simplecpp::load(rawtokens, filenames, dui, &outputList, std::move(cache)); + simplecpp::preprocess(tokens2, rawtokens, files, cache, dui, &outputList); + } + + ASSERT_EQUALS("", toString(outputList)); +} + static void multiline1() { const char code[] = "#define A \\\n" "1\n" "A"; - const simplecpp::DUI dui; - std::istringstream istr(code); - std::vector files; - std::map filedata; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files), files, filedata, dui); - ASSERT_EQUALS("\n\n1", tokens2.stringify()); + ASSERT_EQUALS("\n\n1", preprocess(code)); } static void multiline2() @@ -1466,15 +2366,13 @@ static void multiline2() const char code[] = "#define A /*\\\n" "*/1\n" "A"; - const 
simplecpp::DUI dui; - std::istringstream istr(code); std::vector files; - simplecpp::TokenList rawtokens(istr,files); + simplecpp::TokenList rawtokens = makeTokenList(code,files); ASSERT_EQUALS("# define A /**/ 1\n\nA", rawtokens.stringify()); rawtokens.removeComments(); - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, dui); + simplecpp::preprocess(tokens2, rawtokens, files, cache, simplecpp::DUI()); ASSERT_EQUALS("\n\n1", tokens2.stringify()); } @@ -1483,15 +2381,13 @@ static void multiline3() // #28 - macro with multiline comment const char code[] = "#define A /*\\\n" " */ 1\n" "A"; - const simplecpp::DUI dui; - std::istringstream istr(code); std::vector files; - simplecpp::TokenList rawtokens(istr,files); + simplecpp::TokenList rawtokens = makeTokenList(code,files); ASSERT_EQUALS("# define A /* */ 1\n\nA", rawtokens.stringify()); rawtokens.removeComments(); - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, dui); + simplecpp::preprocess(tokens2, rawtokens, files, cache, simplecpp::DUI()); ASSERT_EQUALS("\n\n1", tokens2.stringify()); } @@ -1501,15 +2397,13 @@ static void multiline4() // #28 - macro with multiline comment " /*\\\n" " */ 1\n" "A"; - const simplecpp::DUI dui; - std::istringstream istr(code); std::vector files; - simplecpp::TokenList rawtokens(istr,files); + simplecpp::TokenList rawtokens = makeTokenList(code,files); ASSERT_EQUALS("# define A /* */ 1\n\n\nA", rawtokens.stringify()); rawtokens.removeComments(); - std::map filedata; + simplecpp::FileDataCache cache; simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, rawtokens, files, filedata, dui); + simplecpp::preprocess(tokens2, rawtokens, files, cache, simplecpp::DUI()); ASSERT_EQUALS("\n\n\n1", tokens2.stringify()); } @@ -1517,12 +2411,10 @@ static void multiline5() // column 
{ const char code[] = "#define A\\\n" "("; - const simplecpp::DUI dui; - std::istringstream istr(code); std::vector files; - simplecpp::TokenList rawtokens(istr,files); + const simplecpp::TokenList rawtokens = makeTokenList(code, files); ASSERT_EQUALS("# define A (", rawtokens.stringify()); - ASSERT_EQUALS(11, rawtokens.back()->location.col); + ASSERT_EQUALS(11, rawtokens.cback()->location.col); } static void multiline6() // multiline string in macro @@ -1530,10 +2422,8 @@ static void multiline6() // multiline string in macro const char code[] = "#define string (\"\\\n" "x\")\n" "string\n"; - const simplecpp::DUI dui; - std::istringstream istr(code); std::vector files; - simplecpp::TokenList rawtokens(istr,files); + const simplecpp::TokenList rawtokens = makeTokenList(code, files); ASSERT_EQUALS("# define string ( \"x\" )\n" "\n" "string", rawtokens.stringify()); @@ -1544,10 +2434,8 @@ static void multiline7() // multiline string in macro const char code[] = "#define A(X) aaa { f(\"\\\n" "a\"); }\n" "A(1)"; - const simplecpp::DUI dui; - std::istringstream istr(code); std::vector files; - simplecpp::TokenList rawtokens(istr,files); + const simplecpp::TokenList rawtokens = makeTokenList(code, files); ASSERT_EQUALS("# define A ( X ) aaa { f ( \"a\" ) ; }\n" "\n" "A ( 1 )", rawtokens.stringify()); @@ -1584,8 +2472,7 @@ static void nullDirective1() "#endif\n" "x = a;\n"; - const simplecpp::DUI dui; - ASSERT_EQUALS("\n\n\n\nx = 1 ;", preprocess(code, dui)); + ASSERT_EQUALS("\n\n\n\nx = 1 ;", preprocess(code)); } static void nullDirective2() @@ -1596,8 +2483,7 @@ static void nullDirective2() "#endif\n" "x = a;\n"; - const simplecpp::DUI dui; - ASSERT_EQUALS("\n\n\n\nx = 1 ;", preprocess(code, dui)); + ASSERT_EQUALS("\n\n\n\nx = 1 ;", preprocess(code)); } static void nullDirective3() @@ -1608,8 +2494,7 @@ static void nullDirective3() "#endif\n" "x = a;\n"; - const simplecpp::DUI dui; - ASSERT_EQUALS("\n\n\n\nx = 1 ;", preprocess(code, dui)); + ASSERT_EQUALS("\n\n\n\nx = 1 
;", preprocess(code)); } static void include1() @@ -1627,27 +2512,26 @@ static void include2() static void include3() // #16 - crash when expanding macro from header { const char code_c[] = "#include \"A.h\"\n" - "glue(1,2,3,4)\n" ; + "glue(1,2,3,4)\n"; const char code_h[] = "#define glue(a,b,c,d) a##b##c##d\n"; std::vector files; - std::istringstream istr_c(code_c); - simplecpp::TokenList rawtokens_c(istr_c, files, "A.c"); - - std::istringstream istr_h(code_h); - simplecpp::TokenList rawtokens_h(istr_h, files, "A.h"); + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "A.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "A.h"); ASSERT_EQUALS(2U, files.size()); ASSERT_EQUALS("A.c", files[0]); ASSERT_EQUALS("A.h", files[1]); - std::map filedata; - filedata["A.c"] = &rawtokens_c; - filedata["A.h"] = &rawtokens_h; + simplecpp::FileDataCache cache; + cache.insert({"A.c", rawtokens_c}); + cache.insert({"A.h", rawtokens_h}); simplecpp::TokenList out(files); - simplecpp::preprocess(out, rawtokens_c, files, filedata, simplecpp::DUI()); + simplecpp::DUI dui; + dui.includePaths.emplace_back("."); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("\n1234", out.stringify()); } @@ -1655,29 +2539,27 @@ static void include3() // #16 - crash when expanding macro from header static void include4() // #27 - -include { - const char code_c[] = "X\n" ; + const char code_c[] = "X\n"; const char code_h[] = "#define X 123\n"; std::vector files; - std::istringstream istr_c(code_c); - simplecpp::TokenList rawtokens_c(istr_c, files, "27.c"); - - std::istringstream istr_h(code_h); - simplecpp::TokenList rawtokens_h(istr_h, files, "27.h"); + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "27.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "27.h"); ASSERT_EQUALS(2U, files.size()); ASSERT_EQUALS("27.c", files[0]); ASSERT_EQUALS("27.h", files[1]); - std::map filedata; - 
filedata["27.c"] = &rawtokens_c; - filedata["27.h"] = &rawtokens_h; + simplecpp::FileDataCache cache; + cache.insert({"27.c", rawtokens_c}); + cache.insert({"27.h", rawtokens_h}); simplecpp::TokenList out(files); simplecpp::DUI dui; - dui.includes.push_back("27.h"); - simplecpp::preprocess(out, rawtokens_c, files, filedata, dui); + dui.includePaths.emplace_back("."); + dui.includes.emplace_back("27.h"); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("123", out.stringify()); } @@ -1688,18 +2570,22 @@ static void include5() // #3 - handle #include MACRO const char code_h[] = "123\n"; std::vector files; - std::istringstream istr_c(code_c); - simplecpp::TokenList rawtokens_c(istr_c, files, "3.c"); - std::istringstream istr_h(code_h); - simplecpp::TokenList rawtokens_h(istr_h, files, "3.h"); - std::map filedata; - filedata["3.c"] = &rawtokens_c; - filedata["3.h"] = &rawtokens_h; + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "3.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "3.h"); + + ASSERT_EQUALS(2U, files.size()); + ASSERT_EQUALS("3.c", files[0]); + ASSERT_EQUALS("3.h", files[1]); + + simplecpp::FileDataCache cache; + cache.insert({"3.c", rawtokens_c}); + cache.insert({"3.h", rawtokens_h}); simplecpp::TokenList out(files); simplecpp::DUI dui; - simplecpp::preprocess(out, rawtokens_c, files, filedata, dui); + dui.includePaths.emplace_back("."); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("\n#line 1 \"3.h\"\n123", out.stringify()); } @@ -1709,15 +2595,17 @@ static void include6() // #57 - incomplete macro #include MACRO(,) const char code[] = "#define MACRO(X,Y) X##Y\n#include MACRO(,)\n"; std::vector files; - std::istringstream istr(code); - simplecpp::TokenList rawtokens(istr, files, "57.c"); - std::map filedata; - filedata["57.c"] = &rawtokens; + const simplecpp::TokenList rawtokens = makeTokenList(code, files, "57.c"); + + ASSERT_EQUALS(1U, 
files.size()); + ASSERT_EQUALS("57.c", files[0]); + + simplecpp::FileDataCache cache; + cache.insert({"57.c", rawtokens}); simplecpp::TokenList out(files); - simplecpp::DUI dui; - simplecpp::preprocess(out, rawtokens, files, filedata, dui); + simplecpp::preprocess(out, rawtokens, files, cache, simplecpp::DUI()); } @@ -1728,19 +2616,22 @@ static void include7() // #include MACRO const char code_h[] = "123\n"; std::vector files; - std::istringstream istr_c(code_c); - simplecpp::TokenList rawtokens_c(istr_c, files, "3.c"); - std::istringstream istr_h(code_h); - simplecpp::TokenList rawtokens_h(istr_h, files, "3.h"); - std::map filedata; - filedata["3.c"] = &rawtokens_c; - filedata["3.h"] = &rawtokens_h; + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "3.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "3.h"); + + ASSERT_EQUALS(2U, files.size()); + ASSERT_EQUALS("3.c", files[0]); + ASSERT_EQUALS("3.h", files[1]); + + simplecpp::FileDataCache cache; + cache.insert({"3.c", rawtokens_c}); + cache.insert({"3.h", rawtokens_h}); simplecpp::TokenList out(files); simplecpp::DUI dui; - dui.includePaths.push_back("."); - simplecpp::preprocess(out, rawtokens_c, files, filedata, dui); + dui.includePaths.emplace_back("."); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("\n#line 1 \"3.h\"\n123", out.stringify()); } @@ -1750,16 +2641,39 @@ static void include8() // #include MACRO(X) const char code[] = "#define INCLUDE_LOCATION ../somewhere\n" "#define INCLUDE_FILE(F) \n" "#include INCLUDE_FILE(header)\n"; - - std::istringstream istr(code); - std::vector files; - std::map filedata; simplecpp::OutputList outputList; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files,"test.c"), files, filedata, simplecpp::DUI(), &outputList); + ASSERT_EQUALS("", preprocess(code, &outputList)); ASSERT_EQUALS("file0,3,missing_header,Header not found: 
<../somewhere/header.h>\n", toString(outputList)); } +static void include9() +{ + const char code_c[] = "#define HDR \"1.h\"\n" + "#include HDR\n"; + const char code_h[] = "/**/ #define X 1\n" // <- comment before hash should be ignored + "x=X;"; + + std::vector files; + + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "1.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "1.h"); + + ASSERT_EQUALS(2U, files.size()); + ASSERT_EQUALS("1.c", files[0]); + ASSERT_EQUALS("1.h", files[1]); + + simplecpp::FileDataCache cache; + cache.insert({"1.c", rawtokens_c}); + cache.insert({"1.h", rawtokens_h}); + + simplecpp::TokenList out(files); + simplecpp::DUI dui; + dui.includePaths.emplace_back("."); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); + + ASSERT_EQUALS("\n#line 2 \"1.h\"\nx = 1 ;", out.stringify()); +} + static void readfile_nullbyte() { const char code[] = "ab\0cd"; @@ -1792,11 +2706,11 @@ static void readfile_char_error() { simplecpp::OutputList outputList; - readfile("A = L's", -1, &outputList); + readfile("A = L's", &outputList); ASSERT_EQUALS("file0,1,syntax_error,No pair for character (\'). Can't process file. File is either invalid or unicode, which is currently not supported.\n", toString(outputList)); outputList.clear(); - readfile("A = 's\n'", -1, &outputList); + readfile("A = 's\n'", &outputList); ASSERT_EQUALS("file0,1,syntax_error,No pair for character (\'). Can't process file. File is either invalid or unicode, which is currently not supported.\n", toString(outputList)); } @@ -1850,36 +2764,36 @@ static void readfile_string_error() { simplecpp::OutputList outputList; - readfile("A = \"abs", -1, &outputList); + readfile("A = \"abs", &outputList); ASSERT_EQUALS("file0,1,syntax_error,No pair for character (\"). Can't process file. 
File is either invalid or unicode, which is currently not supported.\n", toString(outputList)); outputList.clear(); - readfile("A = u8\"abs\n\"", -1, &outputList); + readfile("A = u8\"abs\n\"", &outputList); ASSERT_EQUALS("file0,1,syntax_error,No pair for character (\"). Can't process file. File is either invalid or unicode, which is currently not supported.\n", toString(outputList)); outputList.clear(); - readfile("A = R\"as\n(abc)as\"", -1, &outputList); + readfile("A = R\"as\n(abc)as\"", &outputList); ASSERT_EQUALS("file0,1,syntax_error,Invalid newline in raw string delimiter.\n", toString(outputList)); outputList.clear(); - readfile("A = u8R\"as\n(abc)as\"", -1, &outputList); + readfile("A = u8R\"as\n(abc)as\"", &outputList); ASSERT_EQUALS("file0,1,syntax_error,Invalid newline in raw string delimiter.\n", toString(outputList)); outputList.clear(); - readfile("A = R\"as(abc)a\"", -1, &outputList); + readfile("A = R\"as(abc)a\"", &outputList); ASSERT_EQUALS("file0,1,syntax_error,Raw string missing terminating delimiter.\n", toString(outputList)); outputList.clear(); - readfile("A = LR\"as(abc)a\"", -1, &outputList); + readfile("A = LR\"as(abc)a\"", &outputList); ASSERT_EQUALS("file0,1,syntax_error,Raw string missing terminating delimiter.\n", toString(outputList)); outputList.clear(); - readfile("#define A \"abs", -1, &outputList); + readfile("#define A \"abs", &outputList); ASSERT_EQUALS("file0,1,syntax_error,No pair for character (\"). Can't process file. 
File is either invalid or unicode, which is currently not supported.\n", toString(outputList)); outputList.clear(); // Don't warn for a multiline define - readfile("#define A \"abs\\\n\"", -1, &outputList); + readfile("#define A \"abs\\\n\"", &outputList); ASSERT_EQUALS("", toString(outputList)); } @@ -1891,11 +2805,11 @@ static void readfile_cpp14_number() static void readfile_unhandled_chars() { simplecpp::OutputList outputList; - readfile("// 你好世界", -1, &outputList); + readfile("// 你好世界", &outputList); ASSERT_EQUALS("", toString(outputList)); - readfile("s=\"你好世界\"", -1, &outputList); + readfile("s=\"你好世界\"", &outputList); ASSERT_EQUALS("", toString(outputList)); - readfile("int 你好世界=0;", -1, &outputList); + readfile("int 你好世界=0;", &outputList); ASSERT_EQUALS("file0,1,unhandled_char_error,The code contains unhandled character(s) (character code=228). Neither unicode nor extended ascii is supported.\n", toString(outputList)); } @@ -1907,6 +2821,14 @@ static void readfile_error() "X",readfile("#if !A\n#error\n#endif\nX\n")); } +static void readfile_file_not_found() +{ + simplecpp::OutputList outputList; + std::vector files; + (void)simplecpp::TokenList("NotAFile", files, &outputList); + ASSERT_EQUALS("file0,0,file_not_found,File is missing: NotAFile\n", toString(outputList)); +} + static void stringify1() { const char code_c[] = "#include \"A.h\"\n" @@ -1915,22 +2837,21 @@ static void stringify1() std::vector files; - std::istringstream istr_c(code_c); - simplecpp::TokenList rawtokens_c(istr_c, files, "A.c"); - - std::istringstream istr_h(code_h); - simplecpp::TokenList rawtokens_h(istr_h, files, "A.h"); + const simplecpp::TokenList rawtokens_c = makeTokenList(code_c, files, "A.c"); + const simplecpp::TokenList rawtokens_h = makeTokenList(code_h, files, "A.h"); ASSERT_EQUALS(2U, files.size()); ASSERT_EQUALS("A.c", files[0]); ASSERT_EQUALS("A.h", files[1]); - std::map filedata; - filedata["A.c"] = &rawtokens_c; - filedata["A.h"] = &rawtokens_h; + 
simplecpp::FileDataCache cache; + cache.insert({"A.c", rawtokens_c}); + cache.insert({"A.h", rawtokens_h}); simplecpp::TokenList out(files); - simplecpp::preprocess(out, rawtokens_c, files, filedata, simplecpp::DUI()); + simplecpp::DUI dui; + dui.includePaths.emplace_back("."); + simplecpp::preprocess(out, rawtokens_c, files, cache, dui); ASSERT_EQUALS("\n#line 1 \"A.h\"\n1\n2\n#line 1 \"A.h\"\n1\n2", out.stringify()); } @@ -1939,12 +2860,11 @@ static void tokenMacro1() { const char code[] = "#define A 123\n" "A"; - const simplecpp::DUI dui; std::vector files; - std::map filedata; - std::istringstream istr(code); + simplecpp::FileDataCache cache; simplecpp::TokenList tokenList(files); - simplecpp::preprocess(tokenList, simplecpp::TokenList(istr,files), files, filedata, dui); + const simplecpp::TokenList rawtokens = makeTokenList(code,files); + simplecpp::preprocess(tokenList, rawtokens, files, cache, simplecpp::DUI()); ASSERT_EQUALS("A", tokenList.cback()->macro); } @@ -1952,12 +2872,11 @@ static void tokenMacro2() { const char code[] = "#define ADD(X,Y) X+Y\n" "ADD(1,2)"; - const simplecpp::DUI dui; std::vector files; - std::map filedata; - std::istringstream istr(code); + simplecpp::FileDataCache cache; simplecpp::TokenList tokenList(files); - simplecpp::preprocess(tokenList, simplecpp::TokenList(istr,files), files, filedata, dui); + const simplecpp::TokenList rawtokens = makeTokenList(code,files); + simplecpp::preprocess(tokenList, rawtokens, files, cache, simplecpp::DUI()); const simplecpp::Token *tok = tokenList.cfront(); ASSERT_EQUALS("1", tok->str()); ASSERT_EQUALS("", tok->macro); @@ -1974,12 +2893,11 @@ static void tokenMacro3() const char code[] = "#define ADD(X,Y) X+Y\n" "#define FRED 1\n" "ADD(FRED,2)"; - const simplecpp::DUI dui; std::vector files; - std::map filedata; - std::istringstream istr(code); + simplecpp::FileDataCache cache; simplecpp::TokenList tokenList(files); - simplecpp::preprocess(tokenList, simplecpp::TokenList(istr,files), files, 
filedata, dui); + const simplecpp::TokenList rawtokens = makeTokenList(code,files); + simplecpp::preprocess(tokenList, rawtokens, files, cache, simplecpp::DUI()); const simplecpp::Token *tok = tokenList.cfront(); ASSERT_EQUALS("1", tok->str()); ASSERT_EQUALS("FRED", tok->macro); @@ -1996,13 +2914,12 @@ static void tokenMacro4() const char code[] = "#define A B\n" "#define B 1\n" "A"; - const simplecpp::DUI dui; std::vector files; - std::map filedata; - std::istringstream istr(code); + simplecpp::FileDataCache cache; simplecpp::TokenList tokenList(files); - simplecpp::preprocess(tokenList, simplecpp::TokenList(istr,files), files, filedata, dui); - const simplecpp::Token *tok = tokenList.cfront(); + const simplecpp::TokenList rawtokens = makeTokenList(code,files); + simplecpp::preprocess(tokenList, rawtokens, files, cache, simplecpp::DUI()); + const simplecpp::Token * const tok = tokenList.cfront(); ASSERT_EQUALS("1", tok->str()); ASSERT_EQUALS("A", tok->macro); } @@ -2012,43 +2929,32 @@ static void tokenMacro5() const char code[] = "#define SET_BPF(code) (code)\n" "#define SET_BPF_JUMP(code) SET_BPF(D | code)\n" "SET_BPF_JUMP(A | B | C);"; - const simplecpp::DUI dui; std::vector files; - std::map filedata; - std::istringstream istr(code); + simplecpp::FileDataCache cache; simplecpp::TokenList tokenList(files); - simplecpp::preprocess(tokenList, simplecpp::TokenList(istr,files), files, filedata, dui); - const simplecpp::Token *tok = tokenList.cfront()->next; + const simplecpp::TokenList rawtokens = makeTokenList(code,files); + simplecpp::preprocess(tokenList, rawtokens, files, cache, simplecpp::DUI()); + const simplecpp::Token * const tok = tokenList.cfront()->next; ASSERT_EQUALS("D", tok->str()); ASSERT_EQUALS("SET_BPF_JUMP", tok->macro); } static void undef() { - std::istringstream istr("#define A\n" - "#undef A\n" - "#ifdef A\n" - "123\n" - "#endif"); - const simplecpp::DUI dui; - std::vector files; - std::map filedata; - simplecpp::TokenList tokenList(files); - 
simplecpp::preprocess(tokenList, simplecpp::TokenList(istr,files), files, filedata, dui); - ASSERT_EQUALS("", tokenList.stringify()); + const char code[] = "#define A\n" + "#undef A\n" + "#ifdef A\n" + "123\n" + "#endif"; + ASSERT_EQUALS("", preprocess(code)); } static void userdef() { - std::istringstream istr("#ifdef A\n123\n#endif\n"); + const char code[] = "#ifdef A\n123\n#endif\n"; simplecpp::DUI dui; - dui.defines.push_back("A=1"); - std::vector files; - const simplecpp::TokenList tokens1 = simplecpp::TokenList(istr, files); - std::map filedata; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, tokens1, files, filedata, dui); - ASSERT_EQUALS("\n123", tokens2.stringify()); + dui.defines.emplace_back("A=1"); + ASSERT_EQUALS("\n123", preprocess(code, dui)); } static void utf8() @@ -2056,26 +2962,85 @@ static void utf8() ASSERT_EQUALS("123", readfile("\xEF\xBB\xBF 123")); } +static void utf8_invalid() +{ + ASSERT_EQUALS("", readfile("\xEF 123")); + ASSERT_EQUALS("", readfile("\xEF\xBB 123")); +} + static void unicode() { - ASSERT_EQUALS("12", readfile("\xFE\xFF\x00\x31\x00\x32", 6)); - ASSERT_EQUALS("12", readfile("\xFF\xFE\x31\x00\x32\x00", 6)); - ASSERT_EQUALS("//\n1", readfile("\xFE\xFF\x00\x2f\x00\x2f\x00\x0a\x00\x31", 10)); - ASSERT_EQUALS("//\n1", readfile("\xFF\xFE\x2f\x00\x2f\x00\x0a\x00\x31\x00", 10)); - ASSERT_EQUALS("\"a\"", readfile("\xFE\xFF\x00\x22\x00\x61\x00\x22", 8)); - ASSERT_EQUALS("\"a\"", readfile("\xFF\xFE\x22\x00\x61\x00\x22\x00", 8)); - ASSERT_EQUALS("\n//1", readfile("\xff\xfe\x0d\x00\x0a\x00\x2f\x00\x2f\x00\x31\x00\x0d\x00\x0a\x00",16)); + { + const char code[] = "\xFE\xFF\x00\x31\x00\x32"; + ASSERT_EQUALS("12", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFF\xFE\x31\x00\x32\x00"; + ASSERT_EQUALS("12", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFE\xFF\x00\x2f\x00\x2f\x00\x0a\x00\x31"; + ASSERT_EQUALS("//\n1", readfile(code, sizeof(code))); + } + { + const char code[] = 
"\xFF\xFE\x2f\x00\x2f\x00\x0a\x00\x31\x00"; + ASSERT_EQUALS("//\n1", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFE\xFF\x00\x22\x00\x61\x00\x22"; + ASSERT_EQUALS("\"a\"", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFF\xFE\x22\x00\x61\x00\x22\x00"; + ASSERT_EQUALS("\"a\"", readfile(code, sizeof(code))); + } + { + const char code[] = "\xff\xfe\x0d\x00\x0a\x00\x2f\x00\x2f\x00\x31\x00\x0d\x00\x0a\x00"; + ASSERT_EQUALS("\n//1", readfile(code, sizeof(code))); + } +} + +static void unicode_invalid() +{ + { + const char code[] = "\xFF"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFE"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFE\xFF\x31"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFF\xFE\x31"; + ASSERT_EQUALS("1", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFE\xFF\x31\x32"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFF\xFE\x31\x32"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFE\xFF\x00\x31\x00\x32\x33"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFF\xFE\x31\x00\x32\x00\x33"; + ASSERT_EQUALS("123", readfile(code, sizeof(code))); + } } static void warning() { - std::istringstream istr("#warning MSG\n1"); - std::vector files; - std::map filedata; + const char code[] = "#warning MSG\n1"; simplecpp::OutputList outputList; - simplecpp::TokenList tokens2(files); - simplecpp::preprocess(tokens2, simplecpp::TokenList(istr,files,"test.c"), files, filedata, simplecpp::DUI(), &outputList); - ASSERT_EQUALS("\n1", tokens2.stringify()); + ASSERT_EQUALS("\n1", preprocess(code, &outputList)); ASSERT_EQUALS("file0,1,#warning,#warning MSG\n", toString(outputList)); } @@ -2152,8 +3117,8 @@ static void simplifyPath_cppcheck() ASSERT_EQUALS("src/", simplecpp::simplifyPath("src/abc/../")); // 
Handling of UNC paths on Windows - ASSERT_EQUALS("//src/test.cpp", simplecpp::simplifyPath("//src/test.cpp")); - ASSERT_EQUALS("//src/test.cpp", simplecpp::simplifyPath("///src/test.cpp")); + ASSERT_EQUALS("//" STRINGIZE(UNCHOST) "/test.cpp", simplecpp::simplifyPath("//" STRINGIZE(UNCHOST) "/test.cpp")); + ASSERT_EQUALS("//" STRINGIZE(UNCHOST) "/test.cpp", simplecpp::simplifyPath("///" STRINGIZE(UNCHOST) "/test.cpp")); } static void simplifyPath_New() @@ -2169,22 +3134,523 @@ static void preprocessSizeOf() { simplecpp::OutputList outputList; - preprocess("#if 3 > sizeof", simplecpp::DUI(), &outputList); + ASSERT_EQUALS("", preprocess("#if 3 > sizeof", &outputList)); ASSERT_EQUALS("file0,1,syntax_error,failed to evaluate #if condition, missing sizeof argument\n", toString(outputList)); outputList.clear(); - preprocess("#if 3 > sizeof A", simplecpp::DUI(), &outputList); + ASSERT_EQUALS("", preprocess("#if 3 > sizeof A", &outputList)); ASSERT_EQUALS("file0,1,syntax_error,failed to evaluate #if condition, missing sizeof argument\n", toString(outputList)); outputList.clear(); - preprocess("#if 3 > sizeof(int", simplecpp::DUI(), &outputList); + ASSERT_EQUALS("", preprocess("#if 3 > sizeof(int", &outputList)); ASSERT_EQUALS("file0,1,syntax_error,failed to evaluate #if condition, invalid sizeof expression\n", toString(outputList)); } -int main(int argc, char **argv) +static void timeDefine() +{ + const char code[] = "__TIME__"; + const std::string t = preprocess(code); + // "19:09:53" + ASSERT_EQUALS(10, t.size()); + // TODO: split string and check proper ranges instead + ASSERT_EQUALS('"', t[0]); + ASSERT_EQUALS(true, isdigit(t[1]) != 0); + ASSERT_EQUALS(true, isdigit(t[2]) != 0); + ASSERT_EQUALS(':', t[3]); + ASSERT_EQUALS(true, isdigit(t[4]) != 0); + ASSERT_EQUALS(true, isdigit(t[5]) != 0); + ASSERT_EQUALS(':', t[6]); + ASSERT_EQUALS(true, isdigit(t[7]) != 0); + ASSERT_EQUALS(true, isdigit(t[8]) != 0); + ASSERT_EQUALS('"', t[9]); +} + +static void dateDefine() +{ + 
const char code[] = "__DATE__"; + const std::string dt = preprocess(code); + // "\"Mar 11 2022\"" + ASSERT_EQUALS(13, dt.size()); + // TODO: split string and check proper ranges instead + ASSERT_EQUALS('"', dt[0]); + ASSERT_EQUALS(true, dt[1] >= 'A' && dt[1] <= 'Z'); // uppercase letter + ASSERT_EQUALS(true, dt[2] >= 'a' && dt[2] <= 'z'); // lowercase letter + ASSERT_EQUALS(true, dt[3] >= 'a' && dt[3] <= 'z'); // lowercase letter + ASSERT_EQUALS(' ', dt[4]); + ASSERT_EQUALS(true, isdigit(dt[5]) != 0); + ASSERT_EQUALS(true, isdigit(dt[6]) != 0); + ASSERT_EQUALS(' ', dt[7]); + ASSERT_EQUALS(true, isdigit(dt[8]) != 0); + ASSERT_EQUALS(true, isdigit(dt[9]) != 0); + ASSERT_EQUALS(true, isdigit(dt[10]) != 0); + ASSERT_EQUALS(true, isdigit(dt[11]) != 0); + ASSERT_EQUALS('"', dt[12]); +} + +static void stdcVersionDefine() +{ + const char code[] = "#if defined(__STDC_VERSION__)\n" + " __STDC_VERSION__\n" + "#endif\n"; + simplecpp::DUI dui; + ASSERT_EQUALS("", preprocess(code, dui)); + dui.std = "c11"; + ASSERT_EQUALS("\n201112L", preprocess(code, dui)); +} + +static void cpluscplusDefine() { + const char code[] = "#if defined(__cplusplus)\n" + " __cplusplus\n" + "#endif\n"; + simplecpp::DUI dui; + ASSERT_EQUALS("", preprocess(code, dui)); + dui.std = "c++11"; + ASSERT_EQUALS("\n201103L", preprocess(code, dui)); +} + +static void invalidStd() +{ + const char code[] = ""; + simplecpp::DUI dui; + simplecpp::OutputList outputList; + + dui.std = "c88"; + ASSERT_EQUALS("", preprocess(code, dui, &outputList)); + ASSERT_EQUALS(1, outputList.size()); + ASSERT_EQUALS(simplecpp::Output::Type::DUI_ERROR, outputList.cbegin()->type); + ASSERT_EQUALS("unknown standard specified: 'c88'", outputList.cbegin()->msg); + outputList.clear(); + + dui.std = "gnu88"; + ASSERT_EQUALS("", preprocess(code, dui, &outputList)); + ASSERT_EQUALS(1, outputList.size()); + ASSERT_EQUALS(simplecpp::Output::Type::DUI_ERROR, outputList.cbegin()->type); + ASSERT_EQUALS("unknown standard specified: 'gnu88'", 
outputList.cbegin()->msg); + outputList.clear(); + + dui.std = "d99"; + ASSERT_EQUALS("", preprocess(code, dui, &outputList)); + ASSERT_EQUALS(1, outputList.size()); + ASSERT_EQUALS(simplecpp::Output::Type::DUI_ERROR, outputList.cbegin()->type); + ASSERT_EQUALS("unknown standard specified: 'd99'", outputList.cbegin()->msg); + outputList.clear(); + + dui.std = "c++77"; + ASSERT_EQUALS("", preprocess(code, dui, &outputList)); + ASSERT_EQUALS(1, outputList.size()); + ASSERT_EQUALS(simplecpp::Output::Type::DUI_ERROR, outputList.cbegin()->type); + ASSERT_EQUALS("unknown standard specified: 'c++77'", outputList.cbegin()->msg); + outputList.clear(); + + dui.std = "gnu++33"; + ASSERT_EQUALS("", preprocess(code, dui, &outputList)); + ASSERT_EQUALS(1, outputList.size()); + ASSERT_EQUALS(simplecpp::Output::Type::DUI_ERROR, outputList.cbegin()->type); + ASSERT_EQUALS("unknown standard specified: 'gnu++33'", outputList.cbegin()->msg); + outputList.clear(); +} + +static void stdEnum() +{ + ASSERT_EQUALS(simplecpp::cstd_t::C89, simplecpp::getCStd("c89")); + ASSERT_EQUALS(simplecpp::cstd_t::C89, simplecpp::getCStd("c90")); + ASSERT_EQUALS(simplecpp::cstd_t::C11, simplecpp::getCStd("iso9899:2011")); + ASSERT_EQUALS(simplecpp::cstd_t::C23, simplecpp::getCStd("gnu23")); + ASSERT_EQUALS(simplecpp::cstd_t::CUnknown, simplecpp::getCStd("gnu77")); + ASSERT_EQUALS(simplecpp::cstd_t::CUnknown, simplecpp::getCStd("c++11")); + + ASSERT_EQUALS(simplecpp::cppstd_t::CPP03, simplecpp::getCppStd("c++03")); + ASSERT_EQUALS(simplecpp::cppstd_t::CPP03, simplecpp::getCppStd("c++98")); + ASSERT_EQUALS(simplecpp::cppstd_t::CPP17, simplecpp::getCppStd("c++1z")); + ASSERT_EQUALS(simplecpp::cppstd_t::CPP26, simplecpp::getCppStd("gnu++26")); + ASSERT_EQUALS(simplecpp::cppstd_t::CPPUnknown, simplecpp::getCppStd("gnu++77")); + ASSERT_EQUALS(simplecpp::cppstd_t::CPPUnknown, simplecpp::getCppStd("c11")); +} + +static void stdValid() +{ + const char code[] = ""; + simplecpp::DUI dui; + simplecpp::OutputList 
outputList; + + dui.std = "c89"; + ASSERT_EQUALS("", preprocess(code, dui, &outputList)); + ASSERT_EQUALS(0, outputList.size()); + outputList.clear(); + + dui.std = "gnu23"; + ASSERT_EQUALS("", preprocess(code, dui, &outputList)); + ASSERT_EQUALS(0, outputList.size()); + outputList.clear(); + + dui.std = "c++03"; + ASSERT_EQUALS("", preprocess(code, dui, &outputList)); + ASSERT_EQUALS(0, outputList.size()); + outputList.clear(); + + dui.std = "gnu++26"; + ASSERT_EQUALS("", preprocess(code, dui, &outputList)); + ASSERT_EQUALS(0, outputList.size()); + outputList.clear(); +} + +static void assertToken(const std::string& s, bool name, bool number, bool comment, char op, int line) +{ + const std::vector f; + const simplecpp::Location l; + const simplecpp::Token t(s, l); + assertEquals(name, t.name, line); + assertEquals(number, t.number, line); + assertEquals(comment, t.comment, line); + assertEquals(op, t.op, line); +} + +#define ASSERT_TOKEN(s, na, nu, c) assertToken(s, na, nu, c, '\0', __LINE__) +#define ASSERT_TOKEN_OP(s, na, nu, c, o) assertToken(s, na, nu, c, o, __LINE__) + +static void token() +{ + // name + ASSERT_TOKEN("n", true, false, false); + ASSERT_TOKEN("name", true, false, false); + ASSERT_TOKEN("name_1", true, false, false); + ASSERT_TOKEN("name2", true, false, false); + ASSERT_TOKEN("name$", true, false, false); + + // character literal + ASSERT_TOKEN("'n'", false, false, false); + ASSERT_TOKEN("'\\''", false, false, false); + ASSERT_TOKEN("'\\u0012'", false, false, false); + ASSERT_TOKEN("'\\xff'", false, false, false); + ASSERT_TOKEN("u8'\\u0012'", false, false, false); + ASSERT_TOKEN("u'\\u0012'", false, false, false); + ASSERT_TOKEN("L'\\u0012'", false, false, false); + ASSERT_TOKEN("U'\\u0012'", false, false, false); + + // include + ASSERT_TOKEN("", false, false, false); + + // comment + ASSERT_TOKEN("/*comment*/", false, false, true); + ASSERT_TOKEN("// TODO", false, false, true); + + // string literal + ASSERT_TOKEN("\"literal\"", false, false, 
false); + + // op + ASSERT_TOKEN_OP("<", false, false, false, '<'); + ASSERT_TOKEN_OP(">", false, false, false, '>'); + ASSERT_TOKEN_OP("(", false, false, false, '('); + ASSERT_TOKEN_OP(")", false, false, false, ')'); + + // number + ASSERT_TOKEN("2", false, true, false); + ASSERT_TOKEN("22", false, true, false); + ASSERT_TOKEN("-2", false, true, false); + ASSERT_TOKEN("-22", false, true, false); + ASSERT_TOKEN("+2", false, true, false); + ASSERT_TOKEN("+22", false, true, false); +} + +static void preprocess_files() +{ + { + const char code[] = "#define A"; + std::vector files; + + const simplecpp::TokenList tokens = makeTokenList(code, files); + ASSERT_EQUALS(1, files.size()); + ASSERT_EQUALS("", *files.cbegin()); + + simplecpp::TokenList tokens2(files); + ASSERT_EQUALS(1, files.size()); + ASSERT_EQUALS("", *files.cbegin()); + + simplecpp::FileDataCache cache; + simplecpp::preprocess(tokens2, tokens, files, cache, simplecpp::DUI(), nullptr); + ASSERT_EQUALS(1, files.size()); + ASSERT_EQUALS("", *files.cbegin()); + } + { + const char code[] = "#define A"; + std::vector files; + + const simplecpp::TokenList tokens = makeTokenList(code, files, "test.cpp"); + ASSERT_EQUALS(1, files.size()); + ASSERT_EQUALS("test.cpp", *files.cbegin()); + + simplecpp::TokenList tokens2(files); + ASSERT_EQUALS(1, files.size()); + ASSERT_EQUALS("test.cpp", *files.cbegin()); + + simplecpp::FileDataCache cache; + simplecpp::preprocess(tokens2, tokens, files, cache, simplecpp::DUI(), nullptr); + ASSERT_EQUALS(1, files.size()); + ASSERT_EQUALS("test.cpp", *files.cbegin()); + } +} + +static void tokenlist_api() +{ + std::vector filenames; +# if !defined(__cpp_lib_string_view) && !defined(__cpp_lib_span) + // sized array + size + { + char input[] = "code"; // NOLINT(misc-const-correctness) + simplecpp::TokenList(input,sizeof(input),filenames,""); + } + { + const char input[] = "code"; + simplecpp::TokenList(input,sizeof(input),filenames,""); + } + { + unsigned char input[] = "code"; // 
NOLINT(misc-const-correctness) + simplecpp::TokenList(input,sizeof(input),filenames,""); + } + { + const unsigned char input[] = "code"; + simplecpp::TokenList(input,sizeof(input),filenames,""); + } +#endif // !defined(__cpp_lib_string_view) && !defined(__cpp_lib_span) + // pointer via View + { + const char * const input = "code"; + simplecpp::TokenList({input},filenames,""); + } + // sized array via View + { + char input[] = "code"; // NOLINT(misc-const-correctness) + simplecpp::TokenList(simplecpp::View{input},filenames,""); + } + { + const char input[] = "code"; + simplecpp::TokenList(simplecpp::View{input},filenames,""); + } + // sized array + size via View/std::span + { + char input[] = "code"; // NOLINT(misc-const-correctness) + simplecpp::TokenList({input,sizeof(input)},filenames,""); + } + { + const char input[] = "code"; + simplecpp::TokenList({input,sizeof(input)},filenames,""); + } + // sized array + { + char input[] = "code"; // NOLINT(misc-const-correctness) + simplecpp::TokenList(input,filenames,""); + } + { + const char input[] = "code"; + simplecpp::TokenList(input,filenames,""); + } + { + unsigned char input[] = "code"; // NOLINT(misc-const-correctness) + simplecpp::TokenList(input,filenames,""); + } + { + const unsigned char input[] = "code"; + simplecpp::TokenList(input,filenames,""); + } + // std::string via View/std::span (implicit) + { + std::string input = "code"; // NOLINT(misc-const-correctness) + simplecpp::TokenList(input,filenames,""); + } + { + const std::string input = "code"; + simplecpp::TokenList(input,filenames,""); + } + // std::string via View/std::span (explicit) + { + std::string input = "code"; // NOLINT(misc-const-correctness) + simplecpp::TokenList({input},filenames,""); + } + { + const std::string input = "code"; + simplecpp::TokenList({input},filenames,""); + } + + // this test is to make sure the safe APIs are compiling +#ifdef __cpp_lib_string_view + { + const char input[] = "code"; + const std::string_view sv = input; + 
// std::string_view can be implicitly converted into a std::span + simplecpp::TokenList(sv,filenames,""); + } +#endif // __cpp_lib_string_view +#ifdef __cpp_lib_span + { + char input[] = "code"; + const std::span sp = input; + simplecpp::TokenList(sp,filenames,""); + } + { + const char input[] = "code"; + const std::span sp = input; + simplecpp::TokenList(sp,filenames,""); + } + { + unsigned char input[] = "code"; + const std::span sp = input; + simplecpp::TokenList(sp,filenames,""); + } + { + const unsigned char input[] = "code"; + const std::span sp = input; + simplecpp::TokenList(sp,filenames,""); + } +#endif // __cpp_lib_span +} + +static void bad_macro_syntax() // #616 +{ + simplecpp::DUI dui; + dui.defines.emplace_back("\""); + + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess("", dui, &outputList)); + ASSERT_EQUALS(1, outputList.size()); + ASSERT_EQUALS(simplecpp::Output::Type::DUI_ERROR, outputList.cbegin()->type); + ASSERT_EQUALS("bad macro syntax. macroname=\" value=1", outputList.cbegin()->msg); +} + +static void ifCond() +{ + { + const char code[] = "int i;"; + std::list ifCond; + ASSERT_EQUALS("int i ;", preprocess(code, &ifCond)); + ASSERT_EQUALS(0, ifCond.size()); + } + { + const char code[] = "#if 0\n" + "# elif __GNUC__ == 1\n" + "# elif defined(__APPLE__)\n" + "#endif\n"; + std::list ifCond; + ASSERT_EQUALS("", preprocess(code, &ifCond)); + ASSERT_EQUALS(3, ifCond.size()); + auto it = ifCond.cbegin(); + ASSERT_EQUALS(0, it->location.fileIndex); + ASSERT_EQUALS(1, it->location.line); + ASSERT_EQUALS(2, it->location.col); + ASSERT_EQUALS("0", it->E); + ASSERT_EQUALS(0, it->result); + ++it; + ASSERT_EQUALS(0, it->location.fileIndex); + ASSERT_EQUALS(2, it->location.line); + ASSERT_EQUALS(3, it->location.col); + ASSERT_EQUALS("__GNUC__ == 1", it->E); + ASSERT_EQUALS(0, it->result); + ++it; + ASSERT_EQUALS(0, it->location.fileIndex); + ASSERT_EQUALS(3, it->location.line); + ASSERT_EQUALS(4, it->location.col); + ASSERT_EQUALS("0", 
it->E); + ASSERT_EQUALS(0, it->result); + } +} + +static void macroUsage() +{ + { + const char code[] = "int i;"; + std::list macroUsage; + ASSERT_EQUALS("int i ;", preprocess(code, ¯oUsage)); + ASSERT_EQUALS(0, macroUsage.size()); + } + { + const char code[] = "#define DEF_1\n" + "#ifdef DEF_1\n" + "#endif\n"; + std::list macroUsage; + ASSERT_EQUALS("", preprocess(code, ¯oUsage)); + ASSERT_EQUALS(1, macroUsage.size()); + auto it = macroUsage.cbegin(); + ASSERT_EQUALS("DEF_1", it->macroName); + ASSERT_EQUALS(0, it->macroLocation.fileIndex); + ASSERT_EQUALS(1, it->macroLocation.line); + ASSERT_EQUALS(9, it->macroLocation.col); + ASSERT_EQUALS(true, it->macroValueKnown); + ASSERT_EQUALS(0, it->useLocation.fileIndex); + ASSERT_EQUALS(2, it->useLocation.line); + ASSERT_EQUALS(8, it->useLocation.col); + } +} + +static void isAbsolutePath() { +#ifdef _WIN32 + ASSERT_EQUALS(true, simplecpp::isAbsolutePath("C:\\foo\\bar")); + ASSERT_EQUALS(true, simplecpp::isAbsolutePath("C:/foo/bar")); + ASSERT_EQUALS(true, simplecpp::isAbsolutePath("\\\\foo\\bar")); + + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("foo\\bar")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("foo/bar")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("foo.cpp")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("C:foo.cpp")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("C:foo\\bar.cpp")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("bar.cpp")); + //ASSERT_EQUALS(true, simplecpp::isAbsolutePath("\\")); // TODO + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("0:\\foo\\bar")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("0:/foo/bar")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("\\foo\\bar")); + //ASSERT_EQUALS(false, simplecpp::isAbsolutePath("\\\\")); // TODO + //ASSERT_EQUALS(false, simplecpp::isAbsolutePath("//")); // TODO + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("/foo/bar")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("/")); +#else + ASSERT_EQUALS(true, 
simplecpp::isAbsolutePath("/foo/bar")); + ASSERT_EQUALS(true, simplecpp::isAbsolutePath("/")); + ASSERT_EQUALS(true, simplecpp::isAbsolutePath("//host/foo/bar")); + + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("foo/bar")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("foo.cpp")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("C:\\foo\\bar")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("C:/foo/bar")); + ASSERT_EQUALS(false, simplecpp::isAbsolutePath("\\\\foo\\bar")); +#endif +} + +// crashes detected by fuzzer +static void fuzz_crash() +{ + { + const char code[] = "#define n __VA_OPT__(u\n" + "n\n"; + (void)preprocess(code, simplecpp::DUI()); // do not crash + } + { // #346 + const char code[] = "#define foo(intp)f##oo(intp\n" + "foo(f##oo(intp))\n"; + (void)preprocess(code, simplecpp::DUI()); // do not crash + } + { // #546 + const char code[] = "#if __has_include<\n"; + simplecpp::OutputList outputList; + ASSERT_EQUALS("", preprocess(code, &outputList)); // do not crash + ASSERT_EQUALS("file0,1,syntax_error,failed to evaluate #if condition\n", toString(outputList)); + } +} + +// memory leaks detected by LSAN/valgrind +static void leak() +{ + { // #498 + const char code[] = "#define e(...)__VA_OPT__()\n" + "#define e\n"; + (void)preprocess(code, simplecpp::DUI()); + } +} + +static void runTests(int argc, char **argv, Input input) +{ + USE_INPUT = input; + TEST_CASE(backslash); TEST_CASE(builtin); @@ -2202,7 +3668,9 @@ int main(int argc, char **argv) TEST_CASE(constFold); +#ifdef __CYGWIN__ TEST_CASE(convertCygwinPath); +#endif TEST_CASE(define1); TEST_CASE(define2); @@ -2215,6 +3683,8 @@ int main(int argc, char **argv) TEST_CASE(define9); TEST_CASE(define10); TEST_CASE(define11); + TEST_CASE(define12); + TEST_CASE(define13); TEST_CASE(define_invalid_1); TEST_CASE(define_invalid_2); TEST_CASE(define_define_1); @@ -2228,15 +3698,35 @@ int main(int argc, char **argv) TEST_CASE(define_define_9); // line break in nested macro call 
TEST_CASE(define_define_10); TEST_CASE(define_define_11); + TEST_CASE(define_define_11a); TEST_CASE(define_define_12); // expand result of ## TEST_CASE(define_define_13); TEST_CASE(define_define_14); TEST_CASE(define_define_15); TEST_CASE(define_define_16); TEST_CASE(define_define_17); + TEST_CASE(define_define_18); + TEST_CASE(define_define_19); + TEST_CASE(define_define_20); // 384 arg contains comma + TEST_CASE(define_define_21); + TEST_CASE(define_define_22); // #400 + TEST_CASE(define_define_23); // #403 - crash, infinite recursion + TEST_CASE(define_define_24); // #590 TEST_CASE(define_va_args_1); TEST_CASE(define_va_args_2); TEST_CASE(define_va_args_3); + TEST_CASE(define_va_args_4); + TEST_CASE(define_va_opt_1); + TEST_CASE(define_va_opt_2); + TEST_CASE(define_va_opt_3); + TEST_CASE(define_va_opt_4); + TEST_CASE(define_va_opt_5); + TEST_CASE(define_va_opt_6); + TEST_CASE(define_va_opt_7); + TEST_CASE(define_va_opt_8); + TEST_CASE(define_va_opt_9); // #632 + + TEST_CASE(pragma_backslash); // multiline pragma directive // UB: #ifdef as macro parameter TEST_CASE(define_ifdef); @@ -2257,6 +3747,7 @@ int main(int argc, char **argv) TEST_CASE(hashhash2); TEST_CASE(hashhash3); TEST_CASE(hashhash4); + TEST_CASE(hashhash4a); // #66, #130 TEST_CASE(hashhash5); TEST_CASE(hashhash6); TEST_CASE(hashhash7); // # ## # (C standard; 6.10.3.3.p4) @@ -2266,12 +3757,40 @@ int main(int argc, char **argv) TEST_CASE(hashhash11); // #60: #define x # # # TEST_CASE(hashhash12); TEST_CASE(hashhash13); + TEST_CASE(hashhash_string_literal); + TEST_CASE(hashhash_string_wrapped); + TEST_CASE(hashhash_char_literal); + TEST_CASE(hashhash_multichar_literal); + TEST_CASE(hashhash_char_escaped); + TEST_CASE(hashhash_string_nothing); + TEST_CASE(hashhash_string_char); + TEST_CASE(hashhash_string_name); + TEST_CASE(hashhashhash_int_literal); + TEST_CASE(hashhash_int_literal); TEST_CASE(hashhash_invalid_1); TEST_CASE(hashhash_invalid_2); + TEST_CASE(hashhash_invalid_string_number); + 
TEST_CASE(hashhash_invalid_missing_args); + TEST_CASE(hashhash_null_stmt); + TEST_CASE(hashhash_empty_va_args); + // C standard, 5.1.1.2, paragraph 4: + // If a character sequence that matches the syntax of a universal + // character name is produced by token concatenation (6.10.3.3), + // the behavior is undefined." + TEST_CASE(hashhash_universal_character); // c++17 __has_include TEST_CASE(has_include_1); TEST_CASE(has_include_2); + TEST_CASE(has_include_3); + TEST_CASE(has_include_4); + TEST_CASE(has_include_5); + TEST_CASE(has_include_6); + + TEST_CASE(strict_ansi_1); + TEST_CASE(strict_ansi_2); + TEST_CASE(strict_ansi_3); + TEST_CASE(strict_ansi_4); TEST_CASE(ifdef1); TEST_CASE(ifdef2); @@ -2285,6 +3804,7 @@ int main(int argc, char **argv) TEST_CASE(ifDefinedInvalid1); TEST_CASE(ifDefinedInvalid2); TEST_CASE(ifDefinedHashHash); + TEST_CASE(ifDefinedHashHash2); TEST_CASE(ifLogical); TEST_CASE(ifSizeof); TEST_CASE(elif); @@ -2293,16 +3813,27 @@ int main(int argc, char **argv) TEST_CASE(ifdiv0); TEST_CASE(ifalt); // using "and", "or", etc TEST_CASE(ifexpr); + TEST_CASE(ifUndefFuncStyleMacro); TEST_CASE(location1); TEST_CASE(location2); TEST_CASE(location3); TEST_CASE(location4); + TEST_CASE(location5); + TEST_CASE(location6); + TEST_CASE(location7); + TEST_CASE(location8); + TEST_CASE(location9); + TEST_CASE(location10); + TEST_CASE(location11); TEST_CASE(missingHeader1); TEST_CASE(missingHeader2); TEST_CASE(missingHeader3); + TEST_CASE(missingHeader4); TEST_CASE(nestedInclude); + TEST_CASE(systemInclude); + TEST_CASE(circularInclude); TEST_CASE(nullDirective1); TEST_CASE(nullDirective2); @@ -2316,6 +3847,7 @@ int main(int argc, char **argv) TEST_CASE(include6); // invalid code: #include MACRO(,) TEST_CASE(include7); // #include MACRO TEST_CASE(include8); // #include MACRO(X) + TEST_CASE(include9); // #include MACRO TEST_CASE(multiline1); TEST_CASE(multiline2); @@ -2336,6 +3868,7 @@ int main(int argc, char **argv) TEST_CASE(readfile_cpp14_number); 
TEST_CASE(readfile_unhandled_chars); TEST_CASE(readfile_error); + TEST_CASE(readfile_file_not_found); TEST_CASE(stringify1); @@ -2351,7 +3884,9 @@ int main(int argc, char **argv) // utf/unicode TEST_CASE(utf8); + TEST_CASE(utf8_invalid); TEST_CASE(unicode); + TEST_CASE(unicode_invalid); TEST_CASE(warning); @@ -2362,5 +3897,36 @@ int main(int argc, char **argv) TEST_CASE(preprocessSizeOf); + TEST_CASE(timeDefine); + TEST_CASE(dateDefine); + + TEST_CASE(stdcVersionDefine); + TEST_CASE(cpluscplusDefine); + TEST_CASE(invalidStd); + TEST_CASE(stdEnum); + TEST_CASE(stdValid); + + TEST_CASE(token); + + TEST_CASE(preprocess_files); + + TEST_CASE(tokenlist_api); + + TEST_CASE(isAbsolutePath); + + TEST_CASE(bad_macro_syntax); + + TEST_CASE(ifCond); + TEST_CASE(macroUsage); + + TEST_CASE(fuzz_crash); + + TEST_CASE(leak); +} + +int main(int argc, char **argv) +{ + runTests(argc, argv, Input::Stringstream); + runTests(argc, argv, Input::CharBuffer); return numberOfFailedAssertions > 0 ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/testsuite/clang-preprocessor-tests/macro_fn_va_opt.c b/testsuite/clang-preprocessor-tests/macro_fn_va_opt.c new file mode 100644 index 00000000..ccb09e95 --- /dev/null +++ b/testsuite/clang-preprocessor-tests/macro_fn_va_opt.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -E %s | grep '^ printf( "%%s" , "Hello" );$' + +#define P( x, ...) printf( x __VA_OPT__(,) __VA_ARGS__ ) +#define PF( x, ...) 
P( x __VA_OPT__(,) __VA_ARGS__ ) + +int main() +{ + PF( "%s", "Hello" ); + PF( "Hello", ); + PF( "Hello" ); + PF( , ); + PF( ); +} diff --git a/testutils.py b/testutils.py new file mode 100644 index 00000000..55a2686d --- /dev/null +++ b/testutils.py @@ -0,0 +1,57 @@ +import os +import subprocess +import json + +def __run_subprocess(args, env=None, cwd=None, timeout=None): + p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, cwd=cwd) + + try: + stdout, stderr = p.communicate(timeout=timeout) + return_code = p.returncode + p = None + except subprocess.TimeoutExpired: + import psutil + # terminate all the child processes + child_procs = psutil.Process(p.pid).children(recursive=True) + if len(child_procs) > 0: + for child in child_procs: + child.terminate() + try: + # call with timeout since it might be stuck + p.communicate(timeout=5) + p = None + except subprocess.TimeoutExpired: + pass + raise + finally: + if p: + # sending the signal to the process groups causes the parent Python process to terminate as well + #os.killpg(os.getpgid(p.pid), signal.SIGTERM) # Send the signal to all the process groups + p.terminate() + stdout, stderr = p.communicate() + p = None + + stdout = stdout.decode(encoding='utf-8', errors='ignore').replace('\r\n', '\n') + stderr = stderr.decode(encoding='utf-8', errors='ignore').replace('\r\n', '\n') + + return return_code, stdout, stderr + +def simplecpp(args = [], cwd = None): + dir_path = os.path.dirname(os.path.realpath(__file__)) + if 'SIMPLECPP_EXE_PATH' in os.environ: + simplecpp_path = os.environ['SIMPLECPP_EXE_PATH'] + else: + simplecpp_path = os.path.join(dir_path, "simplecpp") + return __run_subprocess([simplecpp_path] + args, cwd = cwd) + +def quoted_string(s): + return json.dumps(str(s)) + +def format_include_path_arg(include_path): + return f"-I{str(include_path)}" + +def format_include(include, is_sys_header=False): + if is_sys_header: + return f"<{quoted_string(include)[1:-1]}>" + else: + 
return quoted_string(include)