From 0c1b616d7d9cec098575265f0417e811a8ace78e Mon Sep 17 00:00:00 2001
From: Pelumi Caleb Ogunwale
Date: Tue, 7 Apr 2026 21:14:52 -0400
Subject: [PATCH] build cwe_helpers script

---
 .gitignore           |   2 +
 .python-version      |   1 +
 build_cwe_helpers.py | 158 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 161 insertions(+)
 create mode 100644 .python-version
 create mode 100644 build_cwe_helpers.py

diff --git a/.gitignore b/.gitignore
index 4dbe45b8d28a..638d6d4385a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,3 +79,5 @@ node_modules/
 
 # Mergetool files
 *.orig
+
+/cwe_helpers
diff --git a/.python-version b/.python-version
new file mode 100644
index 000000000000..eae0123defaa
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.10.19
diff --git a/build_cwe_helpers.py b/build_cwe_helpers.py
new file mode 100644
index 000000000000..388ff0806f3e
--- /dev/null
+++ b/build_cwe_helpers.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+"""
+build_cwe_helpers.py
+
+Scans all supported language security folders in this CodeQL repo and
+reorganises them into:
+
+    cwe_helpers/
+        <CWE-ID>/
+            <language>/
+                ... original files ...
+
+Files are COPIED (not moved) so the original tree is untouched.
+"""
+
+import os
+import shutil
+from pathlib import Path
+
+# ---------------------------------------------------------------------------
+# Repository root (directory containing this script)
+# ---------------------------------------------------------------------------
+REPO_ROOT = Path(__file__).parent.resolve()
+OUTPUT_ROOT = REPO_ROOT / "cwe_helpers"
+
+# ---------------------------------------------------------------------------
+# Language definitions
+#
+# Each entry:
+#   "name"     – short label used as the per-language subfolder name
+#   "base_dir" – path (relative to REPO_ROOT) that CONTAINS the CWE-* folders
+#
+# CWE subdirectories are found by name (see is_cwe_folder), so entries
+# carry no per-language glob pattern.
+# ---------------------------------------------------------------------------
+LANGUAGES = [
+    {
+        "name": "cpp",
+        # cpp/ql/src/Security/CWE/CWE-XXX/
+        "base_dir": Path("cpp/ql/src/Security/CWE"),
+    },
+    {
+        "name": "java",
+        # java/ql/src/Security/CWE/CWE-XXX/
+        "base_dir": Path("java/ql/src/Security/CWE"),
+    },
+    {
+        "name": "csharp",
+        # csharp/ql/src/Security Features/CWE-XXX/  (note the space)
+        "base_dir": Path("csharp/ql/src/Security Features"),
+    },
+    {
+        "name": "javascript",
+        # javascript/ql/src/Security/CWE-XXX/
+        "base_dir": Path("javascript/ql/src/Security"),
+    },
+    {
+        "name": "python",
+        # python/ql/src/Security/CWE-XXX/
+        "base_dir": Path("python/ql/src/Security"),
+    },
+    {
+        "name": "go",
+        # go/ql/src/Security/CWE-XXX/
+        "base_dir": Path("go/ql/src/Security"),
+    },
+    {
+        "name": "rust",
+        # rust/ql/src/queries/security/CWE-XXX/
+        "base_dir": Path("rust/ql/src/queries/security"),
+    },
+    {
+        "name": "swift",
+        # swift/ql/src/queries/Security/CWE-XXX/
+        "base_dir": Path("swift/ql/src/queries/Security"),
+    },
+    {
+        "name": "ruby",
+        # ruby/ql/src/queries/security/cwe-XXX/  (lowercase names)
+        "base_dir": Path("ruby/ql/src/queries/security"),
+    },
+]
+
+
+def normalise_cwe_name(folder_name: str) -> str:
+    """
+    Normalise a CWE folder name to uppercase so that, e.g.,
+    Ruby's 'cwe-079' maps to the same bucket as 'CWE-079'.
+    """
+    return folder_name.upper()
+
+
+def is_cwe_folder(path: Path) -> bool:
+    """Return True if *path* is a directory whose name starts with CWE- (case-insensitive)."""
+    return path.is_dir() and path.name.upper().startswith("CWE-")
+
+
+def copy_tree(src: Path, dst: Path) -> None:
+    """
+    Recursively copy *src* directory into *dst*.
+
+    If *dst* already exists its contents are merged/overwritten; missing
+    parent directories of *dst* are created.
+    """
+    # shutil.copytree copies files with shutil.copy2 by default, so file
+    # metadata is preserved as before.
+    shutil.copytree(src, dst, dirs_exist_ok=True)
+
+
+def build_cwe_helpers() -> None:
+    """
+    Copy every CWE folder of every entry in LANGUAGES into OUTPUT_ROOT.
+
+    Each CWE folder is copied to OUTPUT_ROOT/<CWE-ID>/<language>, with the
+    CWE folder name upper-cased. Languages whose base directory is missing
+    or holds no CWE folders are skipped; progress is printed to stdout.
+    """
+    print(f"Repository root : {REPO_ROOT}")
+    print(f"Output directory: {OUTPUT_ROOT}\n")
+
+    total_copied = 0
+    missing_bases: list[str] = []
+
+    for lang in LANGUAGES:
+        lang_name = lang["name"]
+        base_dir = REPO_ROOT / lang["base_dir"]
+
+        # is_dir() rather than exists(): a regular file at this path would
+        # otherwise make iterdir() below raise NotADirectoryError.
+        if not base_dir.is_dir():
+            missing_bases.append(f"{lang_name}: {base_dir}")
+            print(f"  [SKIP] {lang_name:12s} — base directory not found: {base_dir}")
+            continue
+
+        cwe_dirs = sorted(p for p in base_dir.iterdir() if is_cwe_folder(p))
+
+        if not cwe_dirs:
+            print(f"  [SKIP] {lang_name:12s} — no CWE folders found under {base_dir}")
+            continue
+
+        print(f"  [OK]   {lang_name:12s} — found {len(cwe_dirs)} CWE folder(s)")
+
+        for cwe_dir in cwe_dirs:
+            cwe_tag = normalise_cwe_name(cwe_dir.name)
+            dest = OUTPUT_ROOT / cwe_tag / lang_name
+            copy_tree(cwe_dir, dest)
+            total_copied += 1
+
+    print(f"\nDone. Copied {total_copied} CWE/language combinations into '{OUTPUT_ROOT.name}/'.")
+
+    if missing_bases:
+        print("\nThe following language base directories were not found and were skipped:")
+        for m in missing_bases:
+            print(f"  {m}")
+
+
+if __name__ == "__main__":
+    build_cwe_helpers()