diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..22a68fc --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,29 @@ +--- +name: Bug report +about: Report a false positive, false negative, or crash in a query +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear description of what is wrong. Is this a false positive, false negative, crash, or something else? + +**Query involved** +Which query or pack is affected (for example, `trailofbits/cpp-queries` or the specific query ID reported in the alert). + +**To reproduce** +1. CodeQL CLI version (`codeql --version`): +2. Language/database under analysis: +3. Exact command invocation: +4. Minimal code snippet or repository link that triggers the issue: + +**Expected behavior** +What you expected the query to report (or not report). + +**Actual behavior** +What the query actually reported, including alert locations and any error output. + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/request_feature.md b/.github/ISSUE_TEMPLATE/request_feature.md new file mode 100644 index 0000000..11fc491 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/request_feature.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..eb2186f --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +version: 2 + +updates: + - package-ecosystem: github-actions + cooldown: + default-days: 7 + directory: / + groups: + actions: + patterns: + - "*" + schedule: + interval: daily diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..5e0e07c --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,27 @@ +name: Publish CodeQL packs + +on: + release: + types: [published] + +permissions: {} + +jobs: + publish: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: trailofbits/setup-codeql@615e3864087261d42cce229e3eec419ab9b22c36 # main + with: + version: '2.25.1' + platform: 'linux64' + checksum: '4f070e6cc7009e75aec307ed109c2fcf0501e579c20a31080b893e31209523d5' + - run: make test + - run: make publish + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e64326e..e3b766a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,15 +6,24 @@ on: branches: - main +permissions: {} + jobs: main: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - uses: trailofbits/setup-codeql@main + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: trailofbits/setup-codeql@615e3864087261d42cce229e3eec419ab9b22c36 # main with: - version: '2.23.8' + version: '2.25.1' platform: 'linux64' - checksum: 'e61bc8aa8d86d45acd9d1c36629a12bbfb3365cd07a31666a2ebc91c6a1940b2' + checksum: '4f070e6cc7009e75aec307ed109c2fcf0501e579c20a31080b893e31209523d5' + - uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 - run: make format-check - run: make test + - name: Verify doc/QUERIES.md is up to date + run: | + make generate-table + git diff --exit-code doc/QUERIES.md diff --git a/CLAUDE.md b/CLAUDE.md index 92ccf1c..606519c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -53,14 +53,19 @@ Test `.cpp` files include stubs via relative paths: Stubs only need enough declarations for CodeQL to resolve types and function names — no implementations required. -## Updating README Query Tables +## Updating Query Tables -When a query is added, removed, or its metadata changes, regenerate the README tables: +When a query is added, removed, or its metadata changes, regenerate `doc/QUERIES.md`: ```sh -python ./scripts/queries_table_generator.py 2>/dev/null +make generate-table ``` -This reads query metadata from all "full" suites and outputs markdown tables. Copy-paste the output into `README.md` under the `## Queries` section. +This reads query metadata from all "full" suites and writes markdown tables to `doc/QUERIES.md`. The file is generated — do not hand-edit it. + +The accompanying per-query markdown docs in `/src/docs/` are regenerated from each query's `.qhelp` file with: +```sh +make generate-help +``` ## Qlpack Versioning diff --git a/Makefile b/Makefile index 64c360d..8423361 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,9 @@ format-check: find . \( -iname '*.ql' -o -iname '*.qll' \) -print0 | \ xargs -0 codeql query format --check-only +download: + codeql pack download trailofbits/cpp-all trailofbits/cpp-queries trailofbits/go-queries trailofbits/java-queries + pack-install: find . -iname "qlpack.yml" -exec \ sh -c 'codeql pack install $$(dirname "$$1")' sh {} \; @@ -18,4 +21,20 @@ pack-upgrade: find . -iname "qlpack.yml" -exec \ sh -c 'codeql pack upgrade $$(dirname "$$1")' sh {} \; -.PHONY: test format format-check pack-install pack-upgrade +generate-table: + uv run --with pyyaml \ + python ./scripts/queries_table_generator.py > doc/QUERIES.md.tmp + mv doc/QUERIES.md.tmp doc/QUERIES.md + +generate-help: + codeql generate query-help ./cpp/src/ --format=markdown --output ./cpp/src/docs + codeql generate query-help ./go/src/ --format=markdown --output ./go/src/docs + codeql generate query-help ./java/src/ --format=markdown --output ./java/src/docs + +publish: + codeql pack publish cpp/lib/ + codeql pack publish cpp/src/ + codeql pack publish go/src/ + codeql pack publish java/src/ + +.PHONY: test format format-check download pack-install pack-upgrade generate-table generate-help publish diff --git a/README.md b/README.md index 8488b85..e9d3edc 100644 --- a/README.md +++ b/README.md @@ -2,88 +2,28 @@ This repository contains CodeQL queries developed by Trail of Bits and made available to the public. They are part of our ongoing development efforts and are used in our security audits, vulnerability research, and internal projects. They will evolve over time as we identify new techniques. -## Using custom CodeQL queries +## Setup -The easiest is to [download all packs](https://docs.github.com/en/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/publishing-and-using-codeql-packs#running-codeql-pack-download-scopepack) from the GitHub registry: -```sh -codeql pack download trailofbits/cpp-queries trailofbits/go-queries +```bash +make download +codeql resolve packs | grep trailofbits ``` -Then verify that new queries are installed: -```sh -codeql resolve qlpacks | grep trailofbits -``` +See [QUERIES.md](doc/QUERIES.md) for the full list of queries. -And use the queries for analysis: -```sh +## Usage + +```bash codeql database analyze database.db --format=sarif-latest --output=./tob.sarif -- trailofbits/cpp-queries -# or codeql database analyze database.db --format=sarif-latest --output=./tob.sarif -- trailofbits/go-queries +codeql database analyze database.db --format=sarif-latest --output=./tob.sarif -- trailofbits/java-queries ``` -## Queries - -### C and C++ - -#### Cryptography - -| Name | Description | Severity | Precision | -| --- | ----------- | :----: | :--------: | -|[BN_CTX_free called before BN_CTX_end](./cpp/src/docs/crypto/BnCtxFreeBeforeEnd.md)|Detects BN_CTX_free called before BN_CTX_end, which violates the required lifecycle|error|medium| -|[Unbalanced BN_CTX_start and BN_CTX_end pair](./cpp/src/docs/crypto/UnbalancedBnCtx.md)|Detects if one call in the BN_CTX_start/BN_CTX_end pair is missing|warning|medium| -|[Crypto variable initialized using static key](./cpp/src/docs/crypto/StaticKeyFlow.md)|Finds crypto variables initialized using static keys|error|high| -|[Crypto variable initialized using static password](./cpp/src/docs/crypto/StaticPasswordFlow.md)|Finds crypto variables initialized using static passwords|error|high| -|[Crypto variable initialized using weak randomness](./cpp/src/docs/crypto/WeakRandomnessTaint.md)|Finds crypto variables initialized using weak randomness|error|high| -|[Invalid key size](./cpp/src/docs/crypto/InvalidKeySize.md)|Tests if keys passed to EVP_EncryptInit and EVP_EncryptInit_ex have the same size as the key size of the cipher used|warning|medium| -|[Memory leak related to custom allocator](./cpp/src/docs/crypto/CustomAllocatorLeak.md)|Finds memory leaks from custom allocated memory|warning|medium| -|[Memory use after free related to custom allocator](./cpp/src/docs/crypto/CustomAllocatorUseAfterFree.md)|Finds use-after-frees related to custom allocators like `BN_new`|warning|medium| -|[Missing OpenSSL engine initialization](./cpp/src/docs/crypto/MissingEngineInit.md)|Finds created OpenSSL engines that may not be properly initialized|warning|medium| -|[Missing error handling](./cpp/src/docs/crypto/MissingErrorHandling.md)|Checks if returned error codes are properly checked|warning|high| -|[Missing zeroization of potentially sensitive random BIGNUM](./cpp/src/docs/crypto/MissingZeroization.md)|Determines if random bignums are properly zeroized|warning|medium| -|[Random buffer too small](./cpp/src/docs/crypto/RandomBufferTooSmall.md)|Finds buffer overflows in calls to CSPRNGs|warning|high| -|[Use of legacy cryptographic algorithm](./cpp/src/docs/crypto/UseOfLegacyAlgorithm.md)|Detects potential instantiations of legacy cryptographic algorithms|warning|medium| - -#### Security - -| Name | Description | Severity | Precision | -| --- | ----------- | :----: | :--------: | -|[Async unsafe signal handler](./cpp/src/docs/security/AsyncUnsafeSignalHandler/AsyncUnsafeSignalHandler.md)|Async unsafe signal handler (like the one used in CVE-2024-6387)|warning|high| -|[Decrementation overflow when comparing](./cpp/src/docs/security/DecOverflowWhenComparing/DecOverflowWhenComparing.md)|This query finds unsigned integer overflows resulting from unchecked decrementation during comparison.|error|high| -|[Find all problematic implicit casts](./cpp/src/docs/security/UnsafeImplicitConversions/UnsafeImplicitConversions.md)|Find all implicit casts that may be problematic. That is, casts that may result in unexpected truncation, reinterpretation or widening of values.|error|high| -|[Inconsistent handling of return values from a specific function](./cpp/src/docs/security/InconsistentReturnValueHandling/InconsistentReturnValueHandling.md)|Detects functions whose return values are compared inconsistently across call sites, which may indicate bugs|warning|medium| -|[Invalid string size passed to string manipulation function](./cpp/src/docs/security/CStrnFinder/CStrnFinder.md)|Finds calls to functions that take as input a string and its size as separate arguments (e.g., `strncmp`, `strncat`, ...) and the size argument is wrong|error|low| -|[Iterator invalidation](./cpp/src/docs/security/IteratorInvalidation/IteratorInvalidation.md)|Modifying a container while iterating over it can invalidate iterators, leading to undefined behavior.|warning|medium| -|[Missing null terminator](./cpp/src/docs/security/NoNullTerminator/NoNullTerminator.md)|This query finds incorrectly initialized strings that are passed to functions expecting null-byte-terminated strings|error|high| - -### Go - -#### Cryptography - -| Name | Description | Severity | Precision | -| --- | ----------- | :----: | :--------: | -|[Message not hashed before signature verification](./go/src/docs/crypto/MsgNotHashedBeforeSigVerfication/MsgNotHashedBeforeSigVerfication.md)|Detects calls to (EC)DSA APIs with a message that was not hashed. If the message is longer than the expected hash digest size, it is silently truncated|error|medium| - -#### Security - -| Name | Description | Severity | Precision | -| --- | ----------- | :----: | :--------: | -|[Invalid file permission parameter](./go/src/docs/security/FilePermsFlaws/FilePermsFlaws.md)|Finds non-octal (e.g., `755` vs `0o755`) and unsupported (e.g., `04666`) literals used as a filesystem permission parameter (`FileMode`)|error|medium| -|[Missing MinVersion in tls.Config](./go/src/docs/security/MissingMinVersionTLS/MissingMinVersionTLS.md)|Finds uses of tls.Config where MinVersion is not set and the project's minimum Go version (from go.mod) indicates insecure defaults: Go < 1.18 for clients or Go < 1.22 for servers. Does not mark explicitly set versions (including explicitly insecure ones).|error|medium| -|[Trim functions misuse](./go/src/docs/security/TrimMisuse/TrimMisuse.md)|Finds calls to `string.{Trim,TrimLeft,TrimRight}` with the 2nd argument not being a cutset but a continuous substring to be trimmed|error|low| - -### Java-kotlin - -#### Security - -| Name | Description | Severity | Precision | -| --- | ----------- | :----: | :--------: | -|[Recursive functions](./java-kotlin/src/docs/security/Recursion/Recursion.md)|Detects possibly unbounded recursive calls|warning|low| - ## Query suites -CodeQL queries are grouped into "suites". To execute queries from a specific suit add its name after a colon: `trailofbits/cpp-queries:codeql-suites/tob-cpp-full.qls`. +CodeQL queries are grouped into suites. To execute queries from a specific suite add its name after a colon: `trailofbits/cpp-queries:codeql-suites/tob-cpp-full.qls`. -The recommended suit - `tob-cpp-code-scanning.qls` - is chosen and executed when you do not explicitly specify any suit. Other suits in this repository are: +The recommended suite - `tob-cpp-code-scanning.qls` - is chosen and executed when you do not explicitly specify any suite. Other suites in this repository are: * `tob--crypto.qls` - queries targeting cryptographic vulnerabilities * `tob--security.qls` - queries targeting standard security issues @@ -91,48 +31,25 @@ The recommended suit - `tob-cpp-code-scanning.qls` - is chosen and executed when ## Development -#### Prepare environment +### Prepare environment + +Configure global CodeQL's search path: -Clone this repository and configure global CodeQL's search path: -```sh -git clone git@github.com:trailofbits/codeql-queries.git +```bash +git clone https://github.com/trailofbits/codeql-queries mkdir -p "${HOME}/.config/codeql/" echo "--search-path '$PWD/codeql-queries'" > "${HOME}/.config/codeql/config" -``` - -Check that CodeQL CLI detects the new qlpacks: -```sh -codeql resolve packs | grep trailofbits -``` -#### Before committing +cd codeql-queries/ +make pack-install -Run tests: - -```sh -make test -``` - -Format queries: - -```sh -make format -``` - -Install dependencies: - -```sh -make install +codeql resolve packs | grep trailofbits ``` -Generate query tables and copy-paste it to README.md file -```sh -python ./scripts/queries_table_generator.py 2>/dev/null -``` +### Before committing -Generate markdown query help files -```sh -codeql generate query-help ./cpp/src/ --format=markdown --output ./cpp/src/docs -codeql generate query-help ./go/src/ --format=markdown --output ./go/src/docs -codeql generate query-help ./java/src/ --format=markdown --output ./java/src/docs +```bash +make pack-upgrade +make test format +make generate-table generate-help ``` diff --git a/cpp/lib/codeql-pack.lock.yml b/cpp/lib/codeql-pack.lock.yml index 78b4cc1..3ad3544 100644 --- a/cpp/lib/codeql-pack.lock.yml +++ b/cpp/lib/codeql-pack.lock.yml @@ -2,27 +2,27 @@ lockVersion: 1.0.0 dependencies: codeql/controlflow: - version: 2.0.22 + version: 2.0.31 codeql/cpp-all: - version: 6.1.3 + version: 9.0.0 codeql/dataflow: - version: 2.0.22 + version: 2.1.3 codeql/mad: - version: 1.0.38 + version: 1.0.47 codeql/quantum: - version: 0.0.16 + version: 0.0.25 codeql/rangeanalysis: - version: 1.0.38 + version: 1.0.47 codeql/ssa: - version: 2.0.14 + version: 2.0.23 codeql/tutorial: - version: 1.0.38 + version: 1.0.47 codeql/typeflow: - version: 1.0.38 + version: 1.0.47 codeql/typetracking: - version: 2.0.22 + version: 2.0.31 codeql/util: - version: 2.0.25 + version: 2.0.34 codeql/xml: - version: 1.0.38 + version: 1.0.47 compiled: false diff --git a/cpp/src/codeql-pack.lock.yml b/cpp/src/codeql-pack.lock.yml index 78b4cc1..3ad3544 100644 --- a/cpp/src/codeql-pack.lock.yml +++ b/cpp/src/codeql-pack.lock.yml @@ -2,27 +2,27 @@ lockVersion: 1.0.0 dependencies: codeql/controlflow: - version: 2.0.22 + version: 2.0.31 codeql/cpp-all: - version: 6.1.3 + version: 9.0.0 codeql/dataflow: - version: 2.0.22 + version: 2.1.3 codeql/mad: - version: 1.0.38 + version: 1.0.47 codeql/quantum: - version: 0.0.16 + version: 0.0.25 codeql/rangeanalysis: - version: 1.0.38 + version: 1.0.47 codeql/ssa: - version: 2.0.14 + version: 2.0.23 codeql/tutorial: - version: 1.0.38 + version: 1.0.47 codeql/typeflow: - version: 1.0.38 + version: 1.0.47 codeql/typetracking: - version: 2.0.22 + version: 2.0.31 codeql/util: - version: 2.0.25 + version: 2.0.34 codeql/xml: - version: 1.0.38 + version: 1.0.47 compiled: false diff --git a/cpp/src/crypto/BnCtxFreeBeforeEnd.qhelp b/cpp/src/crypto/BnCtxFreeBeforeEnd.qhelp new file mode 100644 index 0000000..cd3eb45 --- /dev/null +++ b/cpp/src/crypto/BnCtxFreeBeforeEnd.qhelp @@ -0,0 +1,45 @@ + + + +

This query identifies instances where BN_CTX_free is called before BN_CTX_end, which violates the required lifecycle of OpenSSL's BN_CTX objects.

+ +

In OpenSSL, the proper lifecycle for using BN_CTX with nested allocations is:

+ +
    +
  1. BN_CTX_start(ctx) - marks the start of a nested allocation
  2. +
  3. Use BN_CTX_get(ctx) to get temporary BIGNUMs
  4. +
  5. BN_CTX_end(ctx) - releases the temporary BIGNUMs allocated since the matching BN_CTX_start
  6. +
  7. BN_CTX_free(ctx) - frees the entire context
  8. +
+ +

Calling BN_CTX_free before BN_CTX_end can lead to corrupted state or undefined behavior, as temporary BIGNUMs allocated via BN_CTX_get are not properly released.

+ +

The following example would be flagged as an issue by the query:

+ +void compute(BIGNUM *result) { + BN_CTX *ctx = BN_CTX_new(); + BN_CTX_start(ctx); + + BIGNUM *tmp = BN_CTX_get(ctx); + // Perform computation using tmp + + // ERROR: BN_CTX_free called without calling BN_CTX_end first + BN_CTX_free(ctx); +} + +

The correct version should call BN_CTX_end before BN_CTX_free:

+ +void compute(BIGNUM *result) { + BN_CTX *ctx = BN_CTX_new(); + BN_CTX_start(ctx); + + BIGNUM *tmp = BN_CTX_get(ctx); + // Perform computation using tmp + + BN_CTX_end(ctx); // Properly release temporary BIGNUMs + BN_CTX_free(ctx); // Now safe to free the context +} +
+
diff --git a/cpp/src/crypto/CustomAllocatorLeak.qhelp b/cpp/src/crypto/CustomAllocatorLeak.qhelp new file mode 100644 index 0000000..41bd415 --- /dev/null +++ b/cpp/src/crypto/CustomAllocatorLeak.qhelp @@ -0,0 +1,26 @@ + + + +

This query identifies potential memory leaks from custom allocators like +BN_new.

+ +

The following example would be identified by the query as a potential memory +leak.

+ +int compute(BIGNUM* a) { + BIGNUM *b = BN_new(); + + // Perform computation on `a` and `b`. + + if (condition(a)) { + BN_free(b); + return a; + } + + // The BIGNUM `b` may leak here. + return a; +} +
+
diff --git a/cpp/src/crypto/CustomAllocatorUseAfterFree.qhelp b/cpp/src/crypto/CustomAllocatorUseAfterFree.qhelp new file mode 100644 index 0000000..f18a79d --- /dev/null +++ b/cpp/src/crypto/CustomAllocatorUseAfterFree.qhelp @@ -0,0 +1,26 @@ + + + +

This query identifies use-after-frees with custom allocators like BN_new.

+ +

The following code snippet would be identified as an issue by the query.

+ +BIGNUM* compute(BIGNUM* a) { + BIGNUM *b = BN_new(); + + if (condition(a)) { + // The BIGNUM `b` may be freed here. + BN_free(b); + } + // Potential use-after-free of `b` here. + if (condition(b)) { + BN_free(a); + a = BN_new(); + } + + return b; +} +
+
diff --git a/cpp/src/crypto/InvalidKeySize.qhelp b/cpp/src/crypto/InvalidKeySize.qhelp new file mode 100644 index 0000000..058032b --- /dev/null +++ b/cpp/src/crypto/InvalidKeySize.qhelp @@ -0,0 +1,17 @@ + + + +

The EVP_EncryptInit and EVP_EncryptInit_ex functions do not take the size of the key as input, and the implementation will simply read the expected number of bytes from the buffer. This query will check if the size of the key buffer passed as an argument to one of these functions is equal to the key size of the corresponding cipher.

+ +

The following code snippet is an example of an issue that would be identified by the query.

+ +unsigned char key[16]; // This should be 32 for a 256 bit key +RAND_bytes(key, sizeof(key)); + +unsigned char *iv = (unsigned char *)"0123456789ABCDEF"; // A fixed 16 byte IV + +EVP_EncryptInit_ex(EVP_CIPHER_CTX_new(), EVP_aes_256_cbc(), NULL, key, iv); +
+
diff --git a/cpp/src/crypto/MissingEngineInit.qhelp b/cpp/src/crypto/MissingEngineInit.qhelp new file mode 100644 index 0000000..1f5f64c --- /dev/null +++ b/cpp/src/crypto/MissingEngineInit.qhelp @@ -0,0 +1,14 @@ + + + +

This query identifies loaded OpenSSL engines which are not passed to both ENGINE_init and ENGINE_set_default. ENGINE_init should always be called when a new engine is loaded. It is generally good practice to also call ENGINE_set_default to ensure that the primitives defined by the engine are used by default.

+ +

The following code snippet would be flagged as an issue by the query.

+ +ENGINE* load_rdrand() { + return ENGINE_by_id("rdrand"); +} +
+
diff --git a/cpp/src/crypto/MissingErrorHandling.qhelp b/cpp/src/crypto/MissingErrorHandling.qhelp new file mode 100644 index 0000000..c8b19df --- /dev/null +++ b/cpp/src/crypto/MissingErrorHandling.qhelp @@ -0,0 +1,14 @@ + + + +

Many of the functions comprising the OpenSSL API return an integer error code where 1 typically signals success, and 0 signals failure. To ensure that the implementation is secure, these error codes must be checked and acted upon. This query attempts to identify locations where a returned error code is not checked by the codebase. In this context, checked means that the return value flows to the condition of a control-flow statement like an if-statement, a while-statement, or a switch-statement.

+ +

As an example, the following example function fails to check the return value from RAND_bytes, and would be flagged as an issue by the query.

+ +void generateEncryptionKey(unsigned char *key, size_t keySize) { + RAND_bytes(key, keySize); +} +
+
diff --git a/cpp/src/crypto/MissingZeroization.qhelp b/cpp/src/crypto/MissingZeroization.qhelp new file mode 100644 index 0000000..e4cad89 --- /dev/null +++ b/cpp/src/crypto/MissingZeroization.qhelp @@ -0,0 +1,19 @@ + + + +

Randomly generated BIGNUMs often represent sensitive data (e.g. like ECDSA nonces). These should be cleared as they go out of scope to ensure that sensitive data does not remain in memory longer than required.

+ +

This query identifies OpenSSL BIGNUMs which are initialized using BN_rand but are not zeroized using BN_clear or BN_clear_free before they go out of scope. The following example function would be flagged as an issue by the query.

+ +void compute() { + BIGNUM *n = BN_new(); + BN_rand(n, 128, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY); + + // Perform sensitive computation using `n`. + + BN_free(n); +} +
+
diff --git a/cpp/src/crypto/RandomBufferTooSmall.qhelp b/cpp/src/crypto/RandomBufferTooSmall.qhelp new file mode 100644 index 0000000..66df00c --- /dev/null +++ b/cpp/src/crypto/RandomBufferTooSmall.qhelp @@ -0,0 +1,17 @@ + + + +

This query finds buffer overflows in calls to CSPRNGs like RAND_bytes, RAND_bytes_ex, RAND_priv_bytes, and RAND_priv_bytes_ex. It is currently restricted to statically allocated buffers to allow us to easily determine the input buffer size, but could easily be extended to dynamically allocated buffers as well.

+ +

The following example code would be flagged as vulnerable by the query.

+ +#define KEY_SIZE 16 + +// ... + +unsigned char key[KEY_SIZE]; +int res = RAND_bytes(key, 32) +
+
diff --git a/cpp/src/crypto/StaticKeyFlow.qhelp b/cpp/src/crypto/StaticKeyFlow.qhelp new file mode 100644 index 0000000..c5c71ce --- /dev/null +++ b/cpp/src/crypto/StaticKeyFlow.qhelp @@ -0,0 +1,10 @@ + + + +

This query flags locations where a static buffer is used as an argument to a +function requiring strong, cryptographic level randomness (e.g. an encryption +key).

+
+
diff --git a/cpp/src/crypto/StaticPasswordFlow.qhelp b/cpp/src/crypto/StaticPasswordFlow.qhelp new file mode 100644 index 0000000..0d399ce --- /dev/null +++ b/cpp/src/crypto/StaticPasswordFlow.qhelp @@ -0,0 +1,8 @@ + + + +

This query flags locations where a static string is used as an argument to a function requiring a strong password (e.g. the password argument for a password-based key derivation function).

+
+
diff --git a/cpp/src/crypto/UnbalancedBnCtx.qhelp b/cpp/src/crypto/UnbalancedBnCtx.qhelp new file mode 100644 index 0000000..49df987 --- /dev/null +++ b/cpp/src/crypto/UnbalancedBnCtx.qhelp @@ -0,0 +1,56 @@ + + + +

This query detects unbalanced pairs of BN_CTX_start and BN_CTX_end calls in OpenSSL code. These functions must be used in matching pairs to properly manage temporary BIGNUM allocations within a BN_CTX context.

+ +

BN_CTX_start marks the beginning of a nested allocation scope, and BN_CTX_end releases all temporary BIGNUMs allocated via BN_CTX_get since the matching BN_CTX_start. Each call to BN_CTX_start must have a corresponding BN_CTX_end call, and vice versa.

+ +

Common issues include:

+ +
    +
  • Calling BN_CTX_start without a corresponding BN_CTX_end (memory leak of temporary allocations)
  • +
  • Calling BN_CTX_end without a corresponding BN_CTX_start (undefined behavior)
  • +
  • Missing BN_CTX_end in error paths
  • +
+ +

The following example would be flagged for missing BN_CTX_end:

+ +int compute(BN_CTX *ctx, BIGNUM *result) { + BN_CTX_start(ctx); + + BIGNUM *tmp1 = BN_CTX_get(ctx); + BIGNUM *tmp2 = BN_CTX_get(ctx); + + if (!tmp1 || !tmp2) { + // ERROR: Missing BN_CTX_end on error path + return 0; + } + + // Perform computation + + BN_CTX_end(ctx); + return 1; +} + +

The correct version ensures BN_CTX_end is called on all code paths:

+ +int compute(BN_CTX *ctx, BIGNUM *result) { + BN_CTX_start(ctx); + + BIGNUM *tmp1 = BN_CTX_get(ctx); + BIGNUM *tmp2 = BN_CTX_get(ctx); + + if (!tmp1 || !tmp2) { + BN_CTX_end(ctx); // Properly clean up on error path + return 0; + } + + // Perform computation + + BN_CTX_end(ctx); + return 1; +} +
+
diff --git a/cpp/src/crypto/UseOfLegacyAlgorithm.qhelp b/cpp/src/crypto/UseOfLegacyAlgorithm.qhelp new file mode 100644 index 0000000..d8c91ea --- /dev/null +++ b/cpp/src/crypto/UseOfLegacyAlgorithm.qhelp @@ -0,0 +1,29 @@ + + + +

This query finds uses of weak or deprecated cryptographic algorithms like MD5, SHA1, and DES. The query will flag calls to functions containing any of the following names:

+ +
    +
  • MD2
  • +
  • MD4
  • +
  • MD5
  • +
  • RIPEMD
  • +
  • SHA1
  • +
  • Streebog
  • +
  • Whirlpool
  • +
  • PBKDF1
  • +
  • ArcFour
  • +
  • Blowfish
  • +
  • CAST
  • +
  • DES
  • +
  • IDEA
  • +
  • Kasumi
  • +
  • Magma
  • +
  • RC2
  • +
  • RC4
  • +
  • TDEA
  • +
+
+
diff --git a/cpp/src/crypto/WeakRandomnessTaint.qhelp b/cpp/src/crypto/WeakRandomnessTaint.qhelp new file mode 100644 index 0000000..8321695 --- /dev/null +++ b/cpp/src/crypto/WeakRandomnessTaint.qhelp @@ -0,0 +1,10 @@ + + + +

This query finds crypto variables initialized using weak randomness like process IDs or timestamps.

+ +

Limitations: the query does not track weak randomness through a hash function, so keys derived by hashing a weak source are not flagged.

+
+
diff --git a/cpp/src/docs/crypto/BnCtxFreeBeforeEnd.md b/cpp/src/docs/crypto/BnCtxFreeBeforeEnd.md index a1a8256..fb4d50c 100644 --- a/cpp/src/docs/crypto/BnCtxFreeBeforeEnd.md +++ b/cpp/src/docs/crypto/BnCtxFreeBeforeEnd.md @@ -1,14 +1,12 @@ # BN_CTX_free called before BN_CTX_end - This query identifies instances where `BN_CTX_free` is called before `BN_CTX_end`, which violates the required lifecycle of OpenSSL's `BN_CTX` objects. In OpenSSL, the proper lifecycle for using `BN_CTX` with nested allocations is: 1. `BN_CTX_start(ctx)` - marks the start of a nested allocation -2. Use `BN_CTX_get(ctx)` to get temporary BIGNUMs -3. `BN_CTX_end(ctx)` - releases the temporary BIGNUMs allocated since the matching `BN_CTX_start` -4. `BN_CTX_free(ctx)` - frees the entire context - +1. Use `BN_CTX_get(ctx)` to get temporary BIGNUMs +1. `BN_CTX_end(ctx)` - releases the temporary BIGNUMs allocated since the matching `BN_CTX_start` +1. `BN_CTX_free(ctx)` - frees the entire context Calling `BN_CTX_free` before `BN_CTX_end` can lead to corrupted state or undefined behavior, as temporary BIGNUMs allocated via `BN_CTX_get` are not properly released. The following example would be flagged as an issue by the query: @@ -25,7 +23,6 @@ void compute(BIGNUM *result) { BN_CTX_free(ctx); } ``` - The correct version should call `BN_CTX_end` before `BN_CTX_free`: ```cpp diff --git a/cpp/src/docs/crypto/CustomAllocatorLeak.md b/cpp/src/docs/crypto/CustomAllocatorLeak.md index 28ac24e..e8828ef 100644 --- a/cpp/src/docs/crypto/CustomAllocatorLeak.md +++ b/cpp/src/docs/crypto/CustomAllocatorLeak.md @@ -1,10 +1,7 @@ -# Custom allocator leak +# Memory leak related to custom allocator +This query identifies potential memory leaks from custom allocators like `BN_new`. -This query identifies potential memory leaks from custom allocators like -`BN_new`. - -The following example would be identified by the query as a potential memory -leak. +The following example would be identified by the query as a potential memory leak. ```cpp int compute(BIGNUM* a) { @@ -20,4 +17,4 @@ int compute(BIGNUM* a) { // The BIGNUM `b` may leak here. return a; } -``` \ No newline at end of file +``` diff --git a/cpp/src/docs/crypto/CustomAllocatorUseAfterFree.md b/cpp/src/docs/crypto/CustomAllocatorUseAfterFree.md index 72ff6d8..f1490bd 100644 --- a/cpp/src/docs/crypto/CustomAllocatorUseAfterFree.md +++ b/cpp/src/docs/crypto/CustomAllocatorUseAfterFree.md @@ -1,5 +1,4 @@ -# Custom allocator use-after-free - +# Memory use after free related to custom allocator This query identifies use-after-frees with custom allocators like `BN_new`. The following code snippet would be identified as an issue by the query. diff --git a/cpp/src/docs/crypto/InvalidKeySize.md b/cpp/src/docs/crypto/InvalidKeySize.md index c057c5b..d424ece 100644 --- a/cpp/src/docs/crypto/InvalidKeySize.md +++ b/cpp/src/docs/crypto/InvalidKeySize.md @@ -1,13 +1,7 @@ # Invalid key size +The `EVP_EncryptInit` and `EVP_EncryptInit_ex` functions do not take the size of the key as input, and the implementation will simply read the expected number of bytes from the buffer. This query will check if the size of the key buffer passed as an argument to one of these functions is equal to the key size of the corresponding cipher. -The `EVP_EncryptInit` and `EVP_EncryptInit_ex` functions do not take the size -of the key as input, and the implementation will simply read the expected number -of bytes from the buffer. This query will check if the size of the key buffer -passed as an argument to one of these functions is equal to the key size of the -corresponding cipher. - -The following code snippet is an example of an issue that would be identified by -the query. +The following code snippet is an example of an issue that would be identified by the query. ```cpp unsigned char key[16]; // This should be 32 for a 256 bit key @@ -16,4 +10,4 @@ RAND_bytes(key, sizeof(key)); unsigned char *iv = (unsigned char *)"0123456789ABCDEF"; // A fixed 16 byte IV EVP_EncryptInit_ex(EVP_CIPHER_CTX_new(), EVP_aes_256_cbc(), NULL, key, iv); -``` \ No newline at end of file +``` diff --git a/cpp/src/docs/crypto/MissingEngineInit.md b/cpp/src/docs/crypto/MissingEngineInit.md index 7323570..f72827a 100644 --- a/cpp/src/docs/crypto/MissingEngineInit.md +++ b/cpp/src/docs/crypto/MissingEngineInit.md @@ -1,10 +1,5 @@ -# Missing engine initialization - -This query identifies loaded OpenSSL engines which are not passed to both -`ENGINE_init` and `ENGINE_set_default`. `ENGINE_init` should always be called -when a new engine is loaded. It is generally good practise to also call -`ENGINE_set_default` to ensure that the primitives defined by the engine are -used by default. +# Missing OpenSSL engine initialization +This query identifies loaded OpenSSL engines which are not passed to both `ENGINE_init` and `ENGINE_set_default`. `ENGINE_init` should always be called when a new engine is loaded. It is generally good practice to also call `ENGINE_set_default` to ensure that the primitives defined by the engine are used by default. The following code snippet would be flagged as an issue by the query. @@ -12,4 +7,4 @@ The following code snippet would be flagged as an issue by the query. ENGINE* load_rdrand() { return ENGINE_by_id("rdrand"); } -``` \ No newline at end of file +``` diff --git a/cpp/src/docs/crypto/MissingErrorHandling.md b/cpp/src/docs/crypto/MissingErrorHandling.md index 49a613c..f06cf8c 100644 --- a/cpp/src/docs/crypto/MissingErrorHandling.md +++ b/cpp/src/docs/crypto/MissingErrorHandling.md @@ -1,18 +1,10 @@ -# Checking if returned error codes are acted on +# Missing error handling +Many of the functions comprising the OpenSSL API return an integer error code where 1 typically signals success, and 0 signals failure. To ensure that the implementation is secure, these error codes must be checked and acted upon. This query attempts to identify locations where a returned error code is not checked by the codebase. In this context, checked means that the return value flows to the condition of a control-flow statement like an if-statement, a while-statement, or a switch-statement. -Many of the functions comprising the OpenSSL API return an integer error code -where 1 typically signals success, and 0 signals failure. To ensure that the -implementation is secure, these error codes must be checked and acted upon. This -query attempts to identify locations where a returned error code is not checked -by the codebase. In this context, checked means that the return value flows -to the condition of a control-flow statement like an if-statement, a while- -statement, or a switch-statement. - -As an example, the following example function fails to check the return value -from `RAND_bytes`, and would be flagged as an issue by the query. +As an example, the following example function fails to check the return value from `RAND_bytes`, and would be flagged as an issue by the query. ```cpp void generateEncryptionKey(unsigned char *key, size_t keySize) { RAND_bytes(key, keySize); } -``` \ No newline at end of file +``` diff --git a/cpp/src/docs/crypto/MissingZeroization.md b/cpp/src/docs/crypto/MissingZeroization.md index 2540d30..41fdafb 100644 --- a/cpp/src/docs/crypto/MissingZeroization.md +++ b/cpp/src/docs/crypto/MissingZeroization.md @@ -1,21 +1,15 @@ -# Missing zeroization of random BIGNUMs +# Missing zeroization of potentially sensitive random BIGNUM +Randomly generated BIGNUMs often represent sensitive data (e.g. like ECDSA nonces). These should be cleared as they go out of scope to ensure that sensitive data does not remain in memory longer than required. -Randomly generated BIGNUMs often represent sensitive data (e.g. like ECDSA -nonces). These should be cleared as they go out of scope to ensure that -sensitive data does not remain in memory longer than required. - -This query identifies OpenSSL BIGNUMs which are inititialized using `BN_rand` -but not which are not zeroized using `BN_clear` before they go out of scope. The -following example function would be flagged as an issue by the query. +This query identifies OpenSSL BIGNUMs which are initialized using `BN_rand` but are not zeroized using `BN_clear` or `BN_clear_free` before they go out of scope. The following example function would be flagged as an issue by the query. ```cpp void compute() { BIGNUM *n = BN_new(); BN_rand(n, 128, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY); - + // Perform sensitive computation using `n`. BN_free(n); } ``` - diff --git a/cpp/src/docs/crypto/RandomBufferTooSmall.md b/cpp/src/docs/crypto/RandomBufferTooSmall.md index 0b307b6..1ac1d27 100644 --- a/cpp/src/docs/crypto/RandomBufferTooSmall.md +++ b/cpp/src/docs/crypto/RandomBufferTooSmall.md @@ -1,9 +1,5 @@ # Random buffer too small - -This query finds buffer overflows in calls to CSPRNGs like `RAND_bytes`, -`RAND_bytes_ex`, and `RAND_priv_bytes`. It is currently restricted to statically -allocated buffers to allow us to easily determine the input buffer size, but -could easily be extended to dynamically allocated buffers as well. +This query finds buffer overflows in calls to CSPRNGs like `RAND_bytes`, `RAND_bytes_ex`, `RAND_priv_bytes`, and `RAND_priv_bytes_ex`. It is currently restricted to statically allocated buffers to allow us to easily determine the input buffer size, but could easily be extended to dynamically allocated buffers as well. The following example code would be flagged as vulnerable by the query. @@ -14,4 +10,4 @@ The following example code would be flagged as vulnerable by the query. unsigned char key[KEY_SIZE]; int res = RAND_bytes(key, 32) -``` \ No newline at end of file +``` diff --git a/cpp/src/docs/crypto/StaticKeyFlow.md b/cpp/src/docs/crypto/StaticKeyFlow.md index b7af956..4e38eb1 100644 --- a/cpp/src/docs/crypto/StaticKeyFlow.md +++ b/cpp/src/docs/crypto/StaticKeyFlow.md @@ -1,5 +1,3 @@ -# Static key flow +# Crypto variable initialized using static key +This query flags locations where a static buffer is used as an argument to a function requiring strong, cryptographic level randomness (e.g. an encryption key). -This query flags locations where a static buffer is used as an argument to a -function requiring strong, cryptographic level randomness (e.g. an encryption -key). diff --git a/cpp/src/docs/crypto/StaticPasswordFlow.md b/cpp/src/docs/crypto/StaticPasswordFlow.md index 26ededf..070e776 100644 --- a/cpp/src/docs/crypto/StaticPasswordFlow.md +++ b/cpp/src/docs/crypto/StaticPasswordFlow.md @@ -1,5 +1,3 @@ -# Static password flow +# Crypto variable initialized using static password +This query flags locations where a static string is used as an argument to a function requiring a strong password (e.g. the password argument for a password-based key derivation function). -This query flags locations where a static string is used as an argument to a -function requiring a strong password (e.g. the password argument for a password- -based key derivation function). diff --git a/cpp/src/docs/crypto/UnbalancedBnCtx.md b/cpp/src/docs/crypto/UnbalancedBnCtx.md index 6ec9659..b371a85 100644 --- a/cpp/src/docs/crypto/UnbalancedBnCtx.md +++ b/cpp/src/docs/crypto/UnbalancedBnCtx.md @@ -1,15 +1,13 @@ # Unbalanced BN_CTX_start and BN_CTX_end pair - This query detects unbalanced pairs of `BN_CTX_start` and `BN_CTX_end` calls in OpenSSL code. These functions must be used in matching pairs to properly manage temporary BIGNUM allocations within a `BN_CTX` context. `BN_CTX_start` marks the beginning of a nested allocation scope, and `BN_CTX_end` releases all temporary BIGNUMs allocated via `BN_CTX_get` since the matching `BN_CTX_start`. Each call to `BN_CTX_start` must have a corresponding `BN_CTX_end` call, and vice versa. Common issues include: -- Calling `BN_CTX_start` without a corresponding `BN_CTX_end` (memory leak of temporary allocations) -- Calling `BN_CTX_end` without a corresponding `BN_CTX_start` (undefined behavior) -- Missing `BN_CTX_end` in error paths - +* Calling `BN_CTX_start` without a corresponding `BN_CTX_end` (memory leak of temporary allocations) +* Calling `BN_CTX_end` without a corresponding `BN_CTX_start` (undefined behavior) +* Missing `BN_CTX_end` in error paths The following example would be flagged for missing `BN_CTX_end`: ```cpp @@ -30,7 +28,6 @@ int compute(BN_CTX *ctx, BIGNUM *result) { return 1; } ``` - The correct version ensures `BN_CTX_end` is called on all code paths: ```cpp diff --git a/cpp/src/docs/crypto/UseOfLegacyAlgorithm.md b/cpp/src/docs/crypto/UseOfLegacyAlgorithm.md index 3a78418..9f24436 100644 --- a/cpp/src/docs/crypto/UseOfLegacyAlgorithm.md +++ b/cpp/src/docs/crypto/UseOfLegacyAlgorithm.md @@ -1,26 +1,21 @@ -# Legacy cryptographic algorithm +# Use of legacy cryptographic algorithm +This query finds uses of weak or deprecated cryptographic algorithms like `MD5`, `SHA1`, and `DES`. The query will flag calls to functions containing any of the following names: -This query finds uses of weak or depracated cryptographic algorithms like `MD5`, -`SHA1`, and `DES`. The query will flag calls to functions containing any of the -following names: - - - `MD2` - - `MD4` - - `MD5` - - `RIPEMD` - - `SHA1` - - `Streebog` - - `Whirlpool` - - - `PBKDF1` - - - `Arcfour` - - `Blowfish` - - `CAST` - - `DES` - - `IDEA` - - `Kasumi` - - `Magma` - - `RC2` - - `RC4` - - `TDEA` \ No newline at end of file +* `MD2` +* `MD4` +* `MD5` +* `RIPEMD` +* `SHA1` +* `Streebog` +* `Whirlpool` +* `PBKDF1` +* `ArcFour` +* `Blowfish` +* `CAST` +* `DES` +* `IDEA` +* `Kasumi` +* `Magma` +* `RC2` +* `RC4` +* `TDEA` diff --git a/cpp/src/docs/crypto/WeakRandomnessTaint.md b/cpp/src/docs/crypto/WeakRandomnessTaint.md index e9e3a6c..739d3eb 100644 --- a/cpp/src/docs/crypto/WeakRandomnessTaint.md +++ b/cpp/src/docs/crypto/WeakRandomnessTaint.md @@ -1,8 +1,5 @@ -# Weak randomness taint +# Crypto variable initialized using weak randomness +This query finds crypto variables initialized using weak randomness like process IDs or timestamps. -This query finds crypto variables initialized using weak randomness like process -IDs or timestamps. +Limitations: the query does not track weak randomness through a hash function, so keys derived by hashing a weak source are not flagged. -A long-term goal is to be able to find locations where weak randomness is hashed -to create cryptographic keys. This requires taint flow from hash function inputs -to the digest, which is currently not supported. diff --git a/cpp/src/docs/security/AsyncUnsafeSignalHandler/AsyncUnsafeSignalHandler.md b/cpp/src/docs/security/AsyncUnsafeSignalHandler/AsyncUnsafeSignalHandler.md index 23d71c9..e016c7a 100644 --- a/cpp/src/docs/security/AsyncUnsafeSignalHandler/AsyncUnsafeSignalHandler.md +++ b/cpp/src/docs/security/AsyncUnsafeSignalHandler/AsyncUnsafeSignalHandler.md @@ -1,12 +1,20 @@ # Async unsafe signal handler This is a CodeQL query constructed to find signal handlers that are performing async unsafe operations. -The kernel defines a list of async-safe signal functions in its [man page](https://man7.org/linux/man-pages/man7/signal-safety.7.html). Any signal handler that performs operations that are not safe asynchronously may be vulnerable. +Because a signal may be delivered at any moment, e.g., in the middle of a malloc, the handler shouldn't touch the program's internal state. + +The kernel defines a list of async-safe signal functions in its [man page](https://man7.org/linux/man-pages/man7/signal-safety.7.html). Any signal handler that performs operations that are not safe for asynchronous execution may be vulnerable. + +Moreover, signal handlers may be re-entered. Handlers' logic should take that possibility into account. + +If the issue is exploitable depends on attacker's ability to deliver the signal. Remote attacks may be limitted to some signals (like SIGALRM and SIGURG), while local attacks could use all signals. ## Recommendation Attempt to keep signal handlers as simple as possible. Only call async-safe functions from signal handlers. +Block delivery of new signals inside signal handlers to prevent handler re-entrancy issues. + ## Example @@ -55,6 +63,7 @@ In this example, while both syntatically valid, a correct handler is defined in ## References +* [Michal Zalewski, "Delivering Signals for Fun and Profit"](https://lcamtuf.coredump.cx/signals.txt) * [SEI CERT C Coding Standard "SIG30-C. Call only asynchronous-safe functions within signal handlers"](https://wiki.sei.cmu.edu/confluence/display/c/SIG30-C.+Call+only+asynchronous-safe+functions+within+signal+handlers) * [regreSSHion: RCE in OpenSSH's server, on glibc-based Linux systems](https://www.qualys.com/2024/07/01/cve-2024-6387/regresshion.txt) * [signal-safety - async-signal-safe functions](https://man7.org/linux/man-pages/man7/signal-safety.7.html) diff --git a/cpp/src/docs/security/CStrnFinder/CStrnFinder.md b/cpp/src/docs/security/CStrnFinder/CStrnFinder.md index 99a12c0..09b3df4 100644 --- a/cpp/src/docs/security/CStrnFinder/CStrnFinder.md +++ b/cpp/src/docs/security/CStrnFinder/CStrnFinder.md @@ -1,4 +1,4 @@ -# CStrNFinder +# Invalid string size passed to string manipulation function Some functions receive input buffer (strings, arrays, ...) and the buffer's size as separate arguments. For manually provided (hardcoded) sizes one may make simple mistakes resulting from typos or misunderstanding of fuctions' APIs. For example: * a hardcoded string's length may be incorrectly stated diff --git a/cpp/src/docs/security/InconsistentReturnValueHandling/InconsistentReturnValueHandling.md b/cpp/src/docs/security/InconsistentReturnValueHandling/InconsistentReturnValueHandling.md index a8e65b0..c1b8eee 100644 --- a/cpp/src/docs/security/InconsistentReturnValueHandling/InconsistentReturnValueHandling.md +++ b/cpp/src/docs/security/InconsistentReturnValueHandling/InconsistentReturnValueHandling.md @@ -11,7 +11,6 @@ The query categorizes each comparison into one of the following categories: * Another function's return value (e.g., `ret != other_func()`) * Passed as argument to another function (e.g., `if (check(ret))`) * Arithmetic expression - When at least 75% of a function's return value comparisons fall into one category, the remaining comparisons in a different category are flagged as potentially incorrect. @@ -20,6 +19,8 @@ Review each flagged call site and verify that the comparison matches the functio ## Example +In this example, `process_items` returns the number of items processed or `-1` on error. Most call sites correctly compare the return value with a numeric literal. However, one call site mistakenly compares it with `sizeof(struct header)`, which is inconsistent with how the return value is used everywhere else. + ```c struct header { @@ -60,9 +61,8 @@ void example() { result = process_items(items, 7); if (result > sizeof(struct header)) { /* wrong comparison */ } } -``` -In this example, `process_items` returns the number of items processed or `-1` on error. Most call sites correctly compare the return value with a numeric literal. However, one call site mistakenly compares it with `sizeof(struct header)`, which is inconsistent with how the return value is used everywhere else. +``` ## References * [CWE-252: Unchecked Return Value](https://cwe.mitre.org/data/definitions/252.html) diff --git a/cpp/src/docs/security/IteratorInvalidation/IteratorInvalidation.md b/cpp/src/docs/security/IteratorInvalidation/IteratorInvalidation.md index dcb45b7..c8a85a6 100644 --- a/cpp/src/docs/security/IteratorInvalidation/IteratorInvalidation.md +++ b/cpp/src/docs/security/IteratorInvalidation/IteratorInvalidation.md @@ -1,5 +1,5 @@ # Iterator invalidation -This is a CodeQL query that detects modifications to C++ containers while iterating over them. Such modifications can invalidate active iterators, leading to undefined behavior including use-after-free, out-of-bounds access, and crashes. +Detects modifications to C++ containers while iterating over them. Such modifications can invalidate active iterators, leading to undefined behavior including use-after-free, out-of-bounds access, and crashes. When a container is modified (e.g., calling `push_back` on a `std::vector` during a range-based for loop), existing iterators may become invalid. Continuing to use those iterators results in undefined behavior per the C++ Standard. @@ -9,11 +9,10 @@ The query tracks data flow between the iterated container and the modifying call ## Recommendation Avoid modifying containers while iterating over them. Common safe alternatives include: -- Collect modifications and apply them after the loop completes -- Use the erase-remove idiom for deletions -- Iterate over a copy of the container if modification is necessary -- Use index-based loops instead of iterator-based loops when the container supports random access - +* Collect modifications and apply them after the loop completes +* Use the erase-remove idiom for deletions +* Iterate over a copy of the container if modification is necessary +* Use index-based loops instead of iterator-based loops when the container supports random access ## Example @@ -41,6 +40,7 @@ void good_example(std::vector& vec) { vec.push_back(val); } } + ``` In the bad example, calling `push_back` inside the range-based for loop may cause the vector to reallocate, invalidating all iterators including the loop's internal iterator. The good example collects values to add and applies them after the loop. diff --git a/cpp/src/docs/security/UnsafeImplicitConversions/UnsafeImplicitConversions.md b/cpp/src/docs/security/UnsafeImplicitConversions/UnsafeImplicitConversions.md index ef2a505..42df846 100644 --- a/cpp/src/docs/security/UnsafeImplicitConversions/UnsafeImplicitConversions.md +++ b/cpp/src/docs/security/UnsafeImplicitConversions/UnsafeImplicitConversions.md @@ -1,9 +1,9 @@ -# Unsafe Implicit Conversions -Integer variables may be implicitly casted to a type of different size and signedness. If the variable is casted to a type of smaller bit-size or different signedness without a proper bound checking, then the casting may silently truncate the variable's value or make it semantically meaningless. This query finds implicit casts that cannot be proven to be safe. +# Find all problematic implicit casts +Integer variables may be implicitly casted to a type of different size and signedness. If the variable is casted to a type of smaller bit-size or different signedness without a proper bound checking, then the casting may silently change the variable's value or make it semantically meaningless. Since implicit casts are introduced by the compiler, developers may be not aware of them and the compiled code may behave incorrectly aka may have bugs. This query finds implicit casts that cannot be proven to be safe. Safe means that the input value is known to fit into destination type aka the value won't change. ## Recommendation -Either change variables types to avoid implicit conversions or verify that converting highlighted variables is always safe. +Either adjust types of problematic variables to avoid implicit conversions, make the code validate that converting the variables is safe, or add explicit conversions that would make the compiler avoid introducing implicit ones. ## Example @@ -35,5 +35,5 @@ int main(int argc, char *argv[]) { } ``` -In this example, the call to `malloc_wrapper` may silently truncate `large` variable, and so the allocated buffer will be of smaller size than the `test` function expects. +In this example, the call to `malloc_wrapper` may silently truncate `large` variable so that the allocated buffer will be of smaller size than the `test` function expects. diff --git a/cpp/src/security/InconsistentReturnValueHandling/InconsistentReturnValueHandling.ql b/cpp/src/security/InconsistentReturnValueHandling/InconsistentReturnValueHandling.ql index 2c8e1da..434894c 100644 --- a/cpp/src/security/InconsistentReturnValueHandling/InconsistentReturnValueHandling.ql +++ b/cpp/src/security/InconsistentReturnValueHandling/InconsistentReturnValueHandling.ql @@ -7,6 +7,7 @@ * @precision medium * @id tob/cpp/inconsistent-retval-handling * @tags security + * @group security */ import cpp diff --git a/cpp/src/security/IteratorInvalidation/IteratorInvalidation.cpp b/cpp/src/security/IteratorInvalidation/IteratorInvalidation.cpp new file mode 100644 index 0000000..2a76f1e --- /dev/null +++ b/cpp/src/security/IteratorInvalidation/IteratorInvalidation.cpp @@ -0,0 +1,23 @@ +#include + +// BAD: push_back during range-based for loop invalidates iterators +void bad_example(std::vector& vec) { + for (auto& elem : vec) { + if (elem < 0) { + vec.push_back(-elem); // invalidates iterators + } + } +} + +// GOOD: collect modifications, apply after loop +void good_example(std::vector& vec) { + std::vector to_add; + for (auto& elem : vec) { + if (elem < 0) { + to_add.push_back(-elem); + } + } + for (auto& val : to_add) { + vec.push_back(val); + } +} diff --git a/cpp/src/security/IteratorInvalidation/IteratorInvalidation.qhelp b/cpp/src/security/IteratorInvalidation/IteratorInvalidation.qhelp new file mode 100644 index 0000000..1bde5dd --- /dev/null +++ b/cpp/src/security/IteratorInvalidation/IteratorInvalidation.qhelp @@ -0,0 +1,35 @@ + + + +

Detects modifications to C++ containers while iterating over them. Such modifications can invalidate active iterators, leading to undefined behavior including use-after-free, out-of-bounds access, and crashes.

+ +

When a container is modified (e.g., calling push_back on a std::vector during a range-based for loop), existing iterators may become invalid. Continuing to use those iterators results in undefined behavior per the C++ Standard.

+ +

The query tracks data flow between the iterated container and the modifying call to ensure they operate on the same object.

+
+ + +

Avoid modifying containers while iterating over them. Common safe alternatives include:

+ +
    +
  • Collect modifications and apply them after the loop completes
  • +
  • Use the erase-remove idiom for deletions
  • +
  • Iterate over a copy of the container if modification is necessary
  • +
  • Use index-based loops instead of iterator-based loops when the container supports random access
  • +
+
+ + + + +

In the bad example, calling push_back inside the range-based for loop may cause the vector to reallocate, invalidating all iterators including the loop's internal iterator. The good example collects values to add and applies them after the loop.

+
+ + +
  • C++ Standard [container.requirements.general] — iterator invalidation rules per container type
  • +
  • CWE-416: Use After Free
  • +
  • Trail of Bits — Itergator: Iterator Invalidation Detector
  • +
    +
    diff --git a/cpp/src/security/UnsafeImplicitConversions/UnsafeImplicitConversions.ql b/cpp/src/security/UnsafeImplicitConversions/UnsafeImplicitConversions.ql index a6b3515..f7666fb 100644 --- a/cpp/src/security/UnsafeImplicitConversions/UnsafeImplicitConversions.ql +++ b/cpp/src/security/UnsafeImplicitConversions/UnsafeImplicitConversions.ql @@ -6,6 +6,7 @@ * @tags security * @problem.severity error * @precision high + * @group security */ import cpp diff --git a/cpp/test/codeql-pack.lock.yml b/cpp/test/codeql-pack.lock.yml index 78b4cc1..3ad3544 100644 --- a/cpp/test/codeql-pack.lock.yml +++ b/cpp/test/codeql-pack.lock.yml @@ -2,27 +2,27 @@ lockVersion: 1.0.0 dependencies: codeql/controlflow: - version: 2.0.22 + version: 2.0.31 codeql/cpp-all: - version: 6.1.3 + version: 9.0.0 codeql/dataflow: - version: 2.0.22 + version: 2.1.3 codeql/mad: - version: 1.0.38 + version: 1.0.47 codeql/quantum: - version: 0.0.16 + version: 0.0.25 codeql/rangeanalysis: - version: 1.0.38 + version: 1.0.47 codeql/ssa: - version: 2.0.14 + version: 2.0.23 codeql/tutorial: - version: 1.0.38 + version: 1.0.47 codeql/typeflow: - version: 1.0.38 + version: 1.0.47 codeql/typetracking: - version: 2.0.22 + version: 2.0.31 codeql/util: - version: 2.0.25 + version: 2.0.34 codeql/xml: - version: 1.0.38 + version: 1.0.47 compiled: false diff --git a/doc/QUERIES.md b/doc/QUERIES.md new file mode 100644 index 0000000..7971538 --- /dev/null +++ b/doc/QUERIES.md @@ -0,0 +1,58 @@ + + +### C and C++ + +#### Cryptography + +| Name | Description | Severity | Precision | +| --- | ----------- | :----: | :--------: | +|[BN_CTX_free called before BN_CTX_end](../cpp/src/docs/crypto/BnCtxFreeBeforeEnd.md)|Detects BN_CTX_free called before BN_CTX_end, which violates the required lifecycle|error|medium| +|[Crypto variable initialized using static key](../cpp/src/docs/crypto/StaticKeyFlow.md)|Finds crypto variables initialized using static keys|error|high| +|[Crypto variable initialized using static password](../cpp/src/docs/crypto/StaticPasswordFlow.md)|Finds crypto variables initialized using static passwords|error|high| +|[Crypto variable initialized using weak randomness](../cpp/src/docs/crypto/WeakRandomnessTaint.md)|Finds crypto variables initialized using weak randomness|error|high| +|[Invalid key size](../cpp/src/docs/crypto/InvalidKeySize.md)|Tests if keys passed to EVP_EncryptInit and EVP_EncryptInit_ex have the same size as the key size of the cipher used|warning|medium| +|[Memory leak related to custom allocator](../cpp/src/docs/crypto/CustomAllocatorLeak.md)|Finds memory leaks from custom allocated memory|warning|medium| +|[Memory use after free related to custom allocator](../cpp/src/docs/crypto/CustomAllocatorUseAfterFree.md)|Finds use-after-frees related to custom allocators like `BN_new`|warning|medium| +|[Missing OpenSSL engine initialization](../cpp/src/docs/crypto/MissingEngineInit.md)|Finds created OpenSSL engines that may not be properly initialized|warning|medium| +|[Missing error handling](../cpp/src/docs/crypto/MissingErrorHandling.md)|Checks if returned error codes are properly checked|warning|high| +|[Missing zeroization of potentially sensitive random BIGNUM](../cpp/src/docs/crypto/MissingZeroization.md)|Determines if random bignums are properly zeroized|warning|medium| +|[Random buffer too small](../cpp/src/docs/crypto/RandomBufferTooSmall.md)|Finds buffer overflows in calls to CSPRNGs|warning|high| +|[Unbalanced BN_CTX_start and BN_CTX_end pair](../cpp/src/docs/crypto/UnbalancedBnCtx.md)|Detects if one call in the BN_CTX_start/BN_CTX_end pair is missing|warning|medium| +|[Use of legacy cryptographic algorithm](../cpp/src/docs/crypto/UseOfLegacyAlgorithm.md)|Detects potential instantiations of legacy cryptographic algorithms|warning|medium| + +#### Security + +| Name | Description | Severity | Precision | +| --- | ----------- | :----: | :--------: | +|[Async unsafe signal handler](../cpp/src/docs/security/AsyncUnsafeSignalHandler/AsyncUnsafeSignalHandler.md)|Async unsafe signal handler (like the one used in CVE-2024-6387)|warning|high| +|[Decrementation overflow when comparing](../cpp/src/docs/security/DecOverflowWhenComparing/DecOverflowWhenComparing.md)|This query finds unsigned integer overflows resulting from unchecked decrementation during comparison.|error|high| +|[Find all problematic implicit casts](../cpp/src/docs/security/UnsafeImplicitConversions/UnsafeImplicitConversions.md)|Find all implicit casts that may be problematic. That is, casts that may result in unexpected truncation, reinterpretation or widening of values.|error|high| +|[Inconsistent handling of return values from a specific function](../cpp/src/docs/security/InconsistentReturnValueHandling/InconsistentReturnValueHandling.md)|Detects functions whose return values are compared inconsistently across call sites, which may indicate bugs.|warning|medium| +|[Invalid string size passed to string manipulation function](../cpp/src/docs/security/CStrnFinder/CStrnFinder.md)|Finds calls to functions that take as input a string and its size as separate arguments (e.g., `strncmp`, `strncat`, ...) and the size argument is wrong|error|low| +|[Iterator invalidation](../cpp/src/docs/security/IteratorInvalidation/IteratorInvalidation.md)|Modifying a container while iterating over it can invalidate iterators, leading to undefined behavior.|warning|medium| +|[Missing null terminator](../cpp/src/docs/security/NoNullTerminator/NoNullTerminator.md)|This query finds incorrectly initialized strings that are passed to functions expecting null-byte-terminated strings|error|high| + +### Go + +#### Cryptography + +| Name | Description | Severity | Precision | +| --- | ----------- | :----: | :--------: | +|[Message not hashed before signature verification](../go/src/docs/crypto/MsgNotHashedBeforeSigVerfication/MsgNotHashedBeforeSigVerfication.md)|Detects calls to (EC)DSA APIs with a message that was not hashed. If the message is longer than the expected hash digest size, it is silently truncated|error|medium| + +#### Security + +| Name | Description | Severity | Precision | +| --- | ----------- | :----: | :--------: | +|[Invalid file permission parameter](../go/src/docs/security/FilePermsFlaws/FilePermsFlaws.md)|Finds non-octal (e.g., `755` vs `0o755`) and unsupported (e.g., `04666`) literals used as a filesystem permission parameter (`FileMode`)|error|medium| +|[Missing MinVersion in tls.Config](../go/src/docs/security/MissingMinVersionTLS/MissingMinVersionTLS.md)|Finds uses of tls.Config where MinVersion is not set and the project's minimum Go version (from go.mod) indicates insecure defaults: Go < 1.18 for clients or Go < 1.22 for servers. Does not mark explicitly set versions (including explicitly insecure ones).|error|medium| +|[Trim functions misuse](../go/src/docs/security/TrimMisuse/TrimMisuse.md)|Finds calls to `string.{Trim,TrimLeft,TrimRight}` with the 2nd argument not being a cutset but a continuous substring to be trimmed|error|low| + +### Java and Kotlin + +#### Security + +| Name | Description | Severity | Precision | +| --- | ----------- | :----: | :--------: | +|[Recursive functions](../java/src/docs/security/Recursion/Recursion.md)|Detects possibly unbounded recursive calls|warning|low| + diff --git a/go/src/codeql-pack.lock.yml b/go/src/codeql-pack.lock.yml index bfe9e40..d28f5e9 100644 --- a/go/src/codeql-pack.lock.yml +++ b/go/src/codeql-pack.lock.yml @@ -2,23 +2,23 @@ lockVersion: 1.0.0 dependencies: codeql/concepts: - version: 0.0.12 + version: 0.0.21 codeql/controlflow: - version: 2.0.22 + version: 2.0.31 codeql/dataflow: - version: 2.0.22 + version: 2.1.3 codeql/go-all: - version: 5.0.5 + version: 7.0.5 codeql/mad: - version: 1.0.38 + version: 1.0.47 codeql/ssa: - version: 2.0.14 + version: 2.0.23 codeql/threat-models: - version: 1.0.38 + version: 1.0.47 codeql/tutorial: - version: 1.0.38 + version: 1.0.47 codeql/typetracking: - version: 2.0.22 + version: 2.0.31 codeql/util: - version: 2.0.25 + version: 2.0.34 compiled: false diff --git a/go/src/docs/security/FilePermsFlaws/FilePermsFlaws.md b/go/src/docs/security/FilePermsFlaws/FilePermsFlaws.md index c02ba90..24f3a75 100644 --- a/go/src/docs/security/FilePermsFlaws/FilePermsFlaws.md +++ b/go/src/docs/security/FilePermsFlaws/FilePermsFlaws.md @@ -1,4 +1,4 @@ -# File permission flaws +# Invalid file permission parameter `FileMode` parameters (for methods accessing filesystem) are usually provided as octal numbers. This query detects hardcoded `FileMode`s that are not in octal form - numbers you see in the code are not what you will get. The query filters out some commonly used, not-octal integers to reduce number of false positives. Moreover, the query detect calls to permission-changing methods (e.g., `os.Chmod`, `os.Mkdir`) when the `FileMode` has more than 9 bits set - other bits may not be used, depending on the operating system. diff --git a/go/src/docs/security/MissingMinVersionTLS/MissingMinVersionTLS.md b/go/src/docs/security/MissingMinVersionTLS/MissingMinVersionTLS.md index e9c936f..6a4d4fc 100644 --- a/go/src/docs/security/MissingMinVersionTLS/MissingMinVersionTLS.md +++ b/go/src/docs/security/MissingMinVersionTLS/MissingMinVersionTLS.md @@ -1,21 +1,19 @@ # Missing MinVersion in tls.Config -Golang's `tls.Config` struct accepts `MinVersion` parameter that sets minimum accepted TLS version. If the parameter is not provided, the default depends on the Go version in use: +Golang's `tls.Config` struct accepts a `MinVersion` parameter that sets the minimum accepted TLS version. If the parameter is not provided, the default depends on the Go version in use. Since Go 1.18, `crypto/tls` clients default to TLS 1.2 (previously TLS 1.0). Since Go 1.22, `crypto/tls` servers also default to TLS 1.2 (previously TLS 1.0). -- Since **Go 1.18**, clients default to TLS 1.2 (previously TLS 1.0) -- Since **Go 1.22**, servers also default to TLS 1.2 (previously TLS 1.0) +This query flags `tls.Config` values where `MinVersion` is never set explicitly and the project's `go.mod` declares support for a Go version where the defaults are insecure: -For projects that support older Go versions, leaving `MinVersion` unset may still permit TLS 1.0 or 1.1, which are deprecated and should not be used. +* Go < 1.18 for client-side configs (when client default is TLS 1.0) +* Go < 1.22 for server-side configs (when server default is TLS 1.0) +TLS 1.0 and 1.1 are deprecated and should not be used. -This query flags `tls.Config` values where `MinVersion` is never set explicitly and the project's `go.mod` declares support for: -- **Go < 1.18** for client-side configs (when client default is TLS 1.0) -- **Go < 1.22** for server-side configs (when server default is TLS 1.0) ## Recommendation Explicitly set the TLS version to TLS 1.2 or higher: -- For projects using Go < 1.18: Set `MinVersion` for both clients and servers -- For projects using Go 1.18-1.21: Set `MinVersion` for servers -- For projects using Go >= 1.22: Defaults are secure, but explicit setting is still recommended +* For projects using Go < 1.18: Set `MinVersion` for both clients and servers +* For projects using Go 1.18-1.21: Set `MinVersion` for servers +* For projects using Go >= 1.22: Defaults are secure, but explicit setting is still recommended ## Example @@ -39,7 +37,7 @@ func test1() *tls.Config { func test2() *tls.Config { config := &tls.Config{} - config.MinVersion = 0 // GOOD: min version is set (hovewer, to the default one) + config.MinVersion = 0 // GOOD: min version is set (however, to the default one) result := config return result } @@ -61,13 +59,13 @@ func main() { } ``` -In this example, the `http.Server` may be set with TLS configuration created by either `test1` or `test2` functions. For projects with `go` directive < 1.22, the `test1` result will be highlighted by this query, as it fails to explicitly set minimum supported TLS version. The `test2` result will not be marked, even though it also uses the default value for minimum version. That is because the `test2` is explicit, and this query assumes that developers knew what they are doing. +In this example, the `http.Server` may be set with TLS configuration created by either `test1` or `test2` functions. For projects with a `go` directive < 1.22, the `test1` result will be highlighted by this query, as it fails to explicitly set minimum supported TLS version. The `test2` result will not be marked, even though it also uses the default value for minimum version. That is because the `test2` is explicit, and this query assumes that developers knew what they are doing. Note: The query behavior depends on the `go` directive in `go.mod`: -- **Go < 1.18**: Both client and server configs without MinVersion are flagged -- **Go 1.18-1.21**: Only server configs without MinVersion are flagged -- **Go >= 1.22**: No configs are flagged (both defaults are secure) +* Go < 1.18: Both client and server configs without MinVersion are flagged +* Go 1.18-1.21: Only server configs without MinVersion are flagged +* Go >= 1.22: No configs are flagged (both defaults are secure) ## References * [tls.Config specification](https://pkg.go.dev/crypto/tls#Config) diff --git a/go/src/security/MissingMinVersionTLS/MissingMinVersionTLS.go b/go/src/security/MissingMinVersionTLS/MissingMinVersionTLS.go index 56fea1f..5c3f5f0 100644 --- a/go/src/security/MissingMinVersionTLS/MissingMinVersionTLS.go +++ b/go/src/security/MissingMinVersionTLS/MissingMinVersionTLS.go @@ -17,7 +17,7 @@ func test1() *tls.Config { func test2() *tls.Config { config := &tls.Config{} - config.MinVersion = 0 // GOOD: min version is set (hovewer, to the default one) + config.MinVersion = 0 // GOOD: min version is set (however, to the default one) result := config return result } diff --git a/go/test/codeql-pack.lock.yml b/go/test/codeql-pack.lock.yml index bfe9e40..d28f5e9 100644 --- a/go/test/codeql-pack.lock.yml +++ b/go/test/codeql-pack.lock.yml @@ -2,23 +2,23 @@ lockVersion: 1.0.0 dependencies: codeql/concepts: - version: 0.0.12 + version: 0.0.21 codeql/controlflow: - version: 2.0.22 + version: 2.0.31 codeql/dataflow: - version: 2.0.22 + version: 2.1.3 codeql/go-all: - version: 5.0.5 + version: 7.0.5 codeql/mad: - version: 1.0.38 + version: 1.0.47 codeql/ssa: - version: 2.0.14 + version: 2.0.23 codeql/threat-models: - version: 1.0.38 + version: 1.0.47 codeql/tutorial: - version: 1.0.38 + version: 1.0.47 codeql/typetracking: - version: 2.0.22 + version: 2.0.31 codeql/util: - version: 2.0.25 + version: 2.0.34 compiled: false diff --git a/java/src/codeql-pack.lock.yml b/java/src/codeql-pack.lock.yml index a8830bf..a3a5c01 100644 --- a/java/src/codeql-pack.lock.yml +++ b/java/src/codeql-pack.lock.yml @@ -2,31 +2,31 @@ lockVersion: 1.0.0 dependencies: codeql/controlflow: - version: 2.0.22 + version: 2.0.31 codeql/dataflow: - version: 2.0.22 + version: 2.1.3 codeql/java-all: - version: 7.8.2 + version: 9.0.3 codeql/mad: - version: 1.0.38 + version: 1.0.47 codeql/quantum: - version: 0.0.16 + version: 0.0.25 codeql/rangeanalysis: - version: 1.0.38 + version: 1.0.47 codeql/regex: - version: 1.0.38 + version: 1.0.47 codeql/ssa: - version: 2.0.14 + version: 2.0.23 codeql/threat-models: - version: 1.0.38 + version: 1.0.47 codeql/tutorial: - version: 1.0.38 + version: 1.0.47 codeql/typeflow: - version: 1.0.38 + version: 1.0.47 codeql/typetracking: - version: 2.0.22 + version: 2.0.31 codeql/util: - version: 2.0.25 + version: 2.0.34 codeql/xml: - version: 1.0.38 + version: 1.0.47 compiled: false diff --git a/java/test/codeql-pack.lock.yml b/java/test/codeql-pack.lock.yml index a8830bf..a3a5c01 100644 --- a/java/test/codeql-pack.lock.yml +++ b/java/test/codeql-pack.lock.yml @@ -2,31 +2,31 @@ lockVersion: 1.0.0 dependencies: codeql/controlflow: - version: 2.0.22 + version: 2.0.31 codeql/dataflow: - version: 2.0.22 + version: 2.1.3 codeql/java-all: - version: 7.8.2 + version: 9.0.3 codeql/mad: - version: 1.0.38 + version: 1.0.47 codeql/quantum: - version: 0.0.16 + version: 0.0.25 codeql/rangeanalysis: - version: 1.0.38 + version: 1.0.47 codeql/regex: - version: 1.0.38 + version: 1.0.47 codeql/ssa: - version: 2.0.14 + version: 2.0.23 codeql/threat-models: - version: 1.0.38 + version: 1.0.47 codeql/tutorial: - version: 1.0.38 + version: 1.0.47 codeql/typeflow: - version: 1.0.38 + version: 1.0.47 codeql/typetracking: - version: 2.0.22 + version: 2.0.31 codeql/util: - version: 2.0.25 + version: 2.0.34 codeql/xml: - version: 1.0.38 + version: 1.0.47 compiled: false diff --git a/scripts/queries_table_generator.py b/scripts/queries_table_generator.py index 5985a0d..b652cba 100644 --- a/scripts/queries_table_generator.py +++ b/scripts/queries_table_generator.py @@ -1,28 +1,42 @@ #!/usr/bin/env python -import sys +"""Generate markdown tables for all queries. + +Queries are listed from all Qlpacks' "full" suites, grouped by the custom +"group" metadata, and ordered by name (experimental queries at the end). +""" + import json -import yaml import subprocess -from typing import List, Optional -from pathlib import Path +import sys +from dataclasses import dataclass from functools import total_ordering +from pathlib import Path +from typing import Optional + +import yaml -''' -Generate markdown tables for all queries +@dataclass(frozen=True) +class QueryMetadata: + """Metadata extracted from a single .ql query file.""" -Queries are listed from all Qlpacks' "full" suites. + name: str + id: str + description: str + kind: str + tags: str + group: str + problem_severity: Optional[str] + precision: Optional[str] + security_severity: Optional[str] -Queries are grouped by the custom "group" metadata and -ordered by name (experimental queries at the end) -''' @total_ordering class QlQuery: - def __init__(self, path, lang, name, _id, description, kind, tags, group, problem_severity, precision, security_severity): - self.lang = lang - + def __init__(self, path, doc_lang_dir: str, metadata: QueryMetadata): + self.doc_lang_dir = doc_lang_dir + qlpack_base = Path(path).parent while "qlpack.yml" not in [x.name for x in qlpack_base.glob("*")]: qlpack_base = qlpack_base.parent @@ -31,16 +45,20 @@ def __init__(self, path, lang, name, _id, description, kind, tags, group, proble self.path = Path(path) self.rel_path = Path(path).relative_to(qlpack_base) - self.name: str = name - self.id: str = _id - self.description: str = description - self.kind: str = kind - self.tags: List[str] = sorted(tags.split(' ')) - self.group: str = group - - self.problem_severity: Optional[str] = problem_severity - self.precision: Optional[str] = precision - self.security_severity: Optional[float] = float(security_severity) if security_severity is not None else None + self.name: str = metadata.name + self.id: str = metadata.id + self.description: str = metadata.description + self.kind: str = metadata.kind + self.tags: list[str] = sorted(metadata.tags.split(' ')) + self.group: str = metadata.group + + self.problem_severity: Optional[str] = metadata.problem_severity + self.precision: Optional[str] = metadata.precision + self.security_severity: Optional[float] = ( + float(metadata.security_severity) + if metadata.security_severity is not None + else None + ) def __hash__(self): return hash(self.id) @@ -52,19 +70,22 @@ def __gt__(self, other: "QlQuery"): if self.group != other.group: return self.group < other.group - if 'experimental' in self.tags: - if 'experimental' not in other.tags: - return False - if 'experimental' in other.tags: - return True + # Experimental queries sort after non-experimental ones. + self_exp = 'experimental' in self.tags + other_exp = 'experimental' in other.tags + if self_exp and not other_exp: + return False + if other_exp and not self_exp: + return True return self.name < other.name def md_table_line(self): - qhelp_markdown_path = (Path(self.lang)/'src'/'docs'/self.rel_path).with_suffix('.md') + qhelp_markdown_path = ( + Path('..') / self.doc_lang_dir / 'src' / 'docs' / self.rel_path + ).with_suffix('.md') cells = [ - # f"{self.id}", - f'[{self.name}](./{qhelp_markdown_path})', + f'[{self.name}]({qhelp_markdown_path})', f"{self.description}", f"{self.problem_severity}", f"{self.precision}", @@ -74,37 +95,37 @@ def md_table_line(self): return "|" + "|".join(cells) + "|" @staticmethod - def parse_from_file(path: Path, lang: str) -> "QlQuery": - metadata = subprocess.check_output([ - 'codeql', - 'resolve', - 'metadata', - str(path), - ], stderr=subprocess.DEVNULL).decode("utf-8") - - metadata = json.loads(metadata) - - return QlQuery( - path, - lang, - metadata["name"], - metadata["id"], - metadata["description"], - metadata["kind"], - metadata["tags"], - metadata.get("group", "security"), - metadata.get("problem.severity"), - metadata.get("precision"), - metadata.get("security-severity"), + def parse_from_file(path: Path, doc_lang_dir: str) -> "QlQuery": + raw_metadata = subprocess.check_output( + ['codeql', 'resolve', 'metadata', str(path)] + ).decode("utf-8") + + parsed = json.loads(raw_metadata) + if "group" not in parsed: + sys.stderr.write( + f'WARN: {path} has no "group" metadata, defaulting to "security".\n' + ) + metadata = QueryMetadata( + name=parsed["name"], + id=parsed["id"], + description=parsed["description"], + kind=parsed["kind"], + tags=parsed["tags"], + group=parsed.get("group", "security"), + problem_severity=parsed.get("problem.severity"), + precision=parsed.get("precision"), + security_severity=parsed.get("security-severity"), ) + return QlQuery(path, doc_lang_dir, metadata) + class Qls: - def __init__(self, path: Path, queries: List[QlQuery], lang: str): + def __init__(self, path: Path, queries: list[QlQuery], doc_lang_dir: str): self.path: Path = path - self.lang = lang + self.doc_lang_dir = doc_lang_dir - self.queries: dict[str, List[QlQuery]] = {} + self.queries: dict[str, list[QlQuery]] = {} for query in queries: self.queries.setdefault(query.group, []).append(query) for group, query_list in self.queries.items(): @@ -122,29 +143,39 @@ def md_tables(self) -> str: for q in query_list: table += q.md_table_line() + "\n" - tables += table + "\n\n" + tables += table + "\n" return tables @staticmethod - def get_qls_from_path(qls_path: Path, lang: str) -> "Qls": + def get_qls_from_path(qls_path: Path, doc_lang_dir: str) -> "Qls": queries_paths = subprocess.check_output([ 'codeql', 'resolve', 'queries', '--search-path=.', str(qls_path), - ], stderr=subprocess.DEVNULL).decode("utf-8").split("\n") + ]).decode("utf-8").split("\n") queries = [] for path in queries_paths: if not path: continue - new = QlQuery.parse_from_file(path, lang) + new = QlQuery.parse_from_file(Path(path), doc_lang_dir) queries.append(new) - print(f"INFO: Parsed {len(queries)} from {qls_path}.", file=sys.stderr) - return Qls(qls_path, queries, lang) + sys.stderr.write(f"INFO: Parsed {len(queries)} from {qls_path}.\n") + return Qls(qls_path, queries, doc_lang_dir) + + +def display_name(extractor: str) -> str: + """Return the human-readable language heading for an extractor name.""" + if extractor == 'cpp': + return 'C and C++' + if extractor == 'java-kotlin': + return 'Java and Kotlin' + return extractor.capitalize() + def main(): # get ToB qlpacks that are not tests nor libraries @@ -158,15 +189,18 @@ def main(): '--format=json', ]).decode("utf-8") ) - + for k, v in qlpack_paths.items(): if k.startswith('trailofbits/') and not k.endswith('-tests') and not k.endswith('-all'): tob_qlpacks[k] = Path(v[0]) - # for every pack finds the "full" suit - tob_suits = {} + sys.stdout.write( + "\n\n" + ) + + # for every pack finds the "full" suite for qlpack_name, qlpack_path in tob_qlpacks.items(): - with open(qlpack_path / 'qlpack.yml') as f: + with open(qlpack_path / 'qlpack.yml') as f: qlpack = yaml.safe_load(f) # skip libraries @@ -179,21 +213,24 @@ def main(): suites_dir = qlpack.get('suites', 'codeql-suites') suites = list((qlpack_path / suites_dir).glob('*-full.qls')) if len(suites) > 1: - print(f'Error: Found more than 1 "full" suite for qlpack {qlpack_name}', file=sys.stderr) - if len(suites) == 0: - print(f'Error: No "full" suite for qlpack {qlpack_name}', file=sys.stderr) - continue - - # generate and print markdown - lang = qlpack["extractor"] - suit = Qls.get_qls_from_path(suites[0], lang) - lang = lang.capitalize() - if lang == 'Cpp': - lang = 'C and C++' - print(f'### {lang}\n') - print(suit.md_tables()) - - print("Copy-paste tables to the README.md file") + sys.exit( + f'Error: Found more than 1 "full" suite for qlpack {qlpack_name}' + ) + if len(suites) == 0: + sys.exit(f'Error: No "full" suite for qlpack {qlpack_name}') + + extractor = qlpack.get("extractor") + if extractor is None: + sys.exit(f'Error: qlpack {qlpack_name} has no "extractor" field') + + # The on-disk directory holding the docs (e.g., "java") may differ from + # the extractor name (e.g., "java-kotlin"). Derive the doc dir from the + # pack's filesystem location, which is /src. + doc_lang_dir = qlpack_path.parent.name + suite = Qls.get_qls_from_path(suites[0], doc_lang_dir) + + sys.stdout.write(f'### {display_name(extractor)}\n\n') + sys.stdout.write(suite.md_tables()) if __name__ == "__main__":