-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Extract the initial version of sentry-scrubber from Tribler
- Loading branch information
Showing
16 changed files
with
1,602 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
""" | ||
Script to generate GitHub Actions annotations from coverage data. | ||
This script processes a JSON coverage report and generates GitHub-compatible warning | ||
annotations for lines that are not covered by tests. It reads a JSON file containing | ||
coverage statistics and outputs formatted warnings that will appear in GitHub PRs. | ||
Usage: | ||
python annotate_coverage.py <path_to_json> | ||
Arguments: | ||
path_to_json: Path to the JSON file containing coverage data | ||
The JSON file should contain a 'src_stats' object with file paths as keys and | ||
coverage statistics as values. Each file's statistics should include a 'violations' | ||
list containing uncovered line numbers. | ||
Example output: | ||
::warning file=path/to/file.py,line=42::Line 42 is not covered by tests... | ||
""" | ||
|
||
import json | ||
import sys | ||
|
||
if len(sys.argv) != 2:
    print("Usage: python annotate_coverage.py <path_to_json>")
    sys.exit(1)

# Load the JSON coverage report. JSON is UTF-8 by specification, so do not
# depend on the platform's default encoding.
json_file = sys.argv[1]
with open(json_file, "r", encoding="utf-8") as file:
    coverage_data = json.load(file)

# Maps each source file path to its statistics; a missing key means there is
# nothing to annotate.
src_stats = coverage_data.get("src_stats", {})

for file_path, stats in src_stats.items():
    # `file=` is a workflow-command property: '%', CR, LF, ':' and ',' have to
    # be percent-escaped, otherwise an unusual file name can truncate the
    # property list or start a new command on the next line.
    escaped_path = (
        str(file_path)
        .replace("%", "%25")
        .replace("\r", "%0D")
        .replace("\n", "%0A")
        .replace(":", "%3A")
        .replace(",", "%2C")
    )

    # Each violation is unpacked as a pair; only the first element (the
    # uncovered line number) is used.
    for line, _ in stats.get("violations", []):
        message = (
            f"Line {line} is not covered by tests. Consider adding test cases to improve coverage."
        )
        print(f"::warning file={escaped_path},line={line}::{message}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Parse Semgrep JSON output and create GitHub Actions annotations. | ||
This script reads Semgrep analysis results from a JSON file and converts them | ||
into GitHub Actions warning annotations. For each issue found by Semgrep, | ||
it creates an annotation containing the file path, line number, message, | ||
suggested fix (if available), and references (if available). | ||
The Semgrep JSON output is expected to have a 'results' array containing objects with: | ||
- path: file path where the issue was found | ||
- start: object containing 'line' number | ||
- extra: object containing 'message' description | ||
- fix: optional fix suggestion | ||
- extra.metadata.references: optional list of reference URLs | ||
Usage: | ||
python parse_semgrep.py [input_file] [--fail-on SEVERITY,...] | ||
Arguments: | ||
input_file JSON file containing Semgrep results (default: results.json) | ||
--fail-on Comma-separated list of severity levels that will cause script | ||
to exit with error (e.g., --fail-on ERROR,WARNING) | ||
The script processes all results before exiting, ensuring all issues are reported. | ||
Exit code 1 indicates that issues with specified severity levels were found. | ||
""" | ||
import json | ||
import sys | ||
from pathlib import Path | ||
|
||
|
||
def parse_args():
    """Read the script's command-line options from ``sys.argv``.

    Returns:
        argparse.Namespace with two attributes:
        ``input_file`` - path of the Semgrep JSON report, ``results.json``
        when the positional argument is omitted;
        ``fail_on`` - the raw comma-separated severity string, or ``None``
        when ``--fail-on`` is not given.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser()
    cli.add_argument(
        'input_file',
        nargs='?',
        default='results.json',
        help='JSON file containing Semgrep results',
    )
    cli.add_argument(
        '--fail-on',
        type=str,
        help='Comma-separated list of severity levels that will cause failure',
    )
    namespace = cli.parse_args()
    return namespace
|
||
|
||
def wrap_text(text, width=120):
    """Wrap `text` at approximately `width` characters without breaking words.

    The text is split on whitespace. Words are collected into a line, and the
    line is emitted as soon as the summed length of its words (separating
    spaces are not counted) exceeds `width`; a line can therefore be somewhat
    longer than `width`.

    Args:
        text: The string to wrap. Runs of whitespace, including newlines,
            collapse to single spaces.
        width: Approximate line length that triggers a line break.

    Yields:
        Each wrapped line, with its words joined by single spaces. Nothing is
        yielded for empty or whitespace-only text, and no empty trailing line
        is produced when the last word completes a line.
    """
    current_line = []
    current_len = 0
    for word in text.split():
        current_line.append(word)
        current_len += len(word)
        if current_len > width:
            yield ' '.join(current_line)
            current_line = []
            current_len = 0
    # Flush the remainder only when there is one; an unconditional yield would
    # add an empty line (and thus a dangling line break) to the output.
    if current_line:
        yield ' '.join(current_line)
|
||
|
||
def _escape_data(value):
    """Escape `value` for the message part of a GitHub workflow command."""
    return str(value).replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")


def _escape_property(value):
    """Escape `value` for a ``key=value`` property of a GitHub workflow command."""
    return _escape_data(value).replace(":", "%3A").replace(",", "%2C")


def main():
    """Print one GitHub Actions annotation per Semgrep result.

    Reads the JSON report named on the command line, prints an annotation
    (``::error``, ``::warning`` or ``::notice``) for every entry of its
    ``results`` list, and only then decides the exit status, so all issues are
    reported even when the run fails.

    Exits with status 0 when the report has no results, and with status 1 when
    at least one result has a severity listed in ``--fail-on``.
    """
    args = parse_args()
    fail_on = {level.strip().upper() for level in args.fail_on.split(',')} if args.fail_on else set()

    with open(Path(args.input_file), "r", encoding="utf-8") as f:
        data = json.load(f)

    results = data.get("results")
    if not results:
        sys.exit(0)

    # Map Semgrep severity to GitHub annotation level
    annotation_levels = {
        "ERROR": "error",
        "WARNING": "warning",
        "INFO": "notice",
    }
    # Metadata entries shown in the annotation: (label, metadata key)
    metadata_fields = (
        ("Confidence", "confidence"),
        ("Likelihood", "likelihood"),
        ("Impact", "impact"),
        ("Source", "source"),
    )

    found_severe_issues = False

    for issue in results:
        extra = issue.get("extra", {})
        metadata = extra.get("metadata", {})

        path = issue.get("path")
        start_line = issue.get("start", {}).get("line", 1)
        message = extra.get("message", "No message")
        severity = extra.get("severity", "WARNING").upper()
        level = annotation_levels.get(severity, "warning")  # default to warning if unknown severity

        # Semgrep's JSON schema puts the autofix text under `extra`; the
        # top-level key is still checked first for compatibility.
        fix = issue.get("fix") or extra.get("fix") or ""

        references = metadata.get("references", [])
        if isinstance(references, str):
            # A lone URL would otherwise be listed one character per bullet.
            references = [references]

        # Build the annotation message. Wrapped lines are joined with the
        # escaped newline "%0A"; every piece of report text is escaped first so
        # a literal '%' or line break in it cannot be misread by the runner.
        annotation_msg = "%0A".join(_escape_data(line) for line in wrap_text(message))
        if fix:
            wrapped_fix = "%0A".join(_escape_data(line) for line in wrap_text(fix))
            annotation_msg += f"%0ASuggested fix: {wrapped_fix}"

        # Add metadata information
        annotation_msg += "%0A%0AMetadata:"
        for label, key in metadata_fields:
            annotation_msg += f"%0A- {label}: {_escape_data(metadata.get(key, 'Unknown'))}"

        if references:
            ref_list = "%0A".join(f'- {_escape_data(r)}' for r in references)
            annotation_msg += f"%0A%0AReferences:%0A{ref_list}"

        print(f"::{level} file={_escape_property(path)},line={start_line}::{annotation_msg}")

        if severity in fail_on:
            found_severe_issues = True

    if found_severe_issues:
        sys.exit(1)
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Builds the package with Poetry and uploads it to PyPI whenever a GitHub
# release is published.
name: Publish Python Package

on:
  release:
    types: [ published ]

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - run: pipx install poetry

      - uses: actions/setup-python@v4
        with:
          python-version: '3.12.7'
          cache: 'poetry'

      - name: Build and publish
        env:
          PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          # Quote the expansion so the token always reaches Poetry as a single,
          # unmodified argument.
          poetry config pypi-token.pypi "$PYPI_API_TOKEN"
          poetry publish --build
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Runs the test suite with coverage on pushes to main and on pull requests.
name: pytest

on:
  push:
    branches:
      - main
  pull_request:
  # The steps below read `inputs.pytest_arguments` and `inputs.check_coverage_diff`.
  # Those values exist only when the inputs are declared, so declare them for
  # callers that reuse this workflow. On plain push / pull_request runs they
  # stay empty: no extra pytest arguments are passed and the optional coverage
  # steps are skipped, as before.
  workflow_call:
    inputs:
      pytest_arguments:
        description: Extra command-line arguments appended to the pytest invocation
        type: string
        required: false
        default: ''
      check_coverage_diff:
        description: Compare coverage with origin/main and annotate uncovered lines
        type: boolean
        required: false
        default: false

jobs:
  run_pytest:
    name: pytest
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Poetry
        run: pipx install poetry

      - uses: actions/setup-python@v4
        with:
          python-version: '3.12.7'
          cache: 'poetry'

      - run: poetry install --no-interaction --no-ansi

      - name: Run Tests
        env:
          # Passed through the environment instead of being pasted into the
          # script text, so the value cannot inject shell commands.
          PYTEST_ARGUMENTS: ${{ inputs.pytest_arguments }}
        run: |
          # Left unquoted on purpose: several arguments must split into words.
          poetry run pytest \
            --cov \
            --cov-report=xml \
            --cov-report=term-missing \
            $PYTEST_ARGUMENTS
      - name: Compare coverage (optional)
        if: ${{ inputs.check_coverage_diff == true && github.event_name == 'pull_request' }}
        run: |
          poetry run diff-cover coverage.xml \
            --compare-branch=origin/main \
            --json-report=diff_coverage.json \
            --fail-under=80
      - name: Annotate uncovered lines (optional)
        if: ${{ always() && inputs.check_coverage_diff == true && github.event_name == 'pull_request' }}
        run: |
          if [ -f "diff_coverage.json" ]; then
            python .github/scripts/annotate_coverage.py diff_coverage.json
          else
            echo "diff_coverage.json not found. Skipping annotation step."
          fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Lints the Python files changed in a pull request with Ruff.
name: Ruff
on: [ pull_request ]
jobs:
  ruff:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - run: pipx install ruff

      - name: Get changed Python files
        id: changed-py-files
        uses: tj-actions/changed-files@v42
        with:
          files: |
            **/*.py
      - name: Run Ruff
        if: steps.changed-py-files.outputs.any_changed == 'true'
        env:
          # File names come from the pull request and are untrusted. Handing
          # them over through the environment keeps them out of the script
          # text, so a crafted file name cannot inject shell commands.
          CHANGED_FILES: ${{ steps.changed-py-files.outputs.all_changed_files }}
        # Left unquoted on purpose: the space-separated list must split into
        # one argument per file.
        run: ruff check --output-format=github $CHANGED_FILES --force-exclude
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Scans the files changed in a pull request with Semgrep and reports the
# findings as annotations.
name: Semgrep
on: [ pull_request ]

jobs:
  semgrep:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Install Semgrep
        run: pipx install semgrep

      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v45
        with:
          files_ignore: |
            **/tests/**
            **/conftest.py
      - name: Run Semgrep on changed files
        if: steps.changed-files.outputs.any_changed == 'true'
        env:
          # File names come from the pull request and are untrusted. Handing
          # them over through the environment keeps them out of the script
          # text, so a crafted file name cannot inject shell commands.
          CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          # Left unquoted on purpose: the space-separated list must split into
          # one argument per file.
          semgrep scan \
            --config auto \
            --json \
            $CHANGED_FILES \
            > results.json
      - name: Parse Semgrep results and create annotations
        if: steps.changed-files.outputs.any_changed == 'true'
        run: python .github/scripts/parse_semgrep.py results.json --fail-on ERROR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -169,3 +169,5 @@ cython_debug/ | |
|
||
# PyPI configuration file | ||
.pypirc | ||
.idea | ||
.aider* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Changelog | ||
|
||
All notable changes to this project will be documented in this file. | ||
|
||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), | ||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). | ||
|
||
## [Unreleased] | ||
|
||
### Added | ||
- Initial release of the Sentry Scrubber library | ||
- Core `SentryScrubber` class for scrubbing sensitive information from Sentry events | ||
- Utility functions for data manipulation and string obfuscation: | ||
- `get_first_item`, `get_last_item` for list operations | ||
- `delete_item`, `get_value`, `extract_dict`, `modify_value` for dict operations | ||
- `distinct_by` for list deduplication | ||
- `obfuscate_string` for text anonymization | ||
- `order_by_utc_time` for timestamp-based sorting | ||
- GitHub Actions workflows for: | ||
- PyTest execution | ||
- Ruff linting | ||
- Semgrep security analysis | ||
- Package publishing | ||
- Test suite with coverage reporting | ||
|
||
### Notes | ||
- This code was extracted from [Tribler](https://github.com/Tribler/tribler/blob/release/7.15) as it was initially developed by me for Tribler |
Oops, something went wrong.