Skip to content

Commit

Permalink
Extract the initial version of sentry-scrubber from Tribler
Browse files Browse the repository at this point in the history
  • Loading branch information
drew2a committed Feb 19, 2025
1 parent f103e0c commit 289b1be
Show file tree
Hide file tree
Showing 16 changed files with 1,602 additions and 0 deletions.
48 changes: 48 additions & 0 deletions .github/scripts/annotate_coverage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
Script to generate GitHub Actions annotations from coverage data.
This script processes a JSON coverage report and generates GitHub-compatible warning
annotations for lines that are not covered by tests. It reads a JSON file containing
coverage statistics and outputs formatted warnings that will appear in GitHub PRs.
Usage:
python annotate_coverage.py <path_to_json>
Arguments:
path_to_json: Path to the JSON file containing coverage data
The JSON file should contain a 'src_stats' object with file paths as keys and
coverage statistics as values. Each file's statistics should include a 'violations'
list containing uncovered line numbers.
Example output:
::warning file=path/to/file.py,line=42::Line 42 is not covered by tests...
"""

import json
import sys

if len(sys.argv) != 2:
print("Usage: python annotate_coverage.py <path_to_json>")
sys.exit(1)

# Load the JSON file
json_file = sys.argv[1]
with open(json_file, 'r') as file:
coverage_data = json.load(file)

src_stats = coverage_data.get("src_stats", {})
annotations = []

for file_path, stats in src_stats.items():
violations = stats.get("violations", [])

for line, _ in violations:
message = (
f"Line {line} is not covered by tests. Consider adding test cases to improve coverage."
)

annotation = (
f"::warning file={file_path},line={line}::{message}"
)
print(annotation)
118 changes: 118 additions & 0 deletions .github/scripts/parse_semgrep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""
Parse Semgrep JSON output and create GitHub Actions annotations.
This script reads Semgrep analysis results from a JSON file and converts them
into GitHub Actions warning annotations. For each issue found by Semgrep,
it creates an annotation containing the file path, line number, message,
suggested fix (if available), and references (if available).
The Semgrep JSON output is expected to have a 'results' array containing objects with:
- path: file path where the issue was found
- start: object containing 'line' number
- extra: object containing 'message' description
- fix: optional fix suggestion
- extra.metadata.references: optional list of reference URLs
Usage:
python parse_semgrep.py [input_file] [--fail-on SEVERITY,...]
Arguments:
input_file JSON file containing Semgrep results (default: results.json)
--fail-on Comma-separated list of severity levels that will cause script
to exit with error (e.g., --fail-on ERROR,WARNING)
The script processes all results before exiting, ensuring all issues are reported.
Exit code 1 indicates that issues with specified severity levels were found.
"""
import argparse
import json
import sys
from pathlib import Path


def parse_args(argv=None):
    """Parse command-line arguments for the Semgrep annotation script.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            (argparse's default), so existing callers are unaffected; passing
            an explicit list makes the function testable in isolation.

    Returns:
        argparse.Namespace with ``input_file`` (str, default ``results.json``)
        and ``fail_on`` (str or None, comma-separated severity levels).
    """
    parser = argparse.ArgumentParser(
        description="Convert Semgrep JSON results into GitHub annotations")
    parser.add_argument('input_file', nargs='?', default='results.json',
                        help='JSON file containing Semgrep results')
    parser.add_argument('--fail-on', type=str,
                        help='Comma-separated list of severity levels that will cause failure')
    return parser.parse_args(argv)


def wrap_text(text, width=120):
    """Yield chunks of *text* wrapped at approximately *width* characters.

    Words are never broken; a chunk may exceed *width* by the length of its
    final word. Fixes two defects of the naive implementation: the joining
    spaces now count toward the line length, and no empty trailing chunk is
    yielded when the text flushes exactly on a word boundary (the empty chunk
    previously produced a dangling ``%0A`` in annotation messages).
    """
    current_words = []
    current_len = 0
    for word in text.split():
        # +1 accounts for the joining space (none before the first word).
        current_len += len(word) + (1 if current_words else 0)
        current_words.append(word)
        if current_len > width:
            yield ' '.join(current_words)
            current_words = []
            current_len = 0
    if current_words:
        yield ' '.join(current_words)


def main():
    """Convert Semgrep JSON results into GitHub Actions annotations.

    Prints one ``::<level>`` annotation per finding. Exits 0 immediately when
    the results file has no findings; exits 1 after reporting everything if
    any finding's severity matches the ``--fail-on`` list.
    """
    args = parse_args()
    failing_levels = {part.upper() for part in args.fail_on.split(',')} if args.fail_on else set()

    with open(Path(args.input_file), "r", encoding="utf-8") as fp:
        report = json.load(fp)

    findings = report.get("results")
    if not findings:
        sys.exit(0)

    # Map Semgrep severities onto GitHub annotation levels; anything
    # unrecognized falls back to "warning".
    level_by_severity = {
        "ERROR": "error",
        "WARNING": "warning",
        "INFO": "notice",
    }
    should_fail = False

    for finding in findings:
        extra = finding.get("extra", {})
        metadata = extra.get("metadata", {})
        severity = extra.get("severity", "WARNING").upper()

        # Assemble the multi-line annotation body; "%0A" is the escaped
        # newline understood by GitHub's annotation syntax.
        segments = ["%0A".join(wrap_text(extra.get("message", "No message")))]

        fix = finding.get("fix", "")
        if fix:
            segments.append(f"%0ASuggested fix: {'%0A'.join(wrap_text(fix))}")

        segments.append("%0A%0AMetadata:")
        segments.append(f"%0A- Confidence: {metadata.get('confidence', 'Unknown')}")
        segments.append(f"%0A- Likelihood: {metadata.get('likelihood', 'Unknown')}")
        segments.append(f"%0A- Impact: {metadata.get('impact', 'Unknown')}")
        segments.append(f"%0A- Source: {metadata.get('source', 'Unknown')}")

        references = metadata.get("references", [])
        if references:
            ref_block = "%0A".join(f'- {ref}' for ref in references)
            segments.append(f"%0A%0AReferences:%0A{ref_block}")

        level = level_by_severity.get(severity, "warning")
        path = finding.get("path")
        line_no = finding.get("start", {}).get("line", 1)
        print(f"::{level} file={path},line={line_no}::{''.join(segments)}")

        should_fail = should_fail or severity in failing_levels

    # All findings are reported before exiting with an error.
    if should_fail:
        sys.exit(1)


if __name__ == "__main__":
    main()
25 changes: 25 additions & 0 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Build and upload the package to PyPI whenever a GitHub release is published.
# Requires the PYPI_API_TOKEN repository secret.
name: Publish Python Package

on:
  release:
    types: [ published ]

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Poetry drives both the build and the upload.
      - run: pipx install poetry

      - uses: actions/setup-python@v4
        with:
          python-version: '3.12.7'
          cache: 'poetry'

      - name: Build and publish
        env:
          PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          poetry config pypi-token.pypi $PYPI_API_TOKEN
          poetry publish --build
52 changes: 52 additions & 0 deletions .github/workflows/pytest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Run the test suite with coverage on pushes to main and on pull requests.
name: pytest

on:
  push:
    branches:
      - main
  pull_request:
  # The `inputs.*` expressions below only receive values when this workflow is
  # called from another workflow. Without this trigger they always evaluate to
  # empty/false, making the coverage-diff steps unreachable.
  workflow_call:
    inputs:
      pytest_arguments:
        description: 'Extra arguments forwarded to pytest'
        type: string
        required: false
        default: ''
      check_coverage_diff:
        description: 'Compare coverage against origin/main and annotate uncovered lines'
        type: boolean
        required: false
        default: false

jobs:
  run_pytest:
    name: pytest
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so diff-cover can compare against origin/main.
          fetch-depth: 0

      - name: Install Poetry
        run: pipx install poetry

      - uses: actions/setup-python@v4
        with:
          python-version: '3.12.7'
          cache: 'poetry'

      - run: poetry install --no-interaction --no-ansi

      - name: Run Tests
        run: |
          poetry run pytest \
            --cov \
            --cov-report=xml \
            --cov-report=term-missing \
            ${{ inputs.pytest_arguments }}

      - name: Compare coverage (optional)
        if: ${{ inputs.check_coverage_diff == true && github.event_name == 'pull_request' }}
        run: |
          poetry run diff-cover coverage.xml \
            --compare-branch=origin/main \
            --json-report=diff_coverage.json \
            --fail-under=80

      - name: Annotate uncovered lines (optional)
        # `always()` keeps the annotations flowing even when the coverage
        # gate above failed the job.
        if: ${{ always() && inputs.check_coverage_diff == true && github.event_name == 'pull_request' }}
        run: |
          if [ -f "diff_coverage.json" ]; then
            python .github/scripts/annotate_coverage.py diff_coverage.json
          else
            echo "diff_coverage.json not found. Skipping annotation step."
          fi
20 changes: 20 additions & 0 deletions .github/workflows/ruff.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Lint only the Python files changed in a pull request with Ruff.
name: Ruff
on: [ pull_request ]
jobs:
  ruff:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - run: pipx install ruff

      - name: Get changed Python files
        id: changed-py-files
        uses: tj-actions/changed-files@v42
        with:
          files: |
            **/*.py

      - name: Run Ruff
        if: steps.changed-py-files.outputs.any_changed == 'true'
        # --output-format=github renders findings as inline PR annotations;
        # --force-exclude honours project exclude settings even for paths
        # passed explicitly on the command line.
        run: ruff check --output-format=github ${{ steps.changed-py-files.outputs.all_changed_files }} --force-exclude
33 changes: 33 additions & 0 deletions .github/workflows/semgrep.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Run Semgrep security analysis on the files changed in a pull request and
# surface the findings as GitHub annotations.
name: Semgrep
on: [ pull_request ]

jobs:
  semgrep:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Install Semgrep
        run: pipx install semgrep

      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v45
        with:
          # Tests and pytest fixtures are excluded from scanning.
          files_ignore: |
            **/tests/**
            **/conftest.py

      - name: Run Semgrep on changed files
        if: steps.changed-files.outputs.any_changed == 'true'
        # JSON results are written to results.json for the parsing step below.
        run: |
          semgrep scan \
            --config auto \
            --json \
            ${{ steps.changed-files.outputs.all_changed_files }} \
            > results.json

      - name: Parse Semgrep results and create annotations
        if: steps.changed-files.outputs.any_changed == 'true'
        # The parser reports every finding, then fails the job only when
        # ERROR-severity findings are present.
        run: python .github/scripts/parse_semgrep.py results.json --fail-on ERROR
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,5 @@ cython_debug/

# PyPI configuration file
.pypirc
.idea
.aider*
27 changes: 27 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added
- Initial release of the Sentry Scrubber library
- Core `SentryScrubber` class for scrubbing sensitive information from Sentry events
- Utility functions for data manipulation and string obfuscation:
- `get_first_item`, `get_last_item` for list operations
- `delete_item`, `get_value`, `extract_dict`, `modify_value` for dict operations
- `distinct_by` for list deduplication
- `obfuscate_string` for text anonymization
- `order_by_utc_time` for timestamp-based sorting
- GitHub Actions workflows for:
- PyTest execution
- Ruff linting
- Semgrep security analysis
- Package publishing
- Test suite with coverage reporting

### Notes
- This code was extracted from [Tribler](https://github.com/Tribler/tribler/blob/release/7.15), where it was originally developed by the author of this library.
Loading

0 comments on commit 289b1be

Please sign in to comment.