Skip to content

Commit

Permalink
Enhance logging
Browse files Browse the repository at this point in the history
(cherry picked from commit 1998c8e)
  • Loading branch information
m-dzianishchyts committed Dec 25, 2024
1 parent d1a884e commit d4fa35a
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 53 deletions.
1 change: 1 addition & 0 deletions .github/workflows/merge_upstream.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ jobs:
CHANGELOG_AUTHOR: 'ParadiseSS13'
TRANSLATE_CHANGES: 'true'
OPENAI_API_KEY: ${{ secrets.ORG_EMPTY_TOKEN }}
LOG_LEVEL: ${{ runner.debug && 'DEBUG' || 'INFO' }}
run: |
git config --global user.email "action@github.com"
git config --global user.name "Upstream Sync"
Expand Down
150 changes: 97 additions & 53 deletions tools/merge-upstream/merge_upstream.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import enum
import logging
import os
import re
import subprocess
Expand Down Expand Up @@ -55,6 +56,7 @@ class PullDetails(typing.TypedDict):

def check_env():
"""Check if the required environment variables are set."""
logging.debug("Checking environment variables...")
required_vars = [
"GITHUB_TOKEN",
"TARGET_REPO",
Expand All @@ -67,9 +69,12 @@ def check_env():
required_vars.append("OPENAI_API_KEY")
missing_vars = [var for var in required_vars if not os.getenv(var)]
if missing_vars:
logging.error("Missing required environment variables: %s", ", ".join(missing_vars))
raise EnvironmentError(f"Missing required environment variables: {', '.join(missing_vars)}")


logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO").upper())

# Environment variables
TRANSLATE_CHANGES = os.getenv("TRANSLATE_CHANGES", "False").lower() in ("true", "yes", "1")
CHANGELOG_AUTHOR = os.getenv("CHANGELOG_AUTHOR", "")
Expand All @@ -86,73 +91,82 @@ def check_env():

def run_command(command: str) -> str:
    """Run a shell command and return its stripped standard output.

    The command line, its output and any failure details are written to the
    log with credentials embedded in URLs (``scheme://user:secret@host``)
    masked, because callers in this file build git commands whose remote URLs
    carry an access token.

    Args:
        command: Command line handed to the shell.

    Returns:
        Standard output of the command with surrounding whitespace removed.
        The returned text is NOT masked; only what is logged is.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status. The ``cmd`` attribute of the exception holds the masked
            command line so that a traceback does not expose the credential.
    """

    def redact(text: str) -> str:
        # Replace the userinfo part of any URL so tokens never reach the log.
        return re.sub(r"(\w+://)[^/@\s]+@", r"\1***@", text)

    safe_command = redact(command)
    logging.debug("Running command: %s", safe_command)
    try:
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
        result.check_returncode()
    except subprocess.CalledProcessError as e:
        logging.error("Error executing command: %s", safe_command)
        logging.error(
            "Exit code: %d, Output: %s, Error: %s",
            e.returncode, redact(e.output or ""), redact(e.stderr or ""),
        )
        # The exception message is built from ``cmd``; keep the token out of it as well.
        e.cmd = safe_command
        raise

    output = result.stdout.strip()
    logging.debug("Command output: %s", redact(output))
    return output


def setup_repo():
    """Clone the target repository and register the upstream remote.

    Clones ``TARGET_REPO`` into the ``repo`` directory, changes the current
    working directory to that clone, and adds ``UPSTREAM_REPO`` as the
    ``upstream`` remote. Both remote URLs embed ``GITHUB_TOKEN``.

    Raises:
        subprocess.CalledProcessError: If any git command fails (propagated
            from ``run_command``).
    """
    logging.info("Cloning repository: %s", TARGET_REPO)
    run_command(f"git clone https://x-access-token:{GITHUB_TOKEN}@github.com/{TARGET_REPO}.git repo")
    os.chdir("repo")
    run_command(f"git remote add upstream https://x-access-token:{GITHUB_TOKEN}@github.com/{UPSTREAM_REPO}.git")

    # `git remote -v` echoes the remote URLs, token included: query it once and
    # mask the token before the listing is written to the log.
    remotes = run_command("git remote -v")
    if GITHUB_TOKEN:
        remotes = remotes.replace(GITHUB_TOKEN, "***")
    logging.info("Git remotes set up: %s", remotes)


def update_merge_branch():
    """Bring the merge branch in line with upstream and publish it to origin.

    Fetches ``UPSTREAM_BRANCH`` from the ``upstream`` remote and everything
    from ``origin``. When origin has no ``MERGE_BRANCH`` yet, the branch is
    created from ``upstream/UPSTREAM_BRANCH`` and pushed with tracking.
    Otherwise the existing branch is checked out, hard-reset onto
    ``upstream/UPSTREAM_BRANCH`` and force-pushed to origin.

    Raises:
        subprocess.CalledProcessError: If any git command fails (propagated
            from ``run_command``).
    """
    logging.info("Fetching branch %s from upstream...", UPSTREAM_BRANCH)
    run_command(f"git fetch upstream {UPSTREAM_BRANCH}")
    run_command("git fetch origin")
    # `git branch -a` output is split on whitespace, so the current-branch
    # marker ("*") becomes a separate token and branch names compare cleanly.
    all_branches: list[str] = run_command("git branch -a").split()
    logging.debug("Fetched branches: %s", all_branches)

    if f"remotes/origin/{MERGE_BRANCH}" not in all_branches:
        logging.info("Branch '%s' does not exist. Creating it from upstream/%s...", MERGE_BRANCH, UPSTREAM_BRANCH)
        run_command(f"git checkout -b {MERGE_BRANCH} upstream/{UPSTREAM_BRANCH}")
        run_command(f"git push -u origin {MERGE_BRANCH}")
        return

    logging.info("Resetting '%s' onto upstream/%s...", MERGE_BRANCH, UPSTREAM_BRANCH)
    run_command(f"git checkout {MERGE_BRANCH}")
    run_command(f"git reset --hard upstream/{UPSTREAM_BRANCH}")

    # The reset rewrites the branch history, hence the forced push.
    logging.info("Pushing changes to origin...")
    run_command(f"git push origin {MERGE_BRANCH} --force")


def detect_commits() -> list[str]:
    """Detect commits from upstream not yet in downstream.

    Lists the commits reachable from ``MERGE_BRANCH`` but not from
    ``TARGET_BRANCH`` as ``"<short hash> <subject>"`` lines.

    Returns:
        The commit lines in reverse of ``git log`` order, or an empty list
        when git reports no commits.

    Raises:
        subprocess.CalledProcessError: If ``git log`` fails (propagated from
            ``run_command``).
    """
    logging.info("Detecting new commits from upstream...")
    raw_log: str = run_command(f"git log {TARGET_BRANCH}..{MERGE_BRANCH} --pretty=format:'%h %s'")
    # "".split("\n") yields [""], which would look like one (blank) commit to
    # callers testing the list for emptiness; drop empty lines instead.
    commit_log: list[str] = [line for line in raw_log.split("\n") if line]
    commit_log.reverse()
    logging.debug("Detected commits: %s", commit_log)
    return commit_log


def fetch_pull(github: Github, pull_number: int) -> PullRequest | None:
    """Fetch a pull request of the upstream repository from GitHub.

    The request is attempted up to three times with a two-second pause
    between attempts; every failure is logged.

    Args:
        github: Client used to look up ``UPSTREAM_REPO``.
        pull_number: Number of the pull request to fetch.

    Returns:
        The fetched pull request, or ``None`` when all attempts failed.
    """
    logging.debug("Fetching pull request #%d", pull_number)
    # NOTE: the repository lookup sits outside the retry loop, so an error
    # raised here propagates to the caller instead of being retried.
    upstream_repo: Repository = github.get_repo(UPSTREAM_REPO)

    max_retries = 3
    for attempt in range(max_retries):
        try:
            pull = upstream_repo.get_pull(int(pull_number))
            logging.debug("Successfully fetched PR #%d: %s", pull_number, pull.title)
            return pull
        except Exception as e:
            logging.error("Error fetching PR #%d: %s", pull_number, e)
            if attempt + 1 < max_retries:
                logging.warning("Retrying fetch for PR #%d (attempt %d/%d)", pull_number, attempt + 1, max_retries)
                time.sleep(2)
            else:
                logging.error("Failed to fetch PR #%d after %d attempts", pull_number, max_retries)
                return None


def build_details(github: Github, commit_log: list[str],
translate: typing.Optional[typing.Callable[[typing.Dict[int, list[Change]]], None]]) -> PullDetails:
"""Generate data from parsed commits."""
print("Building details...")
logging.info("Building pull request details from commit log...")
pull_number_pattern: Pattern[str] = re.compile("#(?P<id>\\d+)")
details = PullDetails(
changelog={},
Expand All @@ -168,55 +182,63 @@ def build_details(github: Github, commit_log: list[str],
for commit in commit_log:
match = re.search(pull_number_pattern, commit)
if not match:
print(f"Skipping {commit}")
logging.debug("Skipping commit without pull request reference: %s", commit)
continue

pull_number = int(match.group("id"))

if pull_number in pull_cache:
print(
f"WARNING: pull duplicate found.\n"
f"1: {pull_cache[pull_number]}\n"
f"2: {commit}"
logging.warning(
"Duplicate pull request detected for #%d\n"
"Existing: %s\n"
"New: %s",
pull_number, pull_cache[pull_number], commit
)
print(f"Skipping {commit}")
continue

pull_cache[pull_number] = commit
futures[executor.submit(fetch_pull, github, pull_number)] = pull_number

for future in as_completed(futures):
pull_number = futures[future]
pull: PullRequest | None = future.result()

if not pull:
print(f"Pull {pull_number} was not fetched. Skipping.")
continue

process_pull(details, pull)
try:
pull: PullRequest | None = future.result()
if not pull:
logging.warning("Failed to fetch pull request #%d. Skipping.", pull_number)
continue
process_pull(details, pull)
except Exception as e:
logging.error("Error processing pull request #%d: %s", pull_number, e)

if translate:
translate(details["changelog"])

logging.info("Details building complete. Processed %d pull requests.", len(details["merge_order"]))
return details


def process_pull(details: PullDetails, pull: PullRequest):
"""Handle fetched pull request data during details building."""
logging.debug("Processing pull request #%d: %s", pull.number, pull.title)
pull_number: int = pull.number
labels: list[str] = [label.name for label in pull.get_labels()]
pull_changes: list[Change] = []

try:
for label in labels:
if label == UpstreamLabel.CONFIG_CHANGE.value:
details["config_changes"].append(pull)
logging.debug("Detected CONFIG_CHANGE label for PR #%d", pull_number)
elif label == UpstreamLabel.SQL_CHANGE.value:
details["sql_changes"].append(pull)
logging.debug("Detected SQL_CHANGE label for PR #%d", pull_number)
elif label == UpstreamLabel.WIKI_CHANGE.value:
details["wiki_changes"].append(pull)
logging.debug("Detected WIKI_CHANGE label for PR #%d", pull_number)

parsed = changelog_utils.parse_changelog(pull.body)
if parsed and parsed["changes"]:
logging.debug("Parsed changelog for PR #%d: %s", pull_number, parsed["changes"])
for change in parsed["changes"]:
pull_changes.append(Change(
tag=change["tag"],
Expand All @@ -226,28 +248,34 @@ def process_pull(details: PullDetails, pull: PullRequest):

if pull_changes:
details["changelog"][pull_number] = pull_changes
logging.debug("Added %d changes for PR #%d", len(pull_changes), pull_number)
except Exception as e:
print(
f"An error occurred while processing {pull.html_url}\n"
f"Body: {pull.body}"
logging.error(
"An error occurred while processing PR #%d: %s\n"
"Body: %s",
pull.number, e, pull.body
)
raise e
raise


def translate_changelog(changelog: typing.Dict[int, list[Change]]):
"""Translate changelog using OpenAI API."""
print("Translating changelog...")
logging.info("Translating changelog...")
if not changelog:
logging.warning("No changelog entries to translate.")
return

changes: list[Change] = [change for changes in changelog.values() for change in changes]
if not changes:
logging.warning("No changes found in the changelog to translate.")
return

logging.debug("Preparing text for translation: %d changes", len(changes))
text = "\n".join([change["message"] for change in changes])
logging.debug(text)
script_dir = Path(__file__).resolve().parent
with open(script_dir.joinpath("translation_context.txt"), encoding="utf-8") as f:
context = "\n".join(f.readlines()).strip()
text = "\n".join([change["message"] for change in changes])

client = OpenAI(
base_url="https://models.inference.ai.azure.com",
Expand All @@ -265,12 +293,13 @@ def translate_changelog(changelog: typing.Dict[int, list[Change]]):
translated_text: str | None = response.choices[0].message.content

if not translated_text:
print("WARNING: changelog translation failed!")
print(response)
logging.warning("Changelog translation failed!")
logging.debug("Translation API response: %s", response)
return

for change, translated_message in zip(changes, translated_text.split("\n"), strict=True):
change["translated_message"] = translated_message
logging.debug("Translated: %s -> %s", change["message"], translated_message)


def silence_pull_url(pull_url: str) -> str:
Expand All @@ -280,35 +309,44 @@ def silence_pull_url(pull_url: str) -> str:

def prepare_pull_body(details: PullDetails) -> str:
"""Build new pull request body from the generated changelog."""
logging.info("Preparing pull request body...")
pull_body: str = (
f"This pull request merges upstream/{UPSTREAM_BRANCH}. "
f"Resolve possible conflicts manually and make sure all the changes are applied correctly.\n"
)

if not details:
logging.warning("No pull details provided. Using default body.")
return pull_body

label_to_pulls: dict[UpstreamLabel, list[PullRequest]] = {
UpstreamLabel.CONFIG_CHANGE: details["config_changes"],
UpstreamLabel.SQL_CHANGE: details["sql_changes"],
UpstreamLabel.WIKI_CHANGE: details["wiki_changes"]
}

for label, fetched_pulls in label_to_pulls.items():
if not fetched_pulls:
logging.debug("No pulls found for label '%s'", label.value)
continue

pull_body += (
f"\n> [!{LABEL_BLOCK_STYLE[label]}]\n"
f"> {label.value}:\n"
)
for fetched_pull in fetched_pulls:
pull_body += f"> {silence_pull_url(fetched_pull.html_url)}\n"
silenced_url = silence_pull_url(fetched_pull.html_url)
logging.debug("Adding pull #%d to body: %s", fetched_pull.number, silenced_url)
pull_body += f"> {silenced_url}\n"

if not details["changelog"]:
logging.info("No changelog entries found.")
return pull_body

logging.info("Adding changelog entries to pull request body.")
pull_body += f"\n## Changelog\n"
pull_body += f":cl: {CHANGELOG_AUTHOR}\n" if CHANGELOG_AUTHOR else ":cl:\n"

for pull_number in details["merge_order"]:
if pull_number not in details["changelog"]:
continue
Expand All @@ -319,44 +357,50 @@ def prepare_pull_body(details: PullDetails) -> str:
pull_url: str = silence_pull_url(change["pull"].html_url)
if translated_message:
pull_body += f"{tag}: {translated_message} <!-- {message} ({pull_url}) -->\n"
logging.debug("Added translated change for PR #%d: %s", pull_number, translated_message)
else:
pull_body += f"{tag}: {message} <!-- ({pull_url}) -->\n"
logging.debug("Added original change for PR #%d: %s", pull_number, message)
pull_body += "/:cl:\n"

logging.info("Pull request body prepared successfully.")
return pull_body


def create_pr(repo: Repository, details: PullDetails):
    """Create the merge pull request with the processed changelog.

    Opens exactly one pull request from ``MERGE_BRANCH`` into
    ``TARGET_BRANCH`` whose body is built by ``prepare_pull_body``. When
    ``details["wiki_changes"]`` is non-empty, the downstream wiki-change
    label is added to the new pull request.

    Args:
        repo: Repository in which the pull request is opened.
        details: Data collected from the merged upstream pull requests.

    Raises:
        Exception: Whatever ``repo.create_pull`` or ``pull.add_to_labels``
            raises; the error is logged and re-raised unchanged.
    """
    logging.info("Creating pull request...")
    pull_body: str = prepare_pull_body(details)

    try:
        pull: PullRequest = repo.create_pull(
            title=f"Merge Upstream {datetime.today().strftime('%d.%m.%Y')}",
            body=pull_body,
            head=MERGE_BRANCH,
            base=TARGET_BRANCH,
        )
    except Exception as e:
        logging.error("Failed to create pull request: %s", e)
        raise
    logging.info("Pull request created: %s", pull.html_url)

    if not details["wiki_changes"]:
        return

    # Labelling is reported separately: at this point the pull request already
    # exists, so "failed to create" would send the reader in the wrong direction.
    try:
        pull.add_to_labels(DownstreamLabel.WIKI_CHANGE)
    except Exception as e:
        logging.error("Failed to add WIKI_CHANGE label to %s: %s", pull.html_url, e)
        raise
    logging.debug("Added WIKI_CHANGE label to pull request.")


def check_pull_exists(target_repo: Repository, base: str, head: str):
    """Check if the merge pull request already exists and abort if it does.

    Queries the open pull requests of ``target_repo`` filtered by the
    owner-qualified ``base`` and ``head`` branches.

    Args:
        target_repo: Repository to search for an open merge pull request.
        base: Name of the branch the pull request targets.
        head: Name of the branch the pull request is opened from.

    Raises:
        SystemExit: With exit status 1 when at least one matching open pull
            request exists.
    """
    logging.info("Checking if pull request already exists between '%s' and '%s'...", base, head)
    owner: str = target_repo.owner.login
    base_strict = f"{owner}:{base}"
    head_strict = f"{owner}:{head}"
    # NOTE(review): GitHub documents the `base` filter as a bare branch name and
    # only `head` as "user:ref-name" - confirm the owner-prefixed base matches.
    existing_pulls: PaginatedList[PullRequest] = target_repo.get_pulls(state="open", base=base_strict, head=head_strict)

    if existing_pulls.totalCount > 0:
        logging.error("Pull request already exists: %s", ", ".join(pull.html_url for pull in existing_pulls))
        # Raised directly instead of calling exit(), which is a helper injected
        # by the `site` module and is not guaranteed to be defined.
        raise SystemExit(1)
    logging.debug("No existing pull requests found.")

if __name__ == "__main__":
github = Github(GITHUB_TOKEN)
Expand All @@ -372,4 +416,4 @@ def check_pull_exists(target_repo: Repository, base: str, head: str):
details: PullDetails = build_details(github, commit_log, translate_changelog if TRANSLATE_CHANGES else None)
create_pr(target_repo, details)
else:
print(f"No changes detected from {UPSTREAM_REPO}/{UPSTREAM_BRANCH}. Skipping pull request creation.")
logging.info("No changes detected from %s/%s. Skipping pull request creation.", UPSTREAM_REPO, UPSTREAM_BRANCH)

0 comments on commit d4fa35a

Please sign in to comment.