Files
CodeReviewer/.gitea/scripts/code_review.py
Myeongseon Choi c9ab56d2fb
All checks were successful
Code Review / review (pull_request) Successful in 22s
add action
2025-01-10 11:19:30 +09:00

279 lines
8.1 KiB
Python

import fnmatch
import json
import os
import re
from typing import Any, Optional
import requests
from model import Model
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN", "")
HEADERS = {"Authorization": f"token {ACCESS_TOKEN}"}
GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH")
try:
with open(GITHUB_EVENT_PATH, "r") as f:
EVENT_DATA = json.load(f)
except FileNotFoundError:
print("Failed to load event data.")
exit(1)
FULL_CONTEXT_MODEL_NAME = os.getenv("FULL_CONTEXT_MODEL", "")
SINGLE_CHUNK_MODEL_NAME = os.getenv("SINGLE_CHUNK_MODEL", "")
FULL_CONTEXT_API_KEY = os.getenv("FULL_CONTEXT_API_KEY", "")
SINGLE_CHUNK_API_KEY = os.getenv("SINGLE_CHUNK_API_KEY", "")
EXCLUDE_PATTERNS = os.getenv("EXCLUDE", "").split(",")
def get_diff() -> str | None:
"""Get code difference between base and head from Gitea.
Returns:
str | None: code difference between base and head, or None if failed to get diff
"""
url = EVENT_DATA["pull_request"]["diff_url"]
try:
response = requests.get(url, headers=HEADERS)
response.raise_for_status()
return response.text
except requests.RequestException as e:
print(f"Failed to get diff: {e}")
return None
def parse_diff(diff: str) -> list[dict[str, Any]]:
"""Parse diff into list of dicts.
Args:
diff: str, code difference between base and head
Returns:
list[dict[str, Any]]: list of dicts, each dict represents a code chunks
"""
file_pattern = re.compile(
r"(?s)diff --git a/(.+?) b/(.*?)\r?\n(.*?)(?=diff --git a/|$)", re.S
)
old_new_pattern = re.compile(r"(?m)^(---|\+\+\+)\s+(.*)$")
hunk_pattern = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)(?=^@@ |$)",
re.MULTILINE | re.DOTALL,
)
list_diff = []
for match in file_pattern.finditer(diff):
diff_text = match.group(3)
old_new_match = list(old_new_pattern.finditer(diff_text))
if len(old_new_match) != 2:
continue
old_file = old_new_match[0].group(2)
old_file = old_file.lstrip("a/") if old_file.startswith("a/") else old_file
new_file = old_new_match[1].group(2)
if new_file == "/dev/null":
print("Neglict deleted file")
continue
new_file = new_file.lstrip("b/")
hunk_match = hunk_pattern.search(diff_text)
if hunk_match is None:
continue
old_idx = int(hunk_match.group(1))
new_idx = int(hunk_match.group(3))
remain_text = diff_text[hunk_match.end() + 1 :]
diff_text = []
for line in remain_text.splitlines():
if line.startswith("-"):
diff_text.append(f"{old_idx} {line}")
old_idx += 1
elif line.startswith("+"):
diff_text.append(f"{new_idx} {line}")
new_idx += 1
else:
diff_text.append(line)
diff_text = "\n".join(diff_text)
if any(fnmatch.fnmatch(new_file, pattern) for pattern in EXCLUDE_PATTERNS):
print(f"Exclude file {new_file}")
continue
list_diff.append(
{
"file": new_file,
"chunk": diff_text,
}
)
return list_diff
def create_comment(
file: str, ai_response: list[dict[str, Any]]
) -> list[dict[str, Any]]:
"""Create comments for single chunk review.
Args:
file: str, file name
ai_response: list[dict[str, Any]], AI response for single chunk review
Returns:
list[dict[str, Any]]: comments for single chunk review
"""
comments = []
for ai_response in ai_response:
comments.append(
{
"body": f"[REVIEW] {ai_response['reviewComment']}",
"path": file,
"new_position": int(ai_response["lineNumber"]),
}
)
return comments
def analyze_single_chunks(
single_chunk_model: Model, parsed_diff: list[dict[str, Any]]
) -> list[dict[str, Any]]:
"""Analyze single chunks and create comments.
Args:
single_chunk_model: AI Session for single chunk analysis
parsed_diff: list[dict[str, Any]], parsed diff
Returns:
list[dict[str, Any]]: comments for single chunk review
"""
comments = []
title = EVENT_DATA["pull_request"]["title"]
description = EVENT_DATA["pull_request"]["body"]
for diff in parsed_diff:
file = diff["file"]
chunk = diff["chunk"]
response = single_chunk_model.get_response_single_chunk(
file, title, description, chunk
)
response = response.strip("`").lstrip("json").strip() or "[]"
try:
response_json = json.loads(response)
new_comments = create_comment(file, response_json)
comments.extend(new_comments)
except json.JSONDecodeError:
print(f"Failed to parse response: {response}")
continue
return comments
def get_file_content(file: str) -> str | None:
"""Get file content from Gitea.
Args:
file: str, file name
Returns:
str | None: file content, or None if failed to get file content
"""
repo_url = EVENT_DATA["pull_request"]["head"]["repo"]["url"]
branch = EVENT_DATA["pull_request"]["head"]["ref"]
replaced_file = file.replace("/", "%2F")
url = f"{repo_url}/raw/{branch}%2F{replaced_file}?ref={branch}"
try:
response = requests.get(url, headers=HEADERS)
response.raise_for_status()
return response.text
except requests.RequestException as e:
print(f"Failed to get file content: {e}")
return None
def analyze_full_context(
full_context_model: Model, parsed_diff: list[dict[str, Any]]
) -> str:
"""Analyze full context and create review.
Args:
full_context_model: AI Session for full context analysis
parsed_diff: list[dict[str, Any]], parsed diff
Returns:
str: review for full context
"""
file_contents = []
for diff in parsed_diff:
file = diff["file"]
chunk = diff["chunk"]
content = get_file_content(file)
if content is None:
continue
file_contents.append(f"File: {file}")
file_contents.append(content)
file_contents.append(f"Diff: {chunk}")
title = EVENT_DATA["pull_request"]["title"]
description = EVENT_DATA["pull_request"]["body"]
response = full_context_model.get_response_full_context(
title, description, file_contents
)
response = response.strip("`").lstrip("markdown").strip()
return response
def post_review(
full_context_review: str, single_chunk_comments: list[dict[str, Any]]
) -> None:
"""Post review to Gitea.
Args:
full_context_review: str, review for full context
single_chunk_comments: list[dict[str, Any]], comments for single chunk review
"""
repo_url = EVENT_DATA["pull_request"]["head"]["repo"]["url"]
pull_number = EVENT_DATA["number"]
commit_id = EVENT_DATA["pull_request"]["head"]["sha"]
url = f"{repo_url}/pulls/{pull_number}/reviews"
data = {
"body": full_context_review,
"event": "COMMENT",
"comments": single_chunk_comments,
"commit_id": commit_id,
}
response = requests.post(url, headers=HEADERS, json=data)
response.raise_for_status()
def main() -> None:
"""Code Reviewer for Gitea."""
if EVENT_DATA["action"] not in ["opened", "synchronized"]:
print("Unsupproted event.")
return
diff = get_diff()
if diff is None:
return
elif not diff:
print("No diff found.")
return
full_context_model = Model(
model=FULL_CONTEXT_MODEL_NAME,
api_key=FULL_CONTEXT_API_KEY,
is_full_context=True,
)
single_chunk_model = Model(
model=SINGLE_CHUNK_MODEL_NAME,
api_key=SINGLE_CHUNK_API_KEY,
is_full_context=False,
)
parsed_diff = parse_diff(diff)
comments = analyze_single_chunks(single_chunk_model, parsed_diff)
full_context_response = analyze_full_context(full_context_model, parsed_diff)
post_review(full_context_response, comments)
if __name__ == "__main__":
main()