Compare commits
5 Commits
mschoi-pat
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| c95fa0ed80 | |||
| 382bbc7689 | |||
| 6e8a95b056 | |||
| 29dc178ec4 | |||
| 5785abd22e |
@@ -1,5 +1,6 @@
|
||||
"""Code Reviewer for Gitea."""
|
||||
|
||||
import asyncio
|
||||
import fnmatch
|
||||
import json
|
||||
import os
|
||||
@@ -7,6 +8,7 @@ import re
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
import aiohttp
|
||||
from model import Model
|
||||
|
||||
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN", "")
|
||||
@@ -57,8 +59,8 @@ def parse_diff(diff: str) -> list[dict[str, Any]]:
|
||||
r"(?s)diff --git a/(.+?) b/(.*?)\r?\n(.*?)(?=diff --git a/|$)", re.S
|
||||
)
|
||||
old_new_pattern = re.compile(r"(?m)^(---|\+\+\+)\s+(.*)$")
|
||||
hunk_pattern = re.compile(
|
||||
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)(?=^@@ |$)",
|
||||
chunk_range_pattern = re.compile(
|
||||
r"@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)?(?=@@|\Z)",
|
||||
re.MULTILINE | re.DOTALL,
|
||||
)
|
||||
list_diff = []
|
||||
@@ -77,33 +79,31 @@ def parse_diff(diff: str) -> list[dict[str, Any]]:
|
||||
print("Neglict deleted file")
|
||||
continue
|
||||
new_file = new_file.lstrip("b/")
|
||||
|
||||
hunk_match = hunk_pattern.search(diff_text)
|
||||
if hunk_match is None:
|
||||
continue
|
||||
old_idx = int(hunk_match.group(1))
|
||||
new_idx = int(hunk_match.group(3))
|
||||
remain_text = diff_text[hunk_match.end() + 1 :]
|
||||
diff_text = []
|
||||
for line in remain_text.splitlines():
|
||||
if line.startswith("-"):
|
||||
diff_text.append(f"{old_idx} {line}")
|
||||
old_idx += 1
|
||||
elif line.startswith("+"):
|
||||
diff_text.append(f"{new_idx} {line}")
|
||||
new_idx += 1
|
||||
else:
|
||||
diff_text.append(line)
|
||||
diff_text = "\n".join(diff_text)
|
||||
|
||||
if any(fnmatch.fnmatch(new_file, pattern) for pattern in EXCLUDE_PATTERNS):
|
||||
print(f"Exclude file {new_file}")
|
||||
continue
|
||||
|
||||
output_diff_text = []
|
||||
for chunk_range_match in chunk_range_pattern.finditer(diff_text):
|
||||
old_idx = int(chunk_range_match.group(1))
|
||||
new_idx = int(chunk_range_match.group(3))
|
||||
for line in chunk_range_match.group(5).splitlines():
|
||||
if line.startswith("-"):
|
||||
output_diff_text.append(f"{old_idx} None {line}")
|
||||
old_idx += 1
|
||||
elif line.startswith("+"):
|
||||
output_diff_text.append(f"None {new_idx} {line}")
|
||||
new_idx += 1
|
||||
else:
|
||||
output_diff_text.append(f"{old_idx} {new_idx} {line}")
|
||||
old_idx += 1
|
||||
new_idx += 1
|
||||
|
||||
output_diff_text = "\n".join(output_diff_text)
|
||||
list_diff.append(
|
||||
{
|
||||
"file": new_file,
|
||||
"chunk": diff_text,
|
||||
"chunk": output_diff_text,
|
||||
}
|
||||
)
|
||||
return list_diff
|
||||
@@ -133,7 +133,7 @@ def create_comment(
|
||||
return comments
|
||||
|
||||
|
||||
async def analyze_single_chunks(
    single_chunk_model: Model, parsed_diff: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Analyze single chunks and create comments.

    Each parsed diff entry is sent to the model concurrently; the comments
    produced for the individual entries are flattened into one list.

    Args:
        single_chunk_model (Model): model used for the per-chunk review
        parsed_diff (list[dict[str, Any]]): entries with "file" and "chunk" keys

    Returns:
        list[dict[str, Any]]: comments for single chunk review
    """
    title = EVENT_DATA["pull_request"]["title"]
    description = EVENT_DATA["pull_request"]["body"]

    async def process_single_chunk(diff: dict[str, Any]) -> list[dict[str, Any]]:
        """Review one diff entry; return [] when the reply is not valid JSON."""
        file = diff["file"]
        chunk = diff["chunk"]
        response = await single_chunk_model.get_response_single_chunk(
            file, title, description, chunk
        )
        # Unwrap an optional ```json fence. removeprefix drops only the literal
        # language tag, whereas lstrip("json") strips any run of the characters
        # j/s/o/n and can eat the start of the payload itself.
        response = response.strip("`").removeprefix("json").strip() or "[]"

        try:
            response_json = json.loads(response)
        except json.JSONDecodeError:
            print(f"Failed to parse response: {response}")
            return []
        return create_comment(file, response_json)

    results = await asyncio.gather(
        *(process_single_chunk(diff) for diff in parsed_diff)
    )

    # Flatten the per-file comment lists into a single list.
    return [comment for result in results for comment in result]
|
||||
|
||||
|
||||
def get_file_content(file: str) -> str | None:
|
||||
async def get_file_content(file: str) -> str | None:
|
||||
"""Get file content from Gitea.
|
||||
|
||||
Args:
|
||||
@@ -183,15 +187,18 @@ def get_file_content(file: str) -> str | None:
|
||||
url = f"{repo_url}/raw/{branch}%2F{replaced_file}?ref={branch}"
|
||||
|
||||
try:
|
||||
response = requests.get(url, headers=HEADERS)
|
||||
response.raise_for_status()
|
||||
return response.text
|
||||
except requests.RequestException as e:
|
||||
print(f"Failed to get file content: {e}")
|
||||
return None
|
||||
async with aiohttp.ClientSession(headers=HEADERS) as session:
|
||||
async with session.get(url) as response:
|
||||
response.raise_for_status()
|
||||
return await response.text()
|
||||
except aiohttp.ClientError as e: # More specific exception handling
|
||||
print(f"Network error fetching {file}: {e}")
|
||||
except asyncio.TimeoutError:
|
||||
print(f"Timeout fetching {file}")
|
||||
return None
|
||||
|
||||
|
||||
def analyze_full_context(
|
||||
async def analyze_full_context(
|
||||
full_context_model: Model, parsed_diff: list[dict[str, Any]]
|
||||
) -> str:
|
||||
"""Analyze full context and create review.
|
||||
@@ -203,22 +210,26 @@ def analyze_full_context(
|
||||
Returns:
|
||||
str: review for full context
|
||||
"""
|
||||
file_contents = []
|
||||
for diff in parsed_diff:
|
||||
|
||||
async def get_file_data(diff: dict[str, Any]) -> str | None:
    """Fetch one changed file and bundle its content with its diff.

    Args:
        diff (dict[str, Any]): parsed diff entry with "file" and "chunk" keys

    Returns:
        str | None: "File: <name>", the file content and "Diff: <chunk>"
            joined by newlines, or None when the content could not be fetched
    """
    file = diff["file"]
    chunk = diff["chunk"]
    # get_file_content is a coroutine function: without `await`, `content`
    # would be a coroutine object, the None check below could never fire and
    # the coroutine's repr would be embedded instead of the file text.
    content = await get_file_content(file)
    if content is None:
        return None
    return f"File: {file}\n{content}\nDiff: {chunk}"
|
||||
|
||||
tasks = [get_file_data(diff) for diff in parsed_diff]
|
||||
file_contents_list = await asyncio.gather(*tasks)
|
||||
|
||||
file_contents = [item for item in file_contents_list if item is not None]
|
||||
|
||||
if not file_contents:
|
||||
return ""
|
||||
|
||||
title = EVENT_DATA["pull_request"]["title"]
|
||||
description = EVENT_DATA["pull_request"]["body"]
|
||||
response = full_context_model.get_response_full_context(
|
||||
response = await full_context_model.get_response_full_context(
|
||||
title, description, file_contents
|
||||
)
|
||||
response = response.strip("`").lstrip("markdown").strip()
|
||||
@@ -248,10 +259,10 @@ def post_review(
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""Code Reviewer for Gitea."""
|
||||
async def main() -> None:
|
||||
"""Code Reviewer for Gitea: Asynchronous version."""
|
||||
if EVENT_DATA["action"] not in ["opened", "synchronized"]:
|
||||
print("Unsupproted event.")
|
||||
print("Unsupported event.")
|
||||
return
|
||||
|
||||
diff = get_diff()
|
||||
@@ -273,10 +284,21 @@ def main() -> None:
|
||||
)
|
||||
|
||||
parsed_diff = parse_diff(diff)
|
||||
comments = analyze_single_chunks(single_chunk_model, parsed_diff)
|
||||
full_context_response = analyze_full_context(full_context_model, parsed_diff)
|
||||
comments_task = asyncio.create_task(
|
||||
analyze_single_chunks(single_chunk_model, parsed_diff)
|
||||
)
|
||||
|
||||
if EVENT_DATA["action"] == "opened":
|
||||
full_context_response_task = asyncio.create_task(
|
||||
analyze_full_context(full_context_model, parsed_diff)
|
||||
)
|
||||
full_context_response = await full_context_response_task
|
||||
else:
|
||||
full_context_response = ""
|
||||
|
||||
comments = await comments_task
|
||||
post_review(full_context_response, comments)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -4,8 +4,16 @@ from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
import google.generativeai as genai
|
||||
from anthropic import Anthropic
|
||||
from openai import OpenAI
|
||||
import typing_extensions as typing
|
||||
from anthropic import AsyncAnthropic
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
|
||||
class GoogleResponse(typing.TypedDict):
    """The response from Google model.

    Describes a single review comment. ``list[GoogleResponse]`` is passed as
    the ``response_schema`` of the Google generation config so that the reply
    is returned as JSON with these keys.
    """

    # Line the comment refers to; the system prompt asks for the line number
    # in the new file.
    lineNumber: int
    # Text of the review comment for that line.
    reviewComment: str
|
||||
|
||||
|
||||
class ModelProvider(Enum):
|
||||
@@ -35,6 +43,7 @@ class ModelProvider(Enum):
|
||||
PREFIX_TO_MODEL = {
|
||||
"gpt": ModelProvider.OPENAI,
|
||||
"o1": ModelProvider.OPENAI,
|
||||
"o3": ModelProvider.OPENAI,
|
||||
"claude": ModelProvider.ANTHROPIC,
|
||||
"gemini": ModelProvider.GOOGLE,
|
||||
"deepseek": ModelProvider.DEEPSEEK,
|
||||
@@ -79,16 +88,18 @@ class Model:
|
||||
"""
|
||||
match self.provider:
|
||||
case ModelProvider.OPENAI:
|
||||
return OpenAI(api_key=api_key)
|
||||
return AsyncOpenAI(api_key=api_key)
|
||||
case ModelProvider.ANTHROPIC:
|
||||
return Anthropic(api_key=api_key)
|
||||
return AsyncAnthropic(api_key=api_key)
|
||||
case ModelProvider.GOOGLE:
|
||||
genai.configure(api_key=api_key)
|
||||
return genai.GenerativeModel(model=self.model, api_key=api_key)
|
||||
return genai.GenerativeModel(
|
||||
model_name=self.model, system_instruction=self.system_prompt
|
||||
)
|
||||
case ModelProvider.DEEPSEEK:
|
||||
return OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
|
||||
return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com")
|
||||
|
||||
def request(self, prompt: str) -> str:
|
||||
async def request(self, prompt: str) -> str:
|
||||
"""Request the model to generate a response.
|
||||
|
||||
Args:
|
||||
@@ -99,7 +110,7 @@ class Model:
|
||||
"""
|
||||
match self.provider:
|
||||
case ModelProvider.OPENAI | ModelProvider.DEEPSEEK:
|
||||
response = self.session.chat.completions.create(
|
||||
response = await self.session.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=[
|
||||
{"role": "system", "content": self.system_prompt},
|
||||
@@ -113,7 +124,7 @@ class Model:
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
case ModelProvider.ANTHROPIC:
|
||||
response = self.session.messages.create(
|
||||
response = await self.session.messages.create(
|
||||
model=self.model,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
system=[
|
||||
@@ -128,10 +139,16 @@ class Model:
|
||||
)
|
||||
return response.content[0].text.strip()
|
||||
case ModelProvider.GOOGLE:
|
||||
response = self.session.generate_content(prompt)
|
||||
response = await self.session.generate_content_async(
|
||||
prompt,
|
||||
generation_config=genai.GenerationConfig(
|
||||
response_mime_type="application/json",
|
||||
response_schema=list[GoogleResponse],
|
||||
),
|
||||
)
|
||||
return response.text.strip()
|
||||
|
||||
def get_response_single_chunk(
|
||||
async def get_response_single_chunk(
|
||||
self, file: str, title: str, description: str, chunk: str
|
||||
) -> str:
|
||||
"""Get the response for a single chunk.
|
||||
@@ -146,9 +163,9 @@ class Model:
|
||||
str: The response.
|
||||
"""
|
||||
prompt = SINGLE_CHUNK_USER_PROMPT.format(file, title, description, chunk)
|
||||
return self.request(prompt)
|
||||
return await self.request(prompt)
|
||||
|
||||
def get_response_full_context(
|
||||
async def get_response_full_context(
|
||||
self, title: str, description: str, file_contents: list[str]
|
||||
) -> str:
|
||||
"""Get the response for full context.
|
||||
@@ -165,7 +182,7 @@ class Model:
|
||||
prompt = FULL_CONTEXT_USER_PROMPT.format(
|
||||
title, description, "\n".join(file_contents)
|
||||
)
|
||||
return self.request(prompt)
|
||||
return await self.request(prompt)
|
||||
except Exception as e:
|
||||
print(f"Error during full context response: {e}")
|
||||
print(prompt)
|
||||
@@ -175,14 +192,21 @@ class Model:
|
||||
# System prompt for the per-chunk review. It is assembled from adjacent string
# literals, so every fragment that continues a sentence must end with a space;
# otherwise the words on either side of the line break are fused together.
SINGLE_CHUNK_SYSTEM_PROMPT = (
    "Your task is to review pull requests. Instructions:\n"
    "- Provide the response in the following JSON format: "
    """[{{"lineNumber": int, "reviewComment": str}}] \n"""
    "- lineNumber is about the line number of the code that in new file. \n"
    "- lineNumber can be found at the front of each line. \n"
    "- At the first number is old line number, the second number is new line number. \n"
    "- If the line starts with `+`, it means the line is added. \n"
    "- If the line starts with `-`, it means the line is deleted. \n"
    "- Evaluate whether the code changes and additions are appropriate "
    "and if the new code structure is suitable. \n"
    "- Do not give positive comments or compliments. \n"
    # Trailing space added: this fragment used to run into the next one as
    # "improveotherwise".
    "- Provide comments and suggestions ONLY if there is something to improve "
    "otherwise return an empty array. \n"
    "- Write the comment in GitHub Markdown format. \n"
    "- Use the given description only for the overall context "
    "and only comment the code. \n"
    "- Do not suggest type hint or naming convention. \n"
    "- IMPORTANT: NEVER suggest adding comments to the code. \n"
)
|
||||
SINGLE_CHUNK_USER_PROMPT = (
|
||||
|
||||
@@ -21,15 +21,15 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install requests py-gitea openai anthropic google-generativeai
|
||||
pip install aiohttp requests py-gitea openai anthropic google-generativeai
|
||||
|
||||
- name: Run Code Review
|
||||
env:
|
||||
ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }}
|
||||
FULL_CONTEXT_MODEL: deepseek-reasoner
|
||||
FULL_CONTEXT_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
|
||||
SINGLE_CHUNK_MODEL: gpt-4o
|
||||
SINGLE_CHUNK_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
FULL_CONTEXT_MODEL: o3-mini
|
||||
FULL_CONTEXT_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
SINGLE_CHUNK_MODEL: gemini-2.0-flash-exp
|
||||
SINGLE_CHUNK_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
EXCLUDE: "*.yml,*.yaml"
|
||||
run: python .gitea/scripts/code_review.py
|
||||
|
||||
|
||||
Reference in New Issue
Block a user