change ci #13

Merged
mschoi merged 1 commit from change_ci into main 2025-01-28 00:10:44 +09:00
3 changed files with 113 additions and 68 deletions

View File

@@ -1,5 +1,6 @@
"""Code Reviewer for Gitea."""
import asyncio
import fnmatch
import json
import os
@@ -7,6 +8,7 @@ import re
from typing import Any
import requests
import aiohttp
from model import Model
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN", "")
@@ -57,8 +59,8 @@ def parse_diff(diff: str) -> list[dict[str, Any]]:
r"(?s)diff --git a/(.+?) b/(.*?)\r?\n(.*?)(?=diff --git a/|$)", re.S
)
old_new_pattern = re.compile(r"(?m)^(---|\+\+\+)\s+(.*)$")
hunk_pattern = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)(?=^@@ |$)",
chunk_range_pattern = re.compile(
r"@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)?(?=@@|\Z)",
re.MULTILINE | re.DOTALL,
)
list_diff = []
@@ -77,33 +79,31 @@ def parse_diff(diff: str) -> list[dict[str, Any]]:
print("Neglict deleted file")
continue
new_file = new_file.lstrip("b/")
hunk_match = hunk_pattern.search(diff_text)
if hunk_match is None:
continue
old_idx = int(hunk_match.group(1))
new_idx = int(hunk_match.group(3))
remain_text = diff_text[hunk_match.end() + 1 :]
diff_text = []
for line in remain_text.splitlines():
if line.startswith("-"):
diff_text.append(f"{old_idx} {line}")
old_idx += 1
elif line.startswith("+"):
diff_text.append(f"{new_idx} {line}")
new_idx += 1
else:
diff_text.append(line)
diff_text = "\n".join(diff_text)
if any(fnmatch.fnmatch(new_file, pattern) for pattern in EXCLUDE_PATTERNS):
print(f"Exclude file {new_file}")
continue
output_diff_text = []
for chunk_range_match in chunk_range_pattern.finditer(diff_text):
old_idx = int(chunk_range_match.group(1))
new_idx = int(chunk_range_match.group(3))
for line in chunk_range_match.group(5).splitlines():
if line.startswith("-"):
output_diff_text.append(f"{old_idx} None {line}")
old_idx += 1
elif line.startswith("+"):
output_diff_text.append(f"None {new_idx} {line}")
new_idx += 1
else:
output_diff_text.append(f"{old_idx} {new_idx} {line}")
old_idx += 1
new_idx += 1
output_diff_text = "\n".join(output_diff_text)
list_diff.append(
{
"file": new_file,
"chunk": diff_text,
"chunk": output_diff_text,
}
)
return list_diff
@@ -133,7 +133,7 @@ def create_comment(
return comments
def analyze_single_chunks(
async def analyze_single_chunks(
single_chunk_model: Model, parsed_diff: list[dict[str, Any]]
) -> list[dict[str, Any]]:
"""Analyze single chunks and create comments.
@@ -145,29 +145,33 @@ def analyze_single_chunks(
Returns:
list[dict[str, Any]]: comments for single chunk review
"""
comments = []
title = EVENT_DATA["pull_request"]["title"]
description = EVENT_DATA["pull_request"]["body"]
for diff in parsed_diff:
async def process_single_chunk(diff: dict[str, Any]):
file = diff["file"]
chunk = diff["chunk"]
response = single_chunk_model.get_response_single_chunk(
response = await single_chunk_model.get_response_single_chunk(
file, title, description, chunk
)
response = response.strip("`").lstrip("json").strip() or "[]"
try:
response_json = json.loads(response)
new_comments = create_comment(file, response_json)
comments.extend(new_comments)
return create_comment(file, response_json)
except json.JSONDecodeError:
print(f"Failed to parse response: {response}")
continue
return []
title = EVENT_DATA["pull_request"]["title"]
description = EVENT_DATA["pull_request"]["body"]
tasks = [process_single_chunk(diff) for diff in parsed_diff]
results = await asyncio.gather(*tasks)
# Flatten the list of comments
comments = [comment for result in results for comment in result]
return comments
def get_file_content(file: str) -> str | None:
async def get_file_content(file: str) -> str | None:
"""Get file content from Gitea.
Args:
@@ -183,15 +187,18 @@ def get_file_content(file: str) -> str | None:
url = f"{repo_url}/raw/{branch}%2F{replaced_file}?ref={branch}"
try:
response = requests.get(url, headers=HEADERS)
response.raise_for_status()
return response.text
except requests.RequestException as e:
print(f"Failed to get file content: {e}")
return None
async with aiohttp.ClientSession(headers=HEADERS) as session:
async with session.get(url) as response:
response.raise_for_status()
return await response.text()
except aiohttp.ClientError as e: # More specific exception handling
print(f"Network error fetching {file}: {e}")
except asyncio.TimeoutError:
print(f"Timeout fetching {file}")
return None
def analyze_full_context(
async def analyze_full_context(
full_context_model: Model, parsed_diff: list[dict[str, Any]]
) -> str:
"""Analyze full context and create review.
@@ -203,22 +210,26 @@ def analyze_full_context(
Returns:
str: review for full context
"""
file_contents = []
for diff in parsed_diff:
async def get_file_data(diff: dict[str, Any]):
file = diff["file"]
chunk = diff["chunk"]
content = get_file_content(file)
if content is None:
continue
file_contents.append(f"File: {file}")
file_contents.append(content)
file_contents.append(f"Diff: {chunk}")
return None
return f"File: {file}\n{content}\nDiff: {chunk}"
tasks = [get_file_data(diff) for diff in parsed_diff]
file_contents_list = await asyncio.gather(*tasks)
file_contents = [item for item in file_contents_list if item is not None]
if not file_contents:
return ""
title = EVENT_DATA["pull_request"]["title"]
description = EVENT_DATA["pull_request"]["body"]
response = full_context_model.get_response_full_context(
response = await full_context_model.get_response_full_context(
title, description, file_contents
)
response = response.strip("`").lstrip("markdown").strip()
@@ -248,10 +259,10 @@ def post_review(
response.raise_for_status()
def main() -> None:
"""Code Reviewer for Gitea."""
async def main() -> None:
"""Code Reviewer for Gitea: Asynchronous version."""
if EVENT_DATA["action"] not in ["opened", "synchronized"]:
print("Unsupproted event.")
print("Unsupported event.")
return
diff = get_diff()
@@ -273,10 +284,21 @@ def main() -> None:
)
parsed_diff = parse_diff(diff)
comments = analyze_single_chunks(single_chunk_model, parsed_diff)
full_context_response = analyze_full_context(full_context_model, parsed_diff)
comments_task = asyncio.create_task(
analyze_single_chunks(single_chunk_model, parsed_diff)
)
if EVENT_DATA["action"] == "opened":
full_context_response_task = asyncio.create_task(
analyze_full_context(full_context_model, parsed_diff)
)
full_context_response = await full_context_response_task
else:
full_context_response = ""
comments = await comments_task
post_review(full_context_response, comments)
if __name__ == "__main__":
main()
asyncio.run(main())

View File

@@ -4,8 +4,16 @@ from enum import Enum
from typing import Any
import google.generativeai as genai
from anthropic import Anthropic
from openai import OpenAI
import typing_extensions as typing
from anthropic import AsyncAnthropic
from openai import AsyncOpenAI
class GoogleResponse(typing.TypedDict):
"""The response from Google model."""
lineNumber: int
reviewComment: str
class ModelProvider(Enum):
@@ -79,16 +87,18 @@ class Model:
"""
match self.provider:
case ModelProvider.OPENAI:
return OpenAI(api_key=api_key)
return AsyncOpenAI(api_key=api_key)
case ModelProvider.ANTHROPIC:
return Anthropic(api_key=api_key)
return AsyncAnthropic(api_key=api_key)
case ModelProvider.GOOGLE:
genai.configure(api_key=api_key)
return genai.GenerativeModel(model=self.model, api_key=api_key)
return genai.GenerativeModel(
model_name=self.model, system_instruction=self.system_prompt
)
case ModelProvider.DEEPSEEK:
return OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com")
def request(self, prompt: str) -> str:
async def request(self, prompt: str) -> str:
"""Request the model to generate a response.
Args:
@@ -99,7 +109,7 @@ class Model:
"""
match self.provider:
case ModelProvider.OPENAI | ModelProvider.DEEPSEEK:
response = self.session.chat.completions.create(
response = await self.session.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": self.system_prompt},
@@ -113,7 +123,7 @@ class Model:
)
return response.choices[0].message.content.strip()
case ModelProvider.ANTHROPIC:
response = self.session.messages.create(
response = await self.session.messages.create(
model=self.model,
messages=[{"role": "user", "content": prompt}],
system=[
@@ -128,10 +138,16 @@ class Model:
)
return response.content[0].text.strip()
case ModelProvider.GOOGLE:
response = self.session.generate_content(prompt)
response = await self.session.generate_content_async(
prompt,
generation_config=genai.GenerationConfig(
response_mime_type="application/json",
response_schema=list[GoogleResponse],
),
)
return response.text.strip()
def get_response_single_chunk(
async def get_response_single_chunk(
self, file: str, title: str, description: str, chunk: str
) -> str:
"""Get the response for a single chunk.
@@ -146,9 +162,9 @@ class Model:
str: The response.
"""
prompt = SINGLE_CHUNK_USER_PROMPT.format(file, title, description, chunk)
return self.request(prompt)
return await self.request(prompt)
def get_response_full_context(
async def get_response_full_context(
self, title: str, description: str, file_contents: list[str]
) -> str:
"""Get the response for full context.
@@ -165,7 +181,7 @@ class Model:
prompt = FULL_CONTEXT_USER_PROMPT.format(
title, description, "\n".join(file_contents)
)
return self.request(prompt)
return await self.request(prompt)
except Exception as e:
print(f"Error during full context response: {e}")
print(prompt)
@@ -175,14 +191,21 @@ class Model:
SINGLE_CHUNK_SYSTEM_PROMPT = (
"Your task is to review pull requests. Instructions:\n"
"- Provide the response in the following JSON format: "
"""[{{"lineNumber": <line_number>, "reviewComment": "<review comment>"}}] \n"""
"""[{{"lineNumber": int, "reviewComment": str}}] \n"""
"- lineNumber is about the line number of the code that in new file. \n"
"- lineNumber can be found at the front of each line. \n"
"- At the first number is old line number, the second number is new line number. \n"
"- If the line starts with `+`, it means the line is added. \n"
"- If the line starts with `-`, it means the line is deleted. \n"
"- Evaluate whether the code changes and additions are appropriate "
"and if the new code structure is suitable. \n"
"- Do not give positive comments or compliments. \n"
"- Provide comments and suggestions ONLY if there is something to improve"
"otherwise return an empty array. \n"
"- Write the comment in GitHub Markdown format. \n"
"- Use the given description only for the overall context "
"and only comment the code. \n"
"- Do not suggest type hint or naming convention. \n"
"- IMPORTANT: NEVER suggest adding comments to the code. \n"
)
SINGLE_CHUNK_USER_PROMPT = (

View File

@@ -21,7 +21,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install requests py-gitea openai anthropic google-generativeai
pip install aiohttp requests py-gitea openai anthropic google-generativeai
- name: Run Code Review
env: