Initial commit

This commit is contained in:
2025-04-22 22:39:50 +09:00
commit 767e35818d
9 changed files with 774 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
name: Bug report
description: An issue with MIN-T
labels: 'bug'
body:
- type: textarea
attributes:
label: Minimal reproducible code
description: Please write a minimal complete program which has this bug. Do not point to an existing repository.
value: |
...
validations:
required: true
- type: textarea
attributes:
label: Steps to reproduce the bug with the above code
validations:
required: true
- type: textarea
attributes:
label: Actual Behaviour
description: When I do *this*, *that* happens, and I think it shouldn't.
validations:
required: true
- type: textarea
attributes:
label: Expected Behaviour
description: I think *this* should happen instead.
validations:
required: true
- type: textarea
attributes:
label: Additional Context
description: Add any other context about the problem here.

View File

@@ -0,0 +1,16 @@
name: Generic issue template
description: For issues that are not covered by the other templates
body:
- type: textarea
attributes:
label: Summary
description: Short description of the issue.
validations:
required: true
- type: textarea
attributes:
label: Details
description: Whatever you want to share
validations:
required: true

View File

@@ -0,0 +1,24 @@
name: Feature request
description: Suggest an idea for this project
labels: 'enhancement'
body:
- type: textarea
attributes:
label: Describe your use case
description: Describe the problem you're trying to solve. This is not mandatory and we *do* consider features without a specific use case, but real problems have priority.
validations:
required: true
- type: textarea
attributes:
label: Describe the solution you'd like
description: Please explain what the wanted solution should look like. You are **strongly encouraged** to attach a snippet of (pseudo)code.
validations:
required: true
- type: textarea
attributes:
label: Alternatives, if applicable
description: A clear and concise description of any alternative solutions or features you've managed to come up with.
- type: textarea
attributes:
label: Additional Context
description: Add any other context about the feature request here.

View File

@@ -0,0 +1,26 @@
## Summary (요약)
- Fill me
## Describe your changes (주요 변화)
- Fill me
## Issue number and link (관련 이슈)
- Fill me
## PR Type
<!-- [] 내에 "x" 표시로 해당PR의 타입을 표시하면 리뷰어가 코드를 이해하는데 도움을 줄 수 있습니다. -->
- [ ] Bugfix
- [ ] Feature
- [ ] Code style update (formatting, local variables)
- [ ] Refactoring (no functional changes, no api changes)
- [ ] Build related changes
- [ ] CI related changes
- [ ] Documentation content changes
- [ ] angular.io application / infrastructure changes
- [ ] Other... Please describe:
## To Reviewer
- 리뷰어에게 하고싶은 메세지
## Reference
- N/A

View File

@@ -0,0 +1,304 @@
"""Code Reviewer for Gitea."""
import asyncio
import fnmatch
import json
import os
import re
from typing import Any
import requests
import aiohttp
from model import Model
# Token used to authenticate every request this script sends.
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN", "")
HEADERS = {"Authorization": f"token {ACCESS_TOKEN}"}

# Path of the JSON file that holds the payload of the triggering event.
GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH")
try:
    # An unset variable is mapped to "" so that it fails with
    # FileNotFoundError (an OSError, handled below) instead of an uncaught
    # TypeError from open(None).
    with open(GITHUB_EVENT_PATH or "", "r", encoding="utf-8") as f:
        EVENT_DATA = json.load(f)
except (OSError, json.JSONDecodeError):
    # Covers a missing/unreadable file as well as a payload that is not JSON.
    print("Failed to load event data.")
    # SystemExit is raised directly: the exit() helper is installed by the
    # `site` module and is not guaranteed to exist in every interpreter.
    raise SystemExit(1) from None

# Model names and API keys for the two review passes.
FULL_CONTEXT_MODEL_NAME = os.getenv("FULL_CONTEXT_MODEL", "")
SINGLE_CHUNK_MODEL_NAME = os.getenv("SINGLE_CHUNK_MODEL", "")
FULL_CONTEXT_API_KEY = os.getenv("FULL_CONTEXT_API_KEY", "")
SINGLE_CHUNK_API_KEY = os.getenv("SINGLE_CHUNK_API_KEY", "")

# Comma-separated fnmatch patterns of files to skip. Surrounding whitespace
# and empty entries are dropped so "*.yml, *.yaml" behaves like "*.yml,*.yaml".
EXCLUDE_PATTERNS = [
    pattern.strip()
    for pattern in os.getenv("EXCLUDE", "").split(",")
    if pattern.strip()
]
def get_diff(timeout: float = 30.0) -> str | None:
    """Get code difference between base and head from Gitea.

    Args:
        timeout: seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the job indefinitely.

    Returns:
        str | None: code difference between base and head, or None if failed to
        get diff
    """
    url = EVENT_DATA["pull_request"]["diff_url"]
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        # requests.Timeout is a RequestException, so timeouts land here too.
        print(f"Failed to get diff: {e}")
        return None
    return response.text
def parse_diff(diff: str) -> list[dict[str, Any]]:
"""Parse diff into list of dicts.
Args:
diff: str, code difference between base and head
Returns:
list[dict[str, Any]]: list of dicts, each dict represents a code chunks
"""
file_pattern = re.compile(
r"(?s)diff --git a/(.+?) b/(.*?)\r?\n(.*?)(?=diff --git a/|$)", re.S
)
old_new_pattern = re.compile(r"(?m)^(---|\+\+\+)\s+(.*)$")
chunk_range_pattern = re.compile(
r"@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)?(?=@@|\Z)",
re.MULTILINE | re.DOTALL,
)
list_diff = []
for match in file_pattern.finditer(diff):
diff_text = match.group(3)
old_new_match = list(old_new_pattern.finditer(diff_text))
if len(old_new_match) != 2:
continue
old_file = old_new_match[0].group(2)
old_file = old_file.lstrip("a/") if old_file.startswith("a/") else old_file
new_file = old_new_match[1].group(2)
if new_file == "/dev/null":
print("Neglict deleted file")
continue
new_file = new_file.lstrip("b/")
if any(fnmatch.fnmatch(new_file, pattern) for pattern in EXCLUDE_PATTERNS):
print(f"Exclude file {new_file}")
continue
output_diff_text = []
for chunk_range_match in chunk_range_pattern.finditer(diff_text):
old_idx = int(chunk_range_match.group(1))
new_idx = int(chunk_range_match.group(3))
for line in chunk_range_match.group(5).splitlines():
if line.startswith("-"):
output_diff_text.append(f"{old_idx} None {line}")
old_idx += 1
elif line.startswith("+"):
output_diff_text.append(f"None {new_idx} {line}")
new_idx += 1
else:
output_diff_text.append(f"{old_idx} {new_idx} {line}")
old_idx += 1
new_idx += 1
output_diff_text = "\n".join(output_diff_text)
list_diff.append(
{
"file": new_file,
"chunk": output_diff_text,
}
)
return list_diff
def create_comment(
    file: str, ai_response: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Create comments for single chunk review.

    The model output is untrusted: a payload that is not a list, or an item
    that lacks a key or carries a non-numeric line number, is skipped instead
    of raising, so one bad item cannot abort the whole review.

    Args:
        file: str, file name
        ai_response: list[dict[str, Any]], AI response for single chunk review;
            each item is expected to hold ``lineNumber`` and ``reviewComment``

    Returns:
        list[dict[str, Any]]: comments for single chunk review, each with the
        keys ``body``, ``path`` and ``new_position``
    """
    comments = []
    if not isinstance(ai_response, list):
        return comments
    for item in ai_response:
        try:
            comment = {
                "body": f"[REVIEW] {item['reviewComment']}",
                "path": file,
                "new_position": int(item["lineNumber"]),
            }
        except (KeyError, TypeError, ValueError):
            # KeyError: missing field; TypeError: item is not a mapping or the
            # line is None; ValueError: the line is not a number.
            print(f"Skip malformed review item: {item}")
            continue
        comments.append(comment)
    return comments
async def analyze_single_chunks(
    single_chunk_model: Model, parsed_diff: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Analyze single chunks and create comments.

    All files are sent to the model concurrently; the per-file comment lists
    are then flattened into one list.

    Args:
        single_chunk_model: AI Session for single chunk analysis
        parsed_diff: list[dict[str, Any]], parsed diff

    Returns:
        list[dict[str, Any]]: comments for single chunk review
    """
    title = EVENT_DATA["pull_request"]["title"]
    # A pull request without a description may carry a null body; send an
    # empty text rather than the literal string "None".
    description = EVENT_DATA["pull_request"]["body"] or ""

    async def process_single_chunk(diff: dict[str, Any]) -> list[dict[str, Any]]:
        """Review one file and turn the model answer into comments."""
        file = diff["file"]
        response = await single_chunk_model.get_response_single_chunk(
            file, title, description, diff["chunk"]
        )
        # Drop an optional ```json fence. removeprefix removes the exact word;
        # lstrip("json") would strip any leading run of the letters j/s/o/n.
        payload = response.strip().strip("`").removeprefix("json").strip() or "[]"
        try:
            response_json = json.loads(payload)
        except json.JSONDecodeError:
            print(f"Failed to parse response: {payload}")
            return []
        if not isinstance(response_json, list):
            # Valid JSON, but not the requested array of comment objects.
            print(f"Failed to parse response: {payload}")
            return []
        return create_comment(file, response_json)

    results = await asyncio.gather(
        *(process_single_chunk(diff) for diff in parsed_diff)
    )
    # Flatten the list of comments
    return [comment for result in results for comment in result]
async def get_file_content(file: str) -> str | None:
    """Download the head-branch version of one file from Gitea.

    Args:
        file: str, path of the file inside the repository

    Returns:
        str | None: text of the file, or None when the request fails with a
        client error or times out
    """
    repo_url = EVENT_DATA["pull_request"]["head"]["repo"]["url"]
    branch = EVENT_DATA["pull_request"]["head"]["ref"]
    # Path separators are sent percent-encoded inside the URL.
    replaced_file = "%2F".join(file.split("/"))
    url = f"{repo_url}/raw/{branch}%2F{replaced_file}?ref={branch}"
    try:
        async with (
            aiohttp.ClientSession(headers=HEADERS) as session,
            session.get(url) as reply,
        ):
            reply.raise_for_status()
            text = await reply.text()
    except aiohttp.ClientError as e:
        print(f"Network error fetching {file}: {e}")
        return None
    except asyncio.TimeoutError:
        print(f"Timeout fetching {file}")
        return None
    return text
async def analyze_full_context(
    full_context_model: Model, parsed_diff: list[dict[str, Any]]
) -> str:
    """Analyze full context and create review.

    The complete content of every changed file is fetched concurrently and
    sent to the model together with its diff. Files whose content cannot be
    fetched are left out.

    Args:
        full_context_model: AI Session for full context analysis
        parsed_diff: list[dict[str, Any]], parsed diff

    Returns:
        str: review for full context, or an empty string when no file content
        is available or the model returns nothing
    """

    async def get_file_data(diff: dict[str, Any]) -> str | None:
        """Combine the content and the diff of one file into a prompt part."""
        file = diff["file"]
        # get_file_content is a coroutine function: without `await` the prompt
        # would contain "<coroutine object ...>" and the None check below
        # could never trigger.
        content = await get_file_content(file)
        if content is None:
            return None
        return f"File: {file}\n{content}\nDiff: {diff['chunk']}"

    file_contents_list = await asyncio.gather(
        *(get_file_data(diff) for diff in parsed_diff)
    )
    file_contents = [item for item in file_contents_list if item is not None]
    if not file_contents:
        return ""

    title = EVENT_DATA["pull_request"]["title"]
    # A null body is sent as empty text instead of the literal "None".
    description = EVENT_DATA["pull_request"]["body"] or ""
    response = await full_context_model.get_response_full_context(
        title, description, file_contents
    )
    if not response:
        # The model call failed or produced no text.
        return ""
    # Drop an optional ```markdown fence. removeprefix removes the exact word;
    # lstrip("markdown") would also eat leading letters of the real text
    # (e.g. "most ..." -> "st ...").
    return response.strip().strip("`").removeprefix("markdown").strip()
def post_review(
    full_context_review: str,
    single_chunk_comments: list[dict[str, Any]],
    timeout: float = 30.0,
) -> None:
    """Post review to Gitea.

    Args:
        full_context_review: str, review for full context
        single_chunk_comments: list[dict[str, Any]], comments for single chunk review
        timeout: seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the job indefinitely.

    Raises:
        requests.RequestException: if the request times out, cannot be sent, or
            the server answers with an error status
    """
    pull_request = EVENT_DATA["pull_request"]
    repo_url = pull_request["head"]["repo"]["url"]
    pull_number = EVENT_DATA["number"]
    url = f"{repo_url}/pulls/{pull_number}/reviews"
    data = {
        "body": full_context_review,
        "event": "COMMENT",
        "comments": single_chunk_comments,
        # Ties the review to the commit that was actually analysed.
        "commit_id": pull_request["head"]["sha"],
    }
    response = requests.post(url, headers=HEADERS, json=data, timeout=timeout)
    response.raise_for_status()
async def main() -> None:
    """Run the review for the triggering pull request event and post it."""
    action = EVENT_DATA["action"]
    if action not in ("opened", "synchronized"):
        print("Unsupported event.")
        return

    diff = get_diff()
    if not diff:
        # None means the download failed (already reported by get_diff);
        # an empty string means there is nothing to review.
        if diff is not None:
            print("No diff found.")
        return

    full_context_model = Model(
        model=FULL_CONTEXT_MODEL_NAME,
        api_key=FULL_CONTEXT_API_KEY,
        is_full_context=True,
    )
    single_chunk_model = Model(
        model=SINGLE_CHUNK_MODEL_NAME,
        api_key=SINGLE_CHUNK_API_KEY,
        is_full_context=False,
    )
    parsed_diff = parse_diff(diff)

    # Scheduled first so the per-file review runs while the summary is awaited.
    comments_task = asyncio.create_task(
        analyze_single_chunks(single_chunk_model, parsed_diff)
    )
    # The overall summary is written only when the pull request is opened.
    full_context_response = (
        await asyncio.create_task(
            analyze_full_context(full_context_model, parsed_diff)
        )
        if action == "opened"
        else ""
    )
    post_review(full_context_response, await comments_task)


if __name__ == "__main__":
    asyncio.run(main())

261
.gitea/scripts/model.py Normal file
View File

@@ -0,0 +1,261 @@
"""Model for code review."""
from enum import Enum
from typing import Any
import google.generativeai as genai
import typing_extensions as typing
from anthropic import AsyncAnthropic
from openai import AsyncOpenAI
class GoogleResponse(typing.TypedDict):
    """The response from Google model.

    Shape of one review comment. ``Model.request`` passes
    ``list[GoogleResponse]`` as the response schema of the Google request.
    """

    # Line number the comment refers to.
    lineNumber: int
    # Text of the review comment.
    reviewComment: str
class ModelProvider(Enum):
    """Vendors whose models can be used for the review."""

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    DEEPSEEK = "deepseek"

    @classmethod
    def from_model(cls, model: str) -> "ModelProvider":
        """Look up the provider whose known prefix starts the model name.

        Args:
            model (str): The model name.

        Returns:
            ModelProvider: The model provider.

        Raises:
            ValueError: If no known prefix matches the model name.
        """
        matched = next(
            (
                provider
                for prefix, provider in PREFIX_TO_MODEL.items()
                if model.startswith(prefix)
            ),
            None,
        )
        if matched is None:
            raise ValueError(f"Unknown model: {model}")
        return matched


# Model-name prefix -> provider; checked in insertion order by from_model.
PREFIX_TO_MODEL = dict(
    gpt=ModelProvider.OPENAI,
    o1=ModelProvider.OPENAI,
    o3=ModelProvider.OPENAI,
    claude=ModelProvider.ANTHROPIC,
    gemini=ModelProvider.GOOGLE,
    deepseek=ModelProvider.DEEPSEEK,
)
class Model:
"""The model class.
Attributes:
model (str): The model name.
api_key (str): The API key.
system_prompt (str): The system prompt.
max_tokens (int): The maximum tokens.
"""
def __init__( # noqa: D107
self,
model: str,
api_key: str,
is_full_context: bool,
max_tokens: int = 4196,
):
self.model = model
self.system_prompt = (
FULL_CONTEXT_SYSTEM_PROMPT
if is_full_context
else SINGLE_CHUNK_SYSTEM_PROMPT
)
self.max_tokens = max_tokens
self.provider = ModelProvider.from_model(model)
self.session = self.create_session(api_key)
def create_session(self, api_key: str) -> Any:
"""Create a session for the model.
Args:
api_key (str): The API key.
Returns:
Any: The session.
"""
match self.provider:
case ModelProvider.OPENAI:
return AsyncOpenAI(api_key=api_key)
case ModelProvider.ANTHROPIC:
return AsyncAnthropic(api_key=api_key)
case ModelProvider.GOOGLE:
genai.configure(api_key=api_key)
return genai.GenerativeModel(
model_name=self.model, system_instruction=self.system_prompt
)
case ModelProvider.DEEPSEEK:
return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com")
async def request(self, prompt: str) -> str:
"""Request the model to generate a response.
Args:
prompt (str): The prompt to generate a response for.
Returns:
str: The generated response.
"""
match self.provider:
case ModelProvider.OPENAI | ModelProvider.DEEPSEEK:
response = await self.session.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": self.system_prompt},
{"role": "user", "content": prompt},
],
temperature=0.2,
max_tokens=self.max_tokens,
top_p=1,
frequency_penalty=0,
presence_penalty=0,
)
return response.choices[0].message.content.strip()
case ModelProvider.ANTHROPIC:
response = await self.session.messages.create(
model=self.model,
messages=[{"role": "user", "content": prompt}],
system=[
{
"type": "text",
"text": self.system_prompt,
"cache_control": {"type": "ephemeral"},
}
],
temperature=0.2,
max_tokens=self.max_tokens,
)
return response.content[0].text.strip()
case ModelProvider.GOOGLE:
response = await self.session.generate_content_async(
prompt,
generation_config=genai.GenerationConfig(
response_mime_type="application/json",
response_schema=list[GoogleResponse],
),
)
return response.text.strip()
async def get_response_single_chunk(
self, file: str, title: str, description: str, chunk: str
) -> str:
"""Get the response for a single chunk.
Args:
file (str): The file name.
title (str): The pull request title.
description (str): The pull request description.
chunk (str): The diff chunk.
Returns:
str: The response.
"""
prompt = SINGLE_CHUNK_USER_PROMPT.format(file, title, description, chunk)
return await self.request(prompt)
async def get_response_full_context(
self, title: str, description: str, file_contents: list[str]
) -> str:
"""Get the response for full context.
Args:
title (str): The pull request title.
description (str): The pull request description.
file_contents (list[str]): The file contents, diffs.
Returns:
str: The response.
"""
try:
prompt = FULL_CONTEXT_USER_PROMPT.format(
title, description, "\n".join(file_contents)
)
return await self.request(prompt)
except Exception as e:
print(f"Error during full context response: {e}")
print(prompt)
return None
# System prompt of the per-file review. It is sent to the model as is (never
# passed through str.format), so the JSON example uses single braces.
SINGLE_CHUNK_SYSTEM_PROMPT = (
    "Your task is to review pull requests. Instructions:\n"
    "- Provide the response in the following JSON format: "
    '[{"lineNumber": int, "reviewComment": str}] \n'
    "- lineNumber is about the line number of the code that in new file. \n"
    "- lineNumber can be found at the front of each line. \n"
    "- At the first number is old line number, the second number is new line number. \n"
    "- If the line starts with `+`, it means the line is added. \n"
    "- If the line starts with `-`, it means the line is deleted. \n"
    "- Evaluate whether the code changes and additions are appropriate "
    "and if the new code structure is suitable. \n"
    "- Do not give positive comments or compliments. \n"
    # The trailing ", " keeps the two halves of this sentence apart.
    "- Provide comments and suggestions ONLY if there is something to improve, "
    "otherwise return an empty array. \n"
    "- Write the comment in GitHub Markdown format. \n"
    "- Use the given description only for the overall context "
    "and only comment the code. \n"
    "- Do not suggest type hint or naming convention. \n"
    "- IMPORTANT: NEVER suggest adding comments to the code. \n"
)
# User prompt of the per-file review. Positional placeholders, in order:
# file name, pull request title, pull request description, numbered diff.
SINGLE_CHUNK_USER_PROMPT = "\n".join(
    (
        "Review the following code diff in the file {} and take the pull request "
        "title and description into account when writing the response. ",
        "Pull request title: {} ",
        "Pull request description: ",
        "--- ",
        "{} ",
        "--- ",
        "Git diff to review: ",
        "```diff ",
        "{} ",
        "```",
    )
)
# System prompt of the whole pull request summary review. Each tuple entry is
# one line of the prompt; every line, including the last, ends with a newline.
FULL_CONTEXT_SYSTEM_PROMPT = "".join(
    f"{line}\n"
    for line in (
        "You are an experienced software engineer specializing in reviewing "
        "pull requests. Your task is to provide an overall code review summary "
        "for a PR. Focus on assessing the following aspects:",
        "1. **Code Structure & Architecture:** Evaluate whether the code is "
        "well-organized, modular, and adheres to clean code principles. "
        "Suggest improvements if needed.",
        "2. **Refactoring Opportunities:** Identify areas where the code can be "
        "optimized or simplified without changing its behavior.",
        "3. **Potential Future Problems:** Highlight possible scalability, "
        "maintainability, or dependency issues that might arise in the future "
        "based on the current implementation.",
        "Be constructive and clear in your feedback. Avoid commenting on "
        "trivial issues or syntax errors—focus on high-level feedback.",
        "Precise instructions:",
        "- Do not give positive comments or compliments.",
        "- Provide comments and suggestions ONLY if there is something to "
        "improve, otherwise return an empty string.",
        "- Write the comment in GitHub Markdown format.",
        "- Do not start with 'markdown' or '```markdown'.",
        "- IMPORTANT: Give example code block or pseudo code if you can.",
    )
)
# User prompt of the summary review. Positional placeholders, in order:
# pull request title, pull request description, file contents with diffs.
FULL_CONTEXT_USER_PROMPT = "\n".join(
    (
        "Review the following code and take the pull request title and "
        "description into account when writing the response. ",
        "Pull request title: {} ",
        "Pull request description: ",
        "--- ",
        "{} ",
        "--- ",
        "Code to review: ",
        "{}",
    )
)

View File

@@ -0,0 +1,35 @@
name: Code Review
on:
pull_request:
types: [opened, synchronize]
permissions:
contents: read
pull-requests: write
jobs:
review:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install aiohttp requests py-gitea openai anthropic google-generativeai
- name: Run Code Review
env:
ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }}
FULL_CONTEXT_MODEL: o3-mini
FULL_CONTEXT_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SINGLE_CHUNK_MODEL: gemini-2.0-flash-exp
SINGLE_CHUNK_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
EXCLUDE: "*.yml,*.yaml"
run: python .gitea/scripts/code_review.py

View File

@@ -0,0 +1,30 @@
name: mint_ci
on:
push:
branches: [ "main"]
pull_request:
types: [unlabeled, opened, synchronize, reopened]
env:
PYTHON_VERSION: "3.12.3"
jobs:
lint:
name: Check Python code using ruff
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
token: ${{ secrets.ACCESS_TOKEN }}
- uses: astral-sh/ruff-action@v1
with:
version: 0.7.4
args: check . --select=E5,F4,F8,D400,D403,D417,D100,D102,D103,D101,ANN001,ANN201 --output-format=full --exclude='**/test_*.py','**/__init__.py' --force-exclude
changed-files: 'true'
- uses: astral-sh/ruff-action@v1
with:
version: 0.7.4
args: check . --select=I --output-format=full --force-exclude
changed-files: 'true'
continue-on-error: true

View File

@@ -0,0 +1,45 @@
on:
push:
branches: [main]
pull_request:
types: [unlabeled, opened, synchronize, reopened]
merge_group:
name: Rust-lint
jobs:
rust_tests:
env:
RUST_BACKTRACE: full
name: Run rust tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
components: clippy
- uses: Swatinem/rust-cache@v2
- name: run clippy
run: cargo clippy --workspace --exclude rustpython_wasm -- -Dwarnings
- name: run rust tests
run: cargo test --workspace --verbose
- name: check compilation without threading
run: cargo check
lint:
name: Check Rust code with rustfmt and clippy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt, clippy
- name: run rustfmt
run: cargo fmt --check