diff --git a/.github/scripts/code_review.py b/.github/scripts/code_review.py
new file mode 100644
index 0000000..c796027
--- /dev/null
+++ b/.github/scripts/code_review.py
@@ -0,0 +1,535 @@
+import base64
+import os
+import re
+import fnmatch
+import json
+import datetime
+from openai import OpenAI
+from anthropic import Anthropic
+import google.generativeai as genai
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Any, Optional, Callable
+from urllib.parse import quote
+
+import requests
+
+ACCESS_TOKEN = os.getenv("ACCESS_TOKEN", "")
+HEADERS = {"Authorization": f"token {ACCESS_TOKEN}"}
+# Seconds allowed per HTTP call so a stalled server cannot hang the job.
+REQUEST_TIMEOUT = 60
+
+# Plain string (never passed to str.format), so the JSON braces stay single.
+SINGLE_CHUNK_SYSTEM_PROMPT = """Your task is to review pull requests. Instructions:
+- Provide the response in the following JSON format: [{"lineNumber": <line_number>, "reviewComment": "<review comment>"}]
+- Do not give positive comments or compliments.
+- Provide comments and suggestions ONLY if there is something to improve, otherwise return an empty array.
+- Write the comment in GitHub Markdown format.
+- Use the given description only for the overall context and only comment the code.
+- IMPORTANT: NEVER suggest adding comments to the code.
+"""
+
+FULL_CONTEXT_SYSTEM_PROMPT = """You are an experienced software engineer specializing in reviewing pull requests. Your task is to provide an overall code review summary for a PR. Focus on assessing the following aspects:
+
+1. **Code Structure & Architecture:** Evaluate whether the code is well-organized, modular, and adheres to clean code principles. Suggest improvements if needed.
+
+2. **Refactoring Opportunities:** Identify areas where the code can be optimized or simplified without changing its behavior.
+
+3. **Potential Future Problems:** Highlight possible scalability, maintainability, or dependency issues that might arise in the future based on the current implementation.
+
+Be constructive and clear in your feedback. Avoid commenting on trivial issues or syntax errors—focus on high-level feedback.
+
+Precise instructions:
+- Do not give positive comments or compliments.
+- Provide comments and suggestions ONLY if there is something to improve, otherwise return an empty string.
+- Write the comment in GitHub Markdown format.
+- Do not start with "markdown" or "```markdown".
+- IMPORTANT: Give example code block or pseudo code if you can.
+"""
+
+GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH")
+if not GITHUB_EVENT_PATH:
+    raise RuntimeError("GITHUB_EVENT_PATH is not set; cannot load the event payload.")
+with open(GITHUB_EVENT_PATH, "r", encoding="utf-8") as f:
+    EVENT_DATA = json.load(f)
+
+
+class PRDetails:
+    """Identity and author-supplied text of the pull request under review."""
+
+    def __init__(
+        self, owner: str, repo: str, pull_number: int, title: str, description: str
+    ):
+        self.owner = owner
+        self.repo = repo
+        self.pull_number = pull_number
+        self.title = title
+        self.description = description
+
+
+PR_DETAILS = PRDetails(
+    owner=EVENT_DATA["repository"]["owner"]["login"],
+    repo=EVENT_DATA["repository"]["name"],
+    pull_number=EVENT_DATA["number"],
+    title=EVENT_DATA["pull_request"]["title"],
+    # The payload may carry a null body; keep a literal "None" out of the prompts.
+    description=EVENT_DATA["pull_request"]["body"] or "",
+)
+
+# Blank entries (unset variable, stray spaces or commas) would never match a path.
+EXCLUDE_PATTERNS = [
+    pattern.strip()
+    for pattern in os.getenv("EXCLUDE", "").split(",")
+    if pattern.strip()
+]
+
+FULL_CONTEXT_MODEL = os.getenv("FULL_CONTEXT_MODEL", "o1")
+SINGLE_CHUNK_MODEL = os.getenv("SINGLE_CHUNK_MODEL", "claude-3-5-sonnet-20241022")
+
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
+CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY", "")
+DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")
+
+# A Markdown fence (optionally tagged, e.g. ```json) wrapping the entire reply.
+_FENCED_REPLY = re.compile(r"\A```[\w+-]*[ \t]*\r?\n(.*?)\r?\n?```\Z", re.S)
+
+
+def _unwrap_code_fence(text: str) -> str:
+    """Strip *text* and drop a Markdown fence that wraps the whole reply.
+
+    A reply that merely starts and ends with separate code samples is returned
+    unchanged apart from the surrounding whitespace.
+    """
+    text = text.strip()
+    match = _FENCED_REPLY.match(text)
+    if match and "```" not in match.group(1):
+        return match.group(1).strip()
+    return text
+
+
+def parse_provider(
+    model: str, is_full_context: bool = False
+) -> tuple[Callable, Callable]:
+    """Build the request and reply-parsing callables for *model*.
+
+    Args:
+        model: str, model name; the provider is chosen by substring
+            ("o1"/"gpt", "claude"/"haiku", "deepseek", "gemini")
+        is_full_context: bool, use the summary prompt and the larger token
+            budget instead of the per-file JSON prompt
+
+    Returns:
+        tuple[Callable, Callable]: ``send(prompt)`` returning the raw provider
+        response, and ``parse(response)`` returning the reply text
+
+    Raises:
+        ValueError: if the model name matches no known provider
+    """
+    max_tokens = 4196 if is_full_context else 700
+    system_prompt = (
+        FULL_CONTEXT_SYSTEM_PROMPT if is_full_context else SINGLE_CHUNK_SYSTEM_PROMPT
+    )
+    # An empty reply means "nothing to report": an empty review body for the
+    # summary, an empty JSON array for the per-file comments.
+    empty_reply = "" if is_full_context else "[]"
+
+    def clean(text: Optional[str]) -> str:
+        return _unwrap_code_fence(text or "") or empty_reply
+
+    def chat_completions(client: OpenAI) -> tuple[Callable, Callable]:
+        # OpenAI and DeepSeek expose the same chat-completions interface.
+        # NOTE(review): "o1" models may reject temperature/max_tokens/system
+        # messages - confirm against the OpenAI API reference.
+        return (
+            lambda prompt: client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": prompt},
+                ],
+                temperature=0.2,
+                max_tokens=max_tokens,
+                top_p=1,
+                frequency_penalty=0,
+                presence_penalty=0,
+            ),
+            lambda response: clean(response.choices[0].message.content),
+        )
+
+    if any(key in model for key in ["o1", "gpt"]):
+        return chat_completions(OpenAI(api_key=OPENAI_API_KEY))
+    if any(key in model for key in ["claude", "haiku"]):
+        claude = Anthropic(api_key=CLAUDE_API_KEY)
+        return (
+            lambda prompt: claude.messages.create(
+                model=model,
+                messages=[{"role": "user", "content": prompt}],
+                system=[
+                    {
+                        "type": "text",
+                        "text": system_prompt,
+                        "cache_control": {"type": "ephemeral"},
+                    }
+                ],
+                temperature=0.2,
+                max_tokens=max_tokens,
+            ),
+            lambda response: clean(response.content[0].text),
+        )
+    if "deepseek" in model:
+        return chat_completions(
+            OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com")
+        )
+    if "gemini" in model:
+        genai.configure(api_key=GOOGLE_API_KEY)
+        gemini = genai.GenerativeModel(model, system_instruction=system_prompt)
+        return (
+            lambda prompt: gemini.generate_content(prompt),
+            lambda response: clean(response.text),
+        )
+    raise ValueError(f"Invalid model: {model}")
+
+
+FULL_CONTEXT_MESSAGE, FULL_CONTEXT_RESPONSE_PARSER = parse_provider(
+    FULL_CONTEXT_MODEL, is_full_context=True
+)
+SINGLE_CHUNK_MESSAGE, SINGLE_CHUNK_RESPONSE_PARSER = parse_provider(
+    SINGLE_CHUNK_MODEL, is_full_context=False
+)
+
+
+def get_diff() -> str | None:
+    """Get code difference between base and head from Gitea
+
+    Returns:
+        str | None: diff text, or None when the status is a success other than 200
+
+    Raises:
+        requests.HTTPError: if the server answers with a 4xx/5xx status
+    """
+    url = EVENT_DATA["pull_request"]["diff_url"]
+    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+
+    # raise_for_status() only covers 4xx/5xx; any other non-200 answer
+    # (e.g. 204) carries no usable diff.
+    if response.status_code != 200:
+        print(f"Failed to get diff with code : {response.status_code}")
+        return None
+    return response.text
+
+
+def parse_diff(diff: str) -> list[dict[str, Any]]:
+    """Parse diff into list of dicts
+
+    Deleted files, files matching EXCLUDE_PATTERNS and entries without a
+    ``---``/``+++`` header pair (e.g. binary files) are left out.
+
+    Args:
+        diff: str, code difference between base and head
+
+    Returns:
+        list[dict[str, Any]]: list of dicts, each dict holds the new path
+        ("file") and the diff text ("chunk") of one file
+    """
+    # Anchor on line starts: the text "diff --git a/" inside a changed line is
+    # always preceded by "+", "-" or " " and must not end a file section.
+    file_pattern = re.compile(
+        r"^diff --git a/(.+?) b/(.*?)\r?\n(.*?)(?=^diff --git a/|\Z)", re.S | re.M
+    )
+    hunk_start = re.compile(r"^@@", re.M)
+    old_new_pattern = re.compile(r"^(---|\+\+\+)\s+(.*)$", re.M)
+    list_diff = []
+    for match in file_pattern.finditer(diff):
+        diff_text = match.group(3)
+
+        # Read the file names from the header only; a removed line such as
+        # "-- note" shows up as "--- note" inside a hunk.
+        header = hunk_start.split(diff_text, maxsplit=1)[0]
+        old_new_match = list(old_new_pattern.finditer(header))
+        if len(old_new_match) != 2:
+            continue
+
+        new_file = old_new_match[1].group(2).strip()
+        if new_file == "/dev/null":
+            print("Neglict deleted file")
+            continue
+        # removeprefix() drops exactly "b/"; lstrip("b/") would also eat the
+        # leading characters of paths such as "bin/run.py".
+        new_file = new_file.removeprefix("b/")
+
+        if any(fnmatch.fnmatch(new_file, pattern) for pattern in EXCLUDE_PATTERNS):
+            print(f"Exclude file {new_file}")
+            continue
+
+        list_diff.append(
+            {
+                "file": new_file,
+                "chunk": diff_text,
+            }
+        )
+    return list_diff
+
+
+def create_single_chunk_prompt(file: str, chunk: str) -> str:
+    """Build the user prompt asking for a review of one file's diff.
+
+    Args:
+        file: str, path of the file the diff belongs to
+        chunk: str, diff text of that file
+
+    Returns:
+        str: prompt with the pull request title, description and the diff
+    """
+    return f"""
+Review the following code diff in the file "{file}" and take the pull request title and description into account when writing the response.
+
+Pull request title: {PR_DETAILS.title}
+Pull request description:
+
+---
+{PR_DETAILS.description}
+---
+
+Git diff to review:
+
+```diff
+{chunk}
+```"""
+
+
+def get_ai_response_single_chunk(prompt: str) -> Optional[list[dict[str, Any]]]:
+    """Request per-line review comments and decode the JSON reply.
+
+    Args:
+        prompt: str, prompt built by create_single_chunk_prompt
+
+    Returns:
+        Optional[list[dict[str, Any]]]: decoded review items, or None when the
+        request fails or the reply is not a JSON array
+    """
+    # Bound up front so the error report works even if the request itself fails.
+    response = None
+    try:
+        response = SINGLE_CHUNK_MESSAGE(prompt)
+        content = SINGLE_CHUNK_RESPONSE_PARSER(response)
+        parsed = json.loads(content)
+    except Exception as e:
+        print(f"Error during AI response: {e}")
+        print(response)
+        return None
+    if not isinstance(parsed, list):
+        print(f"Error during AI response: expected a JSON array, got {parsed!r}")
+        return None
+    return parsed
+
+
+def create_comment(
+    file: str, ai_response: list[dict[str, Any]]
+) -> list[dict[str, Any]]:
+    """Turn decoded review items into review-comment payloads for *file*.
+
+    Items lacking a usable "lineNumber" or "reviewComment" are skipped.
+
+    Args:
+        file: str, path the comments are attached to
+        ai_response: list[dict[str, Any]], items with "lineNumber" and
+            "reviewComment"
+
+    Returns:
+        list[dict[str, Any]]: dicts with "body", "path" and "new_position"
+    """
+    comments = []
+    for item in ai_response:
+        try:
+            comment = {
+                "body": f"[REVIEW] {item['reviewComment']}",
+                "path": file,
+                "new_position": int(item["lineNumber"]),
+            }
+        except (KeyError, TypeError, ValueError) as e:
+            print(f"Skip malformed review item {item!r}: {e}")
+            continue
+        comments.append(comment)
+    return comments
+
+
+def analyze_single_chunks(parsed_diff: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Review every file diff on its own and collect the resulting comments.
+
+    Args:
+        parsed_diff: list[dict[str, Any]], output of parse_diff
+
+    Returns:
+        list[dict[str, Any]]: review-comment payloads of all files
+    """
+    comments = []
+    for diff in parsed_diff:
+        file = diff["file"]
+        chunk = diff["chunk"]
+        prompt = create_single_chunk_prompt(file, chunk)
+        ai_response = get_ai_response_single_chunk(prompt)
+        if ai_response:
+            new_comments = create_comment(file, ai_response)
+            comments.extend(new_comments)
+    return comments
+
+
+def get_file_content(file: str) -> str | None:
+    """Fetch the content of *file* from the head branch of the pull request.
+
+    Args:
+        file: str, repository-relative path
+
+    Returns:
+        str | None: file content, or None when it cannot be fetched
+    """
+    repo_url = EVENT_DATA["pull_request"]["head"]["repo"]["url"]
+    branch = EVENT_DATA["pull_request"]["head"]["ref"]
+
+    # Percent-encode the whole path ("/" included, as before) so that spaces,
+    # "#" or "?" in a file name cannot break the URL.
+    replaced_file = quote(file, safe="")
+    url = f"{repo_url}/raw/{branch}%2F{replaced_file}"
+
+    try:
+        response = requests.get(
+            url, headers=HEADERS, params={"ref": branch}, timeout=REQUEST_TIMEOUT
+        )
+        response.raise_for_status()
+    except requests.RequestException as e:
+        # One unreadable file must not abort the whole review; the caller
+        # skips files for which None is returned.
+        print(f"Failed to get file content of {file}: {e}")
+        return None
+
+    if response.status_code != 200:
+        print(f"Failed to get file content with code : {response.status_code}")
+        return None
+    return response.text
+
+
+def get_ai_response_full_context(prompt: str) -> Optional[str]:
+    """Request the overall review summary.
+
+    Args:
+        prompt: str, prompt built by analyze_full_context
+
+    Returns:
+        Optional[str]: review text, or None when the request fails
+    """
+    # Bound up front so the error report works even if the request itself fails.
+    response = None
+    try:
+        response = FULL_CONTEXT_MESSAGE(prompt)
+        content = FULL_CONTEXT_RESPONSE_PARSER(response)
+        return content
+    except Exception as e:
+        print(f"Error during AI response: {e}")
+        print(response)
+        return None
+
+
+def analyze_full_context(parsed_diff: list[dict[str, Any]]) -> Optional[str]:
+    """Review all changed files together with their full content.
+
+    Args:
+        parsed_diff: list[dict[str, Any]], output of parse_diff
+
+    Returns:
+        Optional[str]: review summary, or None when no file content is
+        available or the request fails
+    """
+    file_contents = []
+    for diff in parsed_diff:
+        file = diff["file"]
+        chunk = diff["chunk"]
+        content = get_file_content(file)
+        if content is None:
+            continue
+        file_contents.append(f"File: {file}")
+        file_contents.append(content)
+        file_contents.append(f"Diff: {chunk}")
+
+    if not file_contents:
+        print("No file content to review.")
+        return None
+
+    whole_content = f"""Review the following code and take the pull request title and description into account when writing the response.
+
+Pull request title: {PR_DETAILS.title}
+Pull request description:
+---
+{PR_DETAILS.description}
+---
+
+Code to review:
+
+""" + "\n".join(file_contents)
+    return get_ai_response_full_context(whole_content)
+
+
+def post_review(
+    full_context_review: Optional[str], single_chunk_comments: list[dict[str, Any]]
+) -> None:
+    """Post the summary and the line comments as one review on the pull request.
+
+    Nothing is posted when there is neither a summary nor a comment.
+
+    Args:
+        full_context_review: Optional[str], review summary used as the body
+        single_chunk_comments: list[dict[str, Any]], output of create_comment
+
+    Raises:
+        requests.HTTPError: if the server rejects the review
+    """
+    body = full_context_review or ""
+    if not body and not single_chunk_comments:
+        print("Nothing to review.")
+        return
+
+    # The pull request number belongs to the base repository; for a pull
+    # request from a fork the head repository is a different one.
+    repo_url = EVENT_DATA["pull_request"]["base"]["repo"]["url"]
+    pull_number = EVENT_DATA["number"]
+    commit_id = EVENT_DATA["pull_request"]["head"]["sha"]
+    url = f"{repo_url}/pulls/{pull_number}/reviews"
+    data = {
+        "body": body,
+        "event": "COMMENT",
+        "comments": single_chunk_comments,
+        "commit_id": commit_id,
+    }
+    response = requests.post(url, headers=HEADERS, json=data, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+
+
+def main() -> None:
+    """Code Reviewer for Gitea
+
+    Reviews a newly opened pull request and posts the result; other events
+    are ignored.
+    """
+
+    if EVENT_DATA["action"] != "opened":
+        print("Unsupproted event.")
+        return
+
+    diff = get_diff()
+    if diff is None:
+        return
+    elif not diff:
+        print("No diff found.")
+        return
+
+    parsed_diff = parse_diff(diff)
+    comments = analyze_single_chunks(parsed_diff)
+
+    full_context_response = analyze_full_context(parsed_diff)
+
+    post_review(full_context_response, comments)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
index 8047d78..1558727 100644
--- a/.github/workflows/code-review.yml
+++ b/.github/workflows/code-review.yml
@@ -21,12 +21,17 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install requests py-gitea
+          pip install requests py-gitea openai anthropic google-generativeai
 
       - name: Run Code Review
         env:
-          GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
-          CLAUDE_API_KEY: ${{ secrets.CLAUDE_API_KEY }}
+          ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }}
+          CLAUDE_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
           PR_NUMBER: ${{ github.event.pull_request.number }}
-        run: python .github/scripts/code_review.py
\ No newline at end of file
+          FULL_CONTEXT_MODEL: gpt-4o
+          SINGLE_CHUNK_MODEL: gpt-4o
+          EXCLUDE: "*.yml,*.yaml"
+        run: python .github/scripts/code_review.py
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
deleted file mode 100644
index 1b1202f..0000000
--- a/.github/workflows/test.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-name: Test action
-
-on:
-  pull_request:
-    types: [opened, synchronize]
-
-permissions:
-  contents: read
-  pull-requests: write
-
-jobs:
-  review:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.12'
-      - name: Run Code Review
-        run: python .github/scripts/test.py
diff --git a/test.py b/test.py
deleted file mode 100644
index ea80048..0000000
--- a/test.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from gitea import Gitea
-
-g = Gitea(
-    "https://git.teahaven.kr",
-    "735a1106653ce9a63ca80667f32e93221427fecc",
-)
-
-