diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py new file mode 100644 index 0000000..bdb0ccb --- /dev/null +++ b/.gitea/scripts/code_review.py @@ -0,0 +1,281 @@ +import fnmatch +import json +import os +import re +from typing import Any, Optional + +import requests +from model import Model + +ACCESS_TOKEN = os.getenv("ACCESS_TOKEN", "") +HEADERS = {"Authorization": f"token {ACCESS_TOKEN}"} + +GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH") +try: + with open(GITHUB_EVENT_PATH, "r") as f: + EVENT_DATA = json.load(f) +except FileNotFoundError: + print("Failed to load event data.") + exit(1) + +FULL_CONTEXT_MODEL_NAME = os.getenv("FULL_CONTEXT_MODEL", "") +SINGLE_CHUNK_MODEL_NAME = os.getenv("SINGLE_CHUNK_MODEL", "") +FULL_CONTEXT_API_KEY = os.getenv("FULL_CONTEXT_API_KEY", "") +SINGLE_CHUNK_API_KEY = os.getenv("SINGLE_CHUNK_API_KEY", "") + +EXCLUDE_PATTERNS = os.getenv("EXCLUDE", "").split(",") + + +def get_diff() -> str | None: + """Get code difference between base and head from Gitea. + + Returns: + str | None: code difference between base and head, or None if failed to get diff + """ + url = EVENT_DATA["pull_request"]["diff_url"] + try: + response = requests.get(url, headers=HEADERS) + response.raise_for_status() + return response.text + except requests.RequestException as e: + print(f"Failed to get diff: {e}") + return None + + +def parse_diff(diff: str) -> list[dict[str, Any]]: + """Parse diff into list of dicts. + + Args: + diff: str, code difference between base and head + + Returns: + list[dict[str, Any]]: list of dicts, each dict represents a code chunks + """ + file_pattern = re.compile( + r"(?s)diff --git a/(.+?) b/(.*?)\r?\n(.*?)(?=diff --git a/|$)", re.S + ) + old_new_pattern = re.compile(r"(?m)^(---|\+\+\+)\s+(.*)$") + hunk_pattern = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)(?=^@@ |$)", + re.MULTILINE | re.DOTALL, + ) + list_diff = [] + for match in file_pattern.finditer(diff): + diff_text = match.group(3) + + old_new_match = list(old_new_pattern.finditer(diff_text)) + if len(old_new_match) != 2: + continue + + old_file = old_new_match[0].group(2) + old_file = old_file.lstrip("a/") if old_file.startswith("a/") else old_file + + new_file = old_new_match[1].group(2) + if new_file == "/dev/null": + print("Neglict deleted file") + continue + new_file = new_file.lstrip("b/") + + hunk_match = hunk_pattern.search(diff_text) + if hunk_match is None: + continue + old_idx = int(hunk_match.group(1)) + new_idx = int(hunk_match.group(3)) + remain_text = diff_text[hunk_match.end() + 1 :] + diff_text = [] + for line in remain_text.splitlines(): + if line.startswith("-"): + diff_text.append(f"{old_idx} {line}") + old_idx += 1 + elif line.startswith("+"): + diff_text.append(f"{new_idx} {line}") + new_idx += 1 + else: + diff_text.append(line) + diff_text = "\n".join(diff_text) + + if any(fnmatch.fnmatch(new_file, pattern) for pattern in EXCLUDE_PATTERNS): + print(f"Exclude file {new_file}") + continue + + list_diff.append( + { + "file": new_file, + "chunk": diff_text, + } + ) + return list_diff + + +def create_comment( + file: str, ai_response: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """Create comments for single chunk review. + + Args: + file: str, file name + ai_response: list[dict[str, Any]], AI response for single chunk review + + Returns: + list[dict[str, Any]]: comments for single chunk review + """ + comments = [] + for ai_response in ai_response: + comments.append( + { + "body": f"[REVIEW] {ai_response['reviewComment']}", + "path": file, + "new_position": int(ai_response["lineNumber"]), + } + ) + return comments + + +def analyze_single_chunks( + single_chunk_model: Model, parsed_diff: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """Analyze single chunks and create comments. + + Args: + single_chunk_model: AI Session for single chunk analysis + parsed_diff: list[dict[str, Any]], parsed diff + + Returns: + list[dict[str, Any]]: comments for single chunk review + """ + comments = [] + title = EVENT_DATA["pull_request"]["title"] + description = EVENT_DATA["pull_request"]["body"] + for diff in parsed_diff: + file = diff["file"] + chunk = diff["chunk"] + response = single_chunk_model.get_response_single_chunk( + file, title, description, chunk + ) + response = response.strip("`").lstrip("json").strip() or "[]" + + try: + response_json = json.loads(response) + new_comments = create_comment(file, response_json) + comments.extend(new_comments) + except json.JSONDecodeError: + print(f"Failed to parse response: {response}") + continue + + return comments + + +def get_file_content(file: str) -> str | None: + """Get file content from Gitea. + + Args: + file: str, file name + + Returns: + str | None: file content, or None if failed to get file content + """ + repo_url = EVENT_DATA["pull_request"]["head"]["repo"]["url"] + branch = EVENT_DATA["pull_request"]["head"]["ref"] + + replaced_file = file.replace("/", "%2F") + url = f"{repo_url}/raw/{branch}%2F{replaced_file}?ref={branch}" + + try: + response = requests.get(url, headers=HEADERS) + response.raise_for_status() + return response.text + except requests.RequestException as e: + print(f"Failed to get file content: {e}") + return None + + +def analyze_full_context( + full_context_model: Model, parsed_diff: list[dict[str, Any]] +) -> str: + """Analyze full context and create review. + + Args: + full_context_model: AI Session for full context analysis + parsed_diff: list[dict[str, Any]], parsed diff + + Returns: + str: review for full context + """ + file_contents = [] + for diff in parsed_diff: + file = diff["file"] + chunk = diff["chunk"] + content = get_file_content(file) + if content is None: + continue + file_contents.append(f"File: {file}") + file_contents.append(content) + file_contents.append(f"Diff: {chunk}") + + title = EVENT_DATA["pull_request"]["title"] + description = EVENT_DATA["pull_request"]["body"] + response = full_context_model.get_response_full_context( + title, description, file_contents + ) + response = response.strip("`").lstrip("markdown").strip() + return response + + +def post_review( + full_context_review: str, single_chunk_comments: list[dict[str, Any]] +) -> None: + """Post review to Gitea. + + Args: + full_context_review: str, review for full context + single_chunk_comments: list[dict[str, Any]], comments for single chunk review + """ + repo_url = EVENT_DATA["pull_request"]["head"]["repo"]["url"] + pull_number = EVENT_DATA["number"] + commit_id = EVENT_DATA["pull_request"]["head"]["sha"] + url = f"{repo_url}/pulls/{pull_number}/reviews" + data = { + "body": full_context_review, + "event": "COMMENT", + "comments": single_chunk_comments, + "commit_id": commit_id, + } + response = requests.post(url, headers=HEADERS, json=data) + response.raise_for_status() + + +def main() -> None: + """Code Reviewer for Gitea.""" + if EVENT_DATA["action"] not in ["opened", "synchronized"]: + print("Unsupproted event.") + return + + diff = get_diff() + if diff is None: + return + elif not diff: + print("No diff found.") + return + + full_context_model = Model( + model=FULL_CONTEXT_MODEL_NAME, + api_key=FULL_CONTEXT_API_KEY, + is_full_context=True, + ) + single_chunk_model = Model( + model=SINGLE_CHUNK_MODEL_NAME, + api_key=SINGLE_CHUNK_API_KEY, + is_full_context=False, + ) + + parsed_diff = parse_diff(diff) + + comments = analyze_single_chunks(single_chunk_model, parsed_diff) + + full_context_response = analyze_full_context(full_context_model, parsed_diff) + + post_review(full_context_response, comments) + + +if __name__ == "__main__": + main() diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py new file mode 100644 index 0000000..78ba66a --- /dev/null +++ b/.gitea/scripts/model.py @@ -0,0 +1,235 @@ +from enum import Enum +from typing import Any + +import google.generativeai as genai +from anthropic import Anthropic +from openai import OpenAI + + +class ModelProvider(Enum): + """The model provider.""" + + OPENAI = "openai" + ANTHROPIC = "anthropic" + GOOGLE = "google" + DEEPSEEK = "deepseek" + + @classmethod + def from_model(cls, model: str) -> "ModelProvider": + """Get the model provider from the model name. + + Args: + model (str): The model name. + + Returns: + ModelProvider: The model provider. + """ + for prefix, provider in PREFIX_TO_MODEL.items(): + if model.startswith(prefix): + return provider + raise ValueError(f"Unknown model: {model}") + + +PREFIX_TO_MODEL = { + "gpt": ModelProvider.OPENAI, + "o1": ModelProvider.OPENAI, + "claude": ModelProvider.ANTHROPIC, + "gemini": ModelProvider.GOOGLE, + "deepseek": ModelProvider.DEEPSEEK, +} + + +class Model: + """The model class. + + Attributes: + model (str): The model name. + api_key (str): The API key. + system_prompt (str): The system prompt. + max_tokens (int): The maximum tokens. + """ + + def __init__( # noqa: D107 + self, + model: str, + api_key: str, + is_full_context: bool, + max_tokens: int = 4196, + ): + self.model = model + self.system_prompt = ( + FULL_CONTEXT_SYSTEM_PROMPT + if is_full_context + else SINGLE_CHUNK_SYSTEM_PROMPT + ) + self.max_tokens = max_tokens + self.provider = ModelProvider.from_model(model) + self.session = self.create_session(api_key) + + def create_session(self, api_key: str) -> Any: + """Create a session for the model. + + Args: + api_key (str): The API key. + + Returns: + Any: The session. + """ + match self.provider: + case ModelProvider.OPENAI: + return OpenAI(api_key=api_key) + case ModelProvider.ANTHROPIC: + return Anthropic(api_key=api_key) + case ModelProvider.GOOGLE: + genai.configure(api_key=api_key) + return genai.GenerativeModel(model=self.model, api_key=api_key) + case ModelProvider.DEEPSEEK: + return OpenAI(api_key=api_key, base_url="https://api.deepseek.com") + + def request(self, prompt: str) -> str: + """Request the model to generate a response. + + Args: + prompt (str): The prompt to generate a response for. + + Returns: + str: The generated response. + """ + match self.provider: + case ModelProvider.OPENAI | ModelProvider.DEEPSEEK: + response = self.session.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": self.system_prompt}, + {"role": "user", "content": prompt}, + ], + temperature=0.2, + max_tokens=self.max_tokens, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + ) + return response.choices[0].message.content.strip() + case ModelProvider.ANTHROPIC: + response = self.session.messages.create( + model=self.model, + messages=[{"role": "user", "content": prompt}], + system=[ + { + "type": "text", + "text": self.system_prompt, + "cache_control": {"type": "ephemeral"}, + } + ], + temperature=0.2, + max_tokens=self.max_tokens, + ) + return response.content[0].text.strip() + case ModelProvider.GOOGLE: + response = self.session.generate_content(prompt) + return response.text.strip() + + def get_response_single_chunk( + self, file: str, title: str, description: str, chunk: str + ) -> str: + """Get the response for a single chunk. + + Args: + file (str): The file name. + title (str): The pull request title. + description (str): The pull request description. + chunk (str): The diff chunk. + + Returns: + str: The response. + """ + prompt = SINGLE_CHUNK_USER_PROMPT.format(file, title, description, chunk) + return self.request(prompt) + + def get_response_full_context( + self, title: str, description: str, file_contents: list[str] + ) -> str: + """Get the response for full context. + + Args: + title (str): The pull request title. + description (str): The pull request description. + file_contents (list[str]): The file contents, diffs. + + Returns: + str: The response. + """ + try: + prompt = FULL_CONTEXT_USER_PROMPT.format( + title, description, "\n".join(file_contents) + ) + return self.request(prompt) + except Exception as e: + print(f"Error during full context response: {e}") + print(prompt) + return None + + +SINGLE_CHUNK_SYSTEM_PROMPT = ( + "Your task is to review pull requests. Instructions:\n" + "- Provide the response in the following JSON format: " + """[{{"lineNumber": , "reviewComment": ""}}] \n""" + "- lineNumber is about the line number of the code that in new file. \n" + "- Do not give positive comments or compliments. \n" + "- Provide comments and suggestions ONLY if there is something to improve" + "otherwise return an empty array. \n" + "- Write the comment in GitHub Markdown format. \n" + "- Use the given description only for the overall context " + "and only comment the code. \n" + "- IMPORTANT: NEVER suggest adding comments to the code. \n" +) +SINGLE_CHUNK_USER_PROMPT = ( + "Review the following code diff in the file " + "{} and take the pull request title and description into account " + "when writing the response. \n" + "Pull request title: {} \n" + "Pull request description: \n" + "--- \n" + "{} \n" + "--- \n" + "Git diff to review: \n" + "```diff \n" + "{} \n" + "```" +) + +FULL_CONTEXT_SYSTEM_PROMPT = ( + "You are an experienced software engineer specializing in reviewing pull " + "requests. Your task is to provide an overall code review summary for a PR. " + "Focus on assessing the following aspects:\n" + "1. **Code Structure & Architecture:** " + "Evaluate whether the code is well-organized, modular, " + "and adheres to clean code principles. Suggest improvements if needed.\n" + "2. **Refactoring Opportunities:** " + "Identify areas where the code can be optimized or simplified without changing " + "its behavior.\n" + "3. **Potential Future Problems:** " + "Highlight possible scalability, maintainability, or dependency issues that might " + "arise in the future based on the current implementation.\n" + "Be constructive and clear in your feedback. Avoid commenting on trivial issues " + "or syntax errors—focus on high-level feedback.\n" + "Precise instructions:\n" + "- Do not give positive comments or compliments.\n" + "- Provide comments and suggestions ONLY if there is something to improve, " + "otherwise return an empty string.\n" + "- Write the comment in GitHub Markdown format.\n" + "- Do not start with 'markdown' or '```markdown'.\n" + "- IMPORTANT: Give example code block or pseudo code if you can.\n" +) + +FULL_CONTEXT_USER_PROMPT = ( + "Review the following code and take the pull request title " + "and description into account when writing the response. \n" + "Pull request title: {} \n" + "Pull request description: \n" + "--- \n" + "{} \n" + "--- \n" + "Code to review: \n" + "{}" +) diff --git a/.github/workflows/code-review.yml b/.gitea/workflows/code-review.yml similarity index 67% rename from .github/workflows/code-review.yml rename to .gitea/workflows/code-review.yml index 1558727..be7d67d 100644 --- a/.github/workflows/code-review.yml +++ b/.gitea/workflows/code-review.yml @@ -26,12 +26,10 @@ jobs: - name: Run Code Review env: ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} - CLAUDE_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }} - GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} - PR_NUMBER: ${{ github.event.pull_request.number }} FULL_CONTEXT_MODEL: gpt-4o + FULL_CONTEXT_API_KEY: ${{ secrets.OPENAI_API_KEY }} SINGLE_CHUNK_MODEL: gpt-4o + SINGLE_CHUNK_API_KEY: ${{ secrets.OPENAI_API_KEY }} EXCLUDE: "*.yml,*.yaml" - run: python .github/scripts/code_review.py + run: python .gitea/scripts/code_review.py + diff --git a/.github/scripts/code_review.py b/.github/scripts/code_review.py deleted file mode 100644 index c796027..0000000 --- a/.github/scripts/code_review.py +++ /dev/null @@ -1,379 +0,0 @@ -import base64 -import os -import re -import fnmatch -import json -import datetime -from openai import OpenAI -from anthropic import Anthropic -import google.generativeai as genai -from collections import defaultdict -from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import Any, Optional, Callable - -import requests - -ACCESS_TOKEN = os.getenv("ACCESS_TOKEN", "") -HEADERS = {"Authorization": f"token {ACCESS_TOKEN}"} - -SINGLE_CHUNK_SYSTEM_PROMPT = """Your task is to review pull requests. Instructions: -- Provide the response in the following JSON format: [{{"lineNumber": , "reviewComment": ""}}] -- Do not give positive comments or compliments. -- Provide comments and suggestions ONLY if there is something to improve, otherwise return an empty array. -- Write the comment in GitHub Markdown format. -- Use the given description only for the overall context and only comment the code. -- IMPORTANT: NEVER suggest adding comments to the code. -""" - -FULL_CONTEXT_SYSTEM_PROMPT = """You are an experienced software engineer specializing in reviewing pull requests. Your task is to provide an overall code review summary for a PR. Focus on assessing the following aspects: - -1. **Code Structure & Architecture:** Evaluate whether the code is well-organized, modular, and adheres to clean code principles. Suggest improvements if needed. - -2. **Refactoring Opportunities:** Identify areas where the code can be optimized or simplified without changing its behavior. - -3. **Potential Future Problems:** Highlight possible scalability, maintainability, or dependency issues that might arise in the future based on the current implementation. - -Be constructive and clear in your feedback. Avoid commenting on trivial issues or syntax errors—focus on high-level feedback. - -Precise instructions: -- Do not give positive comments or compliments. -- Provide comments and suggestions ONLY if there is something to improve, otherwise return an empty string. -- Write the comment in GitHub Markdown format. -- Do not start with "markdown" or "```markdown". -- IMPORTANT: Give example code block or pseudo code if you can. -""" - -GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH") -with open(GITHUB_EVENT_PATH, "r") as f: - EVENT_DATA = json.load(f) - - -class PRDetails: - def __init__( - self, owner: str, repo: str, pull_number: int, title: str, description: str - ): - self.owner = owner - self.repo = repo - self.pull_number = pull_number - self.title = title - self.description = description - - -PR_DETAILS = PRDetails( - owner=EVENT_DATA["repository"]["owner"]["login"], - repo=EVENT_DATA["repository"]["name"], - pull_number=EVENT_DATA["number"], - title=EVENT_DATA["pull_request"]["title"], - description=EVENT_DATA["pull_request"]["body"], -) - -EXCLUDE_PATTERNS = os.getenv("EXCLUDE", "").split(",") - -FULL_CONTEXT_MODEL = os.getenv("FULL_CONTEXT_MODEL", "o1") -SINGLE_CHUNK_MODEL = os.getenv("SINGLE_CHUNK_MODEL", "claude-3-5-sonnet-20241022") - -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") -CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY", "") -DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "") -GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "") - - -def parse_provider( - model: str, is_full_context: bool = False -) -> tuple[Callable, Callable]: - max_tokens = 4196 if is_full_context else 700 - system_prompt = ( - FULL_CONTEXT_SYSTEM_PROMPT if is_full_context else SINGLE_CHUNK_SYSTEM_PROMPT - ) - if any(key in model for key in ["o1", "gpt"]): - openai = OpenAI(api_key=OPENAI_API_KEY) - return ( - lambda prompt: openai.chat.completions.create( - model=model, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": prompt}, - ], - temperature=0.2, - max_tokens=max_tokens, - top_p=1, - frequency_penalty=0, - presence_penalty=0, - ), - lambda response: response.choices[0] - .message.content.strip() - .strip("`") - .lstrip("json") - .strip() - or "[]", - ) - elif any(key in model for key in ["claude", "haiku"]): - claude = Anthropic(api_key=CLAUDE_API_KEY) - return ( - lambda prompt: claude.messages.create( - model=model, - messages=[{"role": "user", "content": prompt}], - system=[ - { - "type": "text", - "text": system_prompt, - "cache_control": {"type": "ephemeral"}, - } - ], - temperature=0.2, - max_tokens=max_tokens, - ), - lambda response: response.content[0].text.strip() or "[]", - ) - elif any(key in model for key in ["deepseek"]): - deepseek = OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com") - return ( - lambda prompt: deepseek.chat.completions.create( - model=model, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": prompt}, - ], - temperature=0.2, - max_tokens=max_tokens, - top_p=1, - frequency_penalty=0, - presence_penalty=0, - ), - lambda response: response.choices[0] - .message.content.strip() - .strip("`") - .lstrip("json") - .strip() - or "[]", - ) - elif any(key in model for key in ["gemini"]): - genai.configure(api_key=GOOGLE_API_KEY) - gemini = genai.GenerativeModel(model, system_instruction=system_prompt) - return ( - lambda prompt: gemini.generate_content(prompt), - lambda response: response.text.strip().strip("`").lstrip("json").strip() - or "[]", - ) - else: - raise ValueError(f"Invalid model: {model}") - - -FULL_CONTEXT_MESSAGE, FULL_CONTEXT_RESPONSE_PARSER = parse_provider( - FULL_CONTEXT_MODEL, is_full_context=True -) -SINGLE_CHUNK_MESSAGE, SINGLE_CHUNK_RESPONSE_PARSER = parse_provider( - SINGLE_CHUNK_MODEL, is_full_context=False -) - - -def get_diff() -> str | None: - """Get code difference between base and head from Gitea""" - url = EVENT_DATA["pull_request"]["diff_url"] - response = requests.get(url, headers=HEADERS) - response.raise_for_status() - - if response.status_code != 200: - print(f"Failed to get diff with code : {response.status_code}") - return None - return response.text - - -def parse_diff(diff: str) -> list[dict[str, Any]]: - """Parse diff into list of dicts - - Args: - diff: str, code difference between base and head - - Returns: - list[dict[str, Any]]: list of dicts, each dict represents a code chunks - """ - file_pattern = re.compile( - r"(?s)diff --git a/(.+?) b/(.*?)\r?\n(.*?)(?=diff --git a/|$)", re.S - ) - old_new_pattern = re.compile(r"(?m)^(---|\+\+\+)\s+(.*)$") - list_diff = [] - for match in file_pattern.finditer(diff): - diff_text = match.group(3) - - old_new_match = list(old_new_pattern.finditer(diff_text)) - if len(old_new_match) != 2: - continue - - old_file = old_new_match[0].group(2) - old_file = old_file.lstrip("a/") if old_file.startswith("a/") else old_file - - new_file = old_new_match[1].group(2) - if new_file == "/dev/null": - print("Neglict deleted file") - continue - new_file = new_file.lstrip("b/") - - if any(fnmatch.fnmatch(new_file, pattern) for pattern in EXCLUDE_PATTERNS): - print(f"Exclude file {new_file}") - continue - - list_diff.append( - { - "file": new_file, - "chunk": diff_text, - } - ) - return list_diff - - -def create_single_chunk_prompt(file: str, chunk: str) -> str: - return f""" -Review the following code diff in the file "{file}" and take the pull request title and description into account when writing the response. - -Pull request title: {PR_DETAILS.title} -Pull request description: - ---- -{PR_DETAILS.description} ---- - -Git diff to review: - -```diff -{chunk} -```""" - - -def get_ai_response_single_chunk(prompt: str) -> Optional[list[dict[str, Any]]]: - try: - response = SINGLE_CHUNK_MESSAGE(prompt) - content = SINGLE_CHUNK_RESPONSE_PARSER(response) - return json.loads(content) - except Exception as e: - print(f"Error during AI response: {e}") - print(response) - return None - - -def create_comment( - file: str, ai_response: list[dict[str, Any]] -) -> list[dict[str, Any]]: - comments = [] - for ai_response in ai_response: - comments.append( - { - "body": f"[REVIEW] {ai_response['reviewComment']}", - "path": file, - "new_position": int(ai_response["lineNumber"]), - } - ) - return comments - - -def analyze_single_chunks(parsed_diff: list[dict[str, Any]]) -> list[dict[str, Any]]: - comments = [] - for diff in parsed_diff: - file = diff["file"] - chunk = diff["chunk"] - prompt = create_single_chunk_prompt(file, chunk) - ai_response = get_ai_response_single_chunk(prompt) - if ai_response: - new_comments = create_comment(file, ai_response) - comments.extend(new_comments) - return comments - - -def get_file_content(file: str) -> str | None: - repo_url = EVENT_DATA["pull_request"]["head"]["repo"]["url"] - branch = EVENT_DATA["pull_request"]["head"]["ref"] - - replaced_file = file.replace("/", "%2F") - url = f"{repo_url}/raw/{branch}%2F{replaced_file}?ref={branch}" - - response = requests.get(url, headers=HEADERS) - response.raise_for_status() - - if response.status_code != 200: - print(f"Failed to get file content with code : {response.status_code}") - return None - return response.text - - -def get_ai_response_full_context(prompt: str) -> Optional[str]: - try: - response = FULL_CONTEXT_MESSAGE(prompt) - content = FULL_CONTEXT_RESPONSE_PARSER(response) - return content - except Exception as e: - print(f"Error during AI response: {e}") - print(response) - return None - - -def analyze_full_context(parsed_diff: list[dict[str, Any]]) -> str: - file_contents = [] - for diff in parsed_diff: - file = diff["file"] - chunk = diff["chunk"] - content = get_file_content(file) - if content is None: - continue - file_contents.append(f"File: {file}") - file_contents.append(content) - file_contents.append(f"Diff: {chunk}") - - whole_content = f"""Review the following code and take the pull request title and description into account when writing the response. - -Pull request title: {PR_DETAILS.title} -Pull request description: ---- -{PR_DETAILS.description} ---- - -Code to review: - -""" + "\n".join(file_contents) - ai_response = get_ai_response_full_context(whole_content) - if ai_response is None: - return None - - return ai_response - - -def post_review( - full_context_review: str, single_chunk_comments: list[dict[str, Any]] -) -> None: - repo_url = EVENT_DATA["pull_request"]["head"]["repo"]["url"] - pull_number = EVENT_DATA["number"] - commit_id = EVENT_DATA["pull_request"]["head"]["sha"] - url = f"{repo_url}/pulls/{pull_number}/reviews" - data = { - "body": full_context_review, - "event": "COMMENT", - "comments": single_chunk_comments, - "commit_id": commit_id, - } - response = requests.post(url, headers=HEADERS, json=data) - response.raise_for_status() - - -def main() -> None: - """Code Reviewer for Gitea""" - - if EVENT_DATA["action"] != "opened": - print("Unsupproted event.") - return - - diff = get_diff() - if diff is None: - return - elif not diff: - print("No diff found.") - return - - parsed_diff = parse_diff(diff) - comments = analyze_single_chunks(parsed_diff) - - full_context_response = analyze_full_context(parsed_diff) - - post_review(full_context_response, comments) - - -if __name__ == "__main__": - main()