From 89dc1efda75ada7633aa0c440c8bfe827461d6b8 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:08:10 +0900 Subject: [PATCH 01/25] update template asynch --- .gitea/scripts/code_review.py | 77 ++++++++++++++++++++------------ .gitea/scripts/model.py | 16 +++---- .gitea/workflows/code-review.yml | 2 +- 3 files changed, 57 insertions(+), 38 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index e8d71fe..e87e86a 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -1,5 +1,6 @@ """Code Reviewer for Gitea.""" +import asyncio import fnmatch import json import os @@ -7,6 +8,7 @@ import re from typing import Any import requests +import aiohttp from model import Model ACCESS_TOKEN = os.getenv("ACCESS_TOKEN", "") @@ -133,7 +135,7 @@ def create_comment( return comments -def analyze_single_chunks( +async def analyze_single_chunks( single_chunk_model: Model, parsed_diff: list[dict[str, Any]] ) -> list[dict[str, Any]]: """Analyze single chunks and create comments. @@ -145,29 +147,33 @@ def analyze_single_chunks( Returns: list[dict[str, Any]]: comments for single chunk review """ - comments = [] - title = EVENT_DATA["pull_request"]["title"] - description = EVENT_DATA["pull_request"]["body"] - for diff in parsed_diff: + + async def process_single_chunk(diff: dict[str, Any]): file = diff["file"] chunk = diff["chunk"] - response = single_chunk_model.get_response_single_chunk( + response = await single_chunk_model.get_response_single_chunk( file, title, description, chunk ) response = response.strip("`").lstrip("json").strip() or "[]" try: response_json = json.loads(response) - new_comments = create_comment(file, response_json) - comments.extend(new_comments) + return create_comment(file, response_json) except json.JSONDecodeError: print(f"Failed to parse response: {response}") - continue + return [] + title = EVENT_DATA["pull_request"]["title"] + description = EVENT_DATA["pull_request"]["body"] + tasks = [process_single_chunk(diff) for diff in parsed_diff] + results = await asyncio.gather(*tasks) + + # Flatten the list of comments + comments = [comment for result in results for comment in result] return comments -def get_file_content(file: str) -> str | None: +async def get_file_content(file: str) -> str | None: """Get file content from Gitea. Args: @@ -183,15 +189,16 @@ def get_file_content(file: str) -> str | None: url = f"{repo_url}/raw/{branch}%2F{replaced_file}?ref={branch}" try: - response = requests.get(url, headers=HEADERS) - response.raise_for_status() - return response.text - except requests.RequestException as e: + async with aiohttp.ClientSession(headers=HEADERS) as session: + async with session.get(url) as response: + response.raise_for_status() + return await response.text() + except Exception as e: print(f"Failed to get file content: {e}") return None -def analyze_full_context( +async def analyze_full_context( full_context_model: Model, parsed_diff: list[dict[str, Any]] ) -> str: """Analyze full context and create review. @@ -203,22 +210,26 @@ def analyze_full_context( Returns: str: review for full context """ - file_contents = [] - for diff in parsed_diff: + + async def get_file_data(diff: dict[str, Any]): file = diff["file"] chunk = diff["chunk"] - content = get_file_content(file) + content = await get_file_content(file) if content is None: - continue - file_contents.append(f"File: {file}") - file_contents.append(content) - file_contents.append(f"Diff: {chunk}") + return None + return f"File: {file}\n{content}\nDiff: {chunk}" + + tasks = [get_file_data(diff) for diff in parsed_diff] + file_contents_list = await asyncio.gather(*tasks) + + file_contents = [item for item in file_contents_list if item is not None] + if not file_contents: return "" title = EVENT_DATA["pull_request"]["title"] description = EVENT_DATA["pull_request"]["body"] - response = full_context_model.get_response_full_context( + response = await full_context_model.get_response_full_context( title, description, file_contents ) response = response.strip("`").lstrip("markdown").strip() @@ -248,10 +259,10 @@ def post_review( response.raise_for_status() -def main() -> None: - """Code Reviewer for Gitea.""" +async def main() -> None: + """Code Reviewer for Gitea: Asynchronous version.""" if EVENT_DATA["action"] not in ["opened", "synchronized"]: - print("Unsupproted event.") + print("Unsupported event.") return diff = get_diff() @@ -273,10 +284,18 @@ def main() -> None: ) parsed_diff = parse_diff(diff) - comments = analyze_single_chunks(single_chunk_model, parsed_diff) - full_context_response = analyze_full_context(full_context_model, parsed_diff) + comments_task = asyncio.create_task( + analyze_single_chunks(single_chunk_model, parsed_diff) + ) + full_context_response_task = asyncio.create_task( + analyze_full_context(full_context_model, parsed_diff) + ) + + comments = await comments_task + full_context_response = await full_context_response_task + post_review(full_context_response, comments) if __name__ == "__main__": - main() + asyncio.run(main()) diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index 9004ab1..5773673 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -88,7 +88,7 @@ class Model: case ModelProvider.DEEPSEEK: return OpenAI(api_key=api_key, base_url="https://api.deepseek.com") - def request(self, prompt: str) -> str: + async def request(self, prompt: str) -> str: """Request the model to generate a response. Args: @@ -99,7 +99,7 @@ class Model: """ match self.provider: case ModelProvider.OPENAI | ModelProvider.DEEPSEEK: - response = self.session.chat.completions.create( + response = await self.session.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": self.system_prompt}, @@ -113,7 +113,7 @@ class Model: ) return response.choices[0].message.content.strip() case ModelProvider.ANTHROPIC: - response = self.session.messages.create( + response = await self.session.messages.create( model=self.model, messages=[{"role": "user", "content": prompt}], system=[ @@ -128,10 +128,10 @@ class Model: ) return response.content[0].text.strip() case ModelProvider.GOOGLE: - response = self.session.generate_content(prompt) + response = await self.session.generate_content(prompt) return response.text.strip() - def get_response_single_chunk( + async def get_response_single_chunk( self, file: str, title: str, description: str, chunk: str ) -> str: """Get the response for a single chunk. @@ -146,9 +146,9 @@ class Model: str: The response. """ prompt = SINGLE_CHUNK_USER_PROMPT.format(file, title, description, chunk) - return self.request(prompt) + return await self.request(prompt) - def get_response_full_context( + async def get_response_full_context( self, title: str, description: str, file_contents: list[str] ) -> str: """Get the response for full context. @@ -165,7 +165,7 @@ class Model: prompt = FULL_CONTEXT_USER_PROMPT.format( title, description, "\n".join(file_contents) ) - return self.request(prompt) + return await self.request(prompt) except Exception as e: print(f"Error during full context response: {e}") print(prompt) diff --git a/.gitea/workflows/code-review.yml b/.gitea/workflows/code-review.yml index be7d67d..2c3928e 100644 --- a/.gitea/workflows/code-review.yml +++ b/.gitea/workflows/code-review.yml @@ -21,7 +21,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install requests py-gitea openai anthropic google-generativeai + pip install aiohttp requests py-gitea openai anthropic google-generativeai - name: Run Code Review env: -- 2.49.1 From 37b93207989ccfad96ff6a6164ca262f4f20e0b9 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:14:49 +0900 Subject: [PATCH 02/25] update aynch client --- .gitea/scripts/model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index 5773673..572dd72 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -4,8 +4,8 @@ from enum import Enum from typing import Any import google.generativeai as genai -from anthropic import Anthropic -from openai import OpenAI +from anthropic import AsyncAnthropic +from openai import AsyncOpenAI class ModelProvider(Enum): @@ -79,14 +79,14 @@ class Model: """ match self.provider: case ModelProvider.OPENAI: - return OpenAI(api_key=api_key) + return AsyncOpenAI(api_key=api_key) case ModelProvider.ANTHROPIC: - return Anthropic(api_key=api_key) + return AsyncAnthropic(api_key=api_key) case ModelProvider.GOOGLE: genai.configure(api_key=api_key) return genai.GenerativeModel(model=self.model, api_key=api_key) case ModelProvider.DEEPSEEK: - return OpenAI(api_key=api_key, base_url="https://api.deepseek.com") + return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com") async def request(self, prompt: str) -> str: """Request the model to generate a response. @@ -128,7 +128,7 @@ class Model: ) return response.content[0].text.strip() case ModelProvider.GOOGLE: - response = await self.session.generate_content(prompt) + response = await self.session.generate_content_async(prompt) return response.text.strip() async def get_response_single_chunk( -- 2.49.1 From d82ff134391184ee6d61e74542b6c4e2e4729035 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:15:15 +0900 Subject: [PATCH 03/25] change test model --- .gitea/workflows/code-review.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/code-review.yml b/.gitea/workflows/code-review.yml index 2c3928e..4c5b042 100644 --- a/.gitea/workflows/code-review.yml +++ b/.gitea/workflows/code-review.yml @@ -28,8 +28,8 @@ jobs: ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} FULL_CONTEXT_MODEL: gpt-4o FULL_CONTEXT_API_KEY: ${{ secrets.OPENAI_API_KEY }} - SINGLE_CHUNK_MODEL: gpt-4o - SINGLE_CHUNK_API_KEY: ${{ secrets.OPENAI_API_KEY }} + SINGLE_CHUNK_MODEL: claude-3-5-sonnet-20240620 + SINGLE_CHUNK_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} EXCLUDE: "*.yml,*.yaml" run: python .gitea/scripts/code_review.py -- 2.49.1 From 0089b581bbb842becbc996209d633c10a723f08a Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:20:40 +0900 Subject: [PATCH 04/25] change model --- .gitea/workflows/code-review.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitea/workflows/code-review.yml b/.gitea/workflows/code-review.yml index 4c5b042..68f1147 100644 --- a/.gitea/workflows/code-review.yml +++ b/.gitea/workflows/code-review.yml @@ -26,9 +26,9 @@ jobs: - name: Run Code Review env: ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} - FULL_CONTEXT_MODEL: gpt-4o - FULL_CONTEXT_API_KEY: ${{ secrets.OPENAI_API_KEY }} - SINGLE_CHUNK_MODEL: claude-3-5-sonnet-20240620 + FULL_CONTEXT_MODEL: deepseek-reasoner + FULL_CONTEXT_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }} + SINGLE_CHUNK_MODEL: gemini-2.0-flash-exp SINGLE_CHUNK_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} EXCLUDE: "*.yml,*.yaml" run: python .gitea/scripts/code_review.py -- 2.49.1 From f1b4e2ce617fc5714dbde27405d6b5cea18da4d4 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:24:11 +0900 Subject: [PATCH 05/25] remove model parameter --- .gitea/scripts/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index 572dd72..8065795 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -84,7 +84,7 @@ class Model: return AsyncAnthropic(api_key=api_key) case ModelProvider.GOOGLE: genai.configure(api_key=api_key) - return genai.GenerativeModel(model=self.model, api_key=api_key) + return genai.GenerativeModel(self.model, api_key=api_key) case ModelProvider.DEEPSEEK: return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com") -- 2.49.1 From 3ff0e65564a8c4e089992eb904448f56551467d7 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:25:36 +0900 Subject: [PATCH 06/25] remove api key --- .gitea/scripts/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index 8065795..be8a903 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -84,7 +84,7 @@ class Model: return AsyncAnthropic(api_key=api_key) case ModelProvider.GOOGLE: genai.configure(api_key=api_key) - return genai.GenerativeModel(self.model, api_key=api_key) + return genai.GenerativeModel(self.model) case ModelProvider.DEEPSEEK: return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com") -- 2.49.1 From b5d2df6b2f3a7fcc3a5c2fade8efe0c45a76b338 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:26:58 +0900 Subject: [PATCH 07/25] change api key --- .gitea/workflows/code-review.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitea/workflows/code-review.yml b/.gitea/workflows/code-review.yml index 68f1147..21649e5 100644 --- a/.gitea/workflows/code-review.yml +++ b/.gitea/workflows/code-review.yml @@ -29,7 +29,7 @@ jobs: FULL_CONTEXT_MODEL: deepseek-reasoner FULL_CONTEXT_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }} SINGLE_CHUNK_MODEL: gemini-2.0-flash-exp - SINGLE_CHUNK_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + SINGLE_CHUNK_API_KEY: ${{ secrets.GOOGLE_API_KEY }} EXCLUDE: "*.yml,*.yaml" run: python .gitea/scripts/code_review.py -- 2.49.1 From 94c892c4a771a6a21b6176f6cc97aa73b882e470 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:34:28 +0900 Subject: [PATCH 08/25] add exeception --- .gitea/scripts/code_review.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index e87e86a..667908b 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -193,9 +193,11 @@ async def get_file_content(file: str) -> str | None: async with session.get(url) as response: response.raise_for_status() return await response.text() - except Exception as e: - print(f"Failed to get file content: {e}") - return None + except aiohttp.ClientError as e: # More specific exception handling + print(f"Network error fetching {file}: {e}") + except asyncio.TimeoutError: + print(f"Timeout fetching {file}") + return None async def analyze_full_context( -- 2.49.1 From 1970f27dceb65cfbf9e6a387efcc25d5e7ce3097 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:39:52 +0900 Subject: [PATCH 09/25] update yml --- .gitea/workflows/python-lint.yml | 30 +++++++++++++++++++++ .gitea/workflows/rust-lint.yml | 45 ++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/.gitea/workflows/python-lint.yml b/.gitea/workflows/python-lint.yml index e69de29..7806f1d 100644 --- a/.gitea/workflows/python-lint.yml +++ b/.gitea/workflows/python-lint.yml @@ -0,0 +1,30 @@ +name: mint_ci + +on: + push: + branches: [ "main"] + pull_request: + types: [unlabeled, opened, synchronize, reopened] + +env: + PYTHON_VERSION: "3.12.3" + +jobs: + lint: + name: Check Python code using ruff + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + token: ${{ secrets.ACCESS_TOKEN }} + - uses: astral-sh/ruff-action@v1 + with: + version: 0.7.4 + args: check . --select=E5,F4,F8,D400,D403,D417,D100,D102,D103,D101,ANN001,ANN201 --output-format=full --exclude='**/test_*.py','**/__init__.py' --force-exclude + changed-files: 'true' + - uses: astral-sh/ruff-action@v1 + with: + version: 0.7.4 + args: check . --select=I --output-format=full --force-exclude + changed-files: 'true' + continue-on-error: true \ No newline at end of file diff --git a/.gitea/workflows/rust-lint.yml b/.gitea/workflows/rust-lint.yml index e69de29..c1b627d 100644 --- a/.gitea/workflows/rust-lint.yml +++ b/.gitea/workflows/rust-lint.yml @@ -0,0 +1,45 @@ +on: + push: + branches: [main] + pull_request: + types: [unlabeled, opened, synchronize, reopened] + merge_group: + +name: Rust-lint + + +jobs: + rust_tests: + env: + RUST_BACKTRACE: full + name: Run rust tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + components: clippy + - uses: Swatinem/rust-cache@v2 + + - name: run clippy + run: cargo clippy --workspace --exclude rustpython_wasm -- -Dwarnings + + - name: run rust tests + run: cargo test --workspace --verbose + + - name: check compilation without threading + run: cargo check + + + lint: + name: Check Rust code with rustfmt and clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + - name: run rustfmt + run: cargo fmt --check + + \ No newline at end of file -- 2.49.1 From 457f5ba18ea715479dac94a246ef125ab146f078 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:54:45 +0900 Subject: [PATCH 10/25] update google config --- .gitea/scripts/model.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index be8a903..c1ed4d7 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -6,6 +6,14 @@ from typing import Any import google.generativeai as genai from anthropic import AsyncAnthropic from openai import AsyncOpenAI +import typing_extensions as typing + + +class GoogleReponse(typing.TypedDict): + """The response from Google model.""" + + lineNumber: int + reviewComment: str class ModelProvider(Enum): @@ -128,7 +136,13 @@ class Model: ) return response.content[0].text.strip() case ModelProvider.GOOGLE: - response = await self.session.generate_content_async(prompt) + response = await self.session.generate_content_async( + prompt, + generation_config=genai.GenerationConfig( + response_mime_type="application/json", + response_schema=list[GoogleReponse], + ), + ) return response.text.strip() async def get_response_single_chunk( @@ -175,7 +189,7 @@ class Model: SINGLE_CHUNK_SYSTEM_PROMPT = ( "Your task is to review pull requests. Instructions:\n" "- Provide the response in the following JSON format: " - """[{{"lineNumber": , "reviewComment": ""}}] \n""" + """[{{"lineNumber": int, "reviewComment": str}}] \n""" "- lineNumber is about the line number of the code that in new file. \n" "- Do not give positive comments or compliments. \n" "- Provide comments and suggestions ONLY if there is something to improve" -- 2.49.1 From 3febb99f5b369020356df45325bd2d3d8ea3a909 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 00:59:12 +0900 Subject: [PATCH 11/25] update system prompt --- .gitea/scripts/model.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index c1ed4d7..3cc6873 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -4,9 +4,9 @@ from enum import Enum from typing import Any import google.generativeai as genai +import typing_extensions as typing from anthropic import AsyncAnthropic from openai import AsyncOpenAI -import typing_extensions as typing class GoogleReponse(typing.TypedDict): @@ -92,7 +92,9 @@ class Model: return AsyncAnthropic(api_key=api_key) case ModelProvider.GOOGLE: genai.configure(api_key=api_key) - return genai.GenerativeModel(self.model) + return genai.GenerativeModel( + model_name=self.model, system_instruction=self.system_prompt + ) case ModelProvider.DEEPSEEK: return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com") -- 2.49.1 From 9454ba7df36874c907f1056c5988ee73742a53cb Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 01:02:00 +0900 Subject: [PATCH 12/25] add line number information --- .gitea/scripts/code_review.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 667908b..08fd9c1 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -89,10 +89,10 @@ def parse_diff(diff: str) -> list[dict[str, Any]]: diff_text = [] for line in remain_text.splitlines(): if line.startswith("-"): - diff_text.append(f"{old_idx} {line}") + diff_text.append(f"old_line_number:{old_idx} {line}") old_idx += 1 elif line.startswith("+"): - diff_text.append(f"{new_idx} {line}") + diff_text.append(f"new_line_number:{new_idx} {line}") new_idx += 1 else: diff_text.append(line) -- 2.49.1 From a3904537f7c0a539c67f09c9a0593a3680158c58 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 01:04:55 +0900 Subject: [PATCH 13/25] add position --- .gitea/scripts/code_review.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 08fd9c1..4dba884 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -127,7 +127,8 @@ def create_comment( for ai_response in ai_response: comments.append( { - "body": f"[REVIEW] {ai_response['reviewComment']}", + "body": f"[REVIEW] {ai_response['reviewComment']}," + f"at line {ai_response['lineNumber']}", "path": file, "new_position": int(ai_response["lineNumber"]), } -- 2.49.1 From 939af86aa6da7b53b1370e800382a92cfef84101 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 01:08:21 +0900 Subject: [PATCH 14/25] update system prompt to indicate linenumber precisely --- .gitea/scripts/code_review.py | 3 +-- .gitea/scripts/model.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 4dba884..08fd9c1 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -127,8 +127,7 @@ def create_comment( for ai_response in ai_response: comments.append( { - "body": f"[REVIEW] {ai_response['reviewComment']}," - f"at line {ai_response['lineNumber']}", + "body": f"[REVIEW] {ai_response['reviewComment']}", "path": file, "new_position": int(ai_response["lineNumber"]), } diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index 3cc6873..78d0aa5 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -193,6 +193,7 @@ SINGLE_CHUNK_SYSTEM_PROMPT = ( "- Provide the response in the following JSON format: " """[{{"lineNumber": int, "reviewComment": str}}] \n""" "- lineNumber is about the line number of the code that in new file. \n" + "- lineNumber can be cound at the front of each line. \n" "- Do not give positive comments or compliments. \n" "- Provide comments and suggestions ONLY if there is something to improve" "otherwise return an empty array. \n" -- 2.49.1 From 2a5f2491c2ac8c78b195781f90213c80fba2b949 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 11:44:27 +0900 Subject: [PATCH 15/25] add prompt log --- .gitea/scripts/model.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index 78d0aa5..aa57827 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -162,7 +162,10 @@ class Model: str: The response. """ prompt = SINGLE_CHUNK_USER_PROMPT.format(file, title, description, chunk) - return await self.request(prompt) + response = await self.request(prompt) + print(f"prompt: {prompt}") + print(f"response: {response}") + return response async def get_response_full_context( self, title: str, description: str, file_contents: list[str] @@ -181,7 +184,10 @@ class Model: prompt = FULL_CONTEXT_USER_PROMPT.format( title, description, "\n".join(file_contents) ) - return await self.request(prompt) + response = await self.request(prompt) + print(f"prompt: {prompt}") + print(f"response: {response}") + return response except Exception as e: print(f"Error during full context response: {e}") print(prompt) -- 2.49.1 From ccf60f3c7e622243a46e3b0bcdaeb4d6387fd74c Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 11:49:43 +0900 Subject: [PATCH 16/25] remove response --- .gitea/scripts/model.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index aa57827..78d0aa5 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -162,10 +162,7 @@ class Model: str: The response. """ prompt = SINGLE_CHUNK_USER_PROMPT.format(file, title, description, chunk) - response = await self.request(prompt) - print(f"prompt: {prompt}") - print(f"response: {response}") - return response + return await self.request(prompt) async def get_response_full_context( self, title: str, description: str, file_contents: list[str] @@ -184,10 +181,7 @@ class Model: prompt = FULL_CONTEXT_USER_PROMPT.format( title, description, "\n".join(file_contents) ) - response = await self.request(prompt) - print(f"prompt: {prompt}") - print(f"response: {response}") - return response + return await self.request(prompt) except Exception as e: print(f"Error during full context response: {e}") print(prompt) -- 2.49.1 From 76d543c766afb86b7f90f99bc3316a7b9999aab3 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 13:42:46 +0900 Subject: [PATCH 17/25] print for debug --- .gitea/scripts/code_review.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 08fd9c1..9bf6a93 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -285,7 +285,9 @@ async def main() -> None: is_full_context=False, ) + print("diff: ", diff) parsed_diff = parse_diff(diff) + print("parsed_diff: ", parsed_diff) comments_task = asyncio.create_task( analyze_single_chunks(single_chunk_model, parsed_diff) ) -- 2.49.1 From a7ae4de9bf25c7c1905f40500a50a51bc86ea74a Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 14:55:32 +0900 Subject: [PATCH 18/25] update line number tag --- .gitea/scripts/code_review.py | 41 +++++++++++++++++------------------ .gitea/scripts/model.py | 3 ++- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 9bf6a93..53c7a5e 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -60,7 +60,7 @@ def parse_diff(diff: str) -> list[dict[str, Any]]: ) old_new_pattern = re.compile(r"(?m)^(---|\+\+\+)\s+(.*)$") hunk_pattern = re.compile( - r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)(?=^@@ |$)", + r"@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)?(?=@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@|\Z)", re.MULTILINE | re.DOTALL, ) list_diff = [] @@ -79,33 +79,32 @@ def parse_diff(diff: str) -> list[dict[str, Any]]: print("Neglict deleted file") continue new_file = new_file.lstrip("b/") - - hunk_match = hunk_pattern.search(diff_text) - if hunk_match is None: - continue - old_idx = int(hunk_match.group(1)) - new_idx = int(hunk_match.group(3)) - remain_text = diff_text[hunk_match.end() + 1 :] - diff_text = [] - for line in remain_text.splitlines(): - if line.startswith("-"): - diff_text.append(f"old_line_number:{old_idx} {line}") - old_idx += 1 - elif line.startswith("+"): - diff_text.append(f"new_line_number:{new_idx} {line}") - new_idx += 1 - else: - diff_text.append(line) - diff_text = "\n".join(diff_text) - if any(fnmatch.fnmatch(new_file, pattern) for pattern in EXCLUDE_PATTERNS): print(f"Exclude file {new_file}") continue + output_diff_text = [] + for hunk_match in hunk_pattern.finditer(diff_text): + old_idx = int(hunk_match.group(1)) + new_idx = int(hunk_match.group(3)) + remain_text = hunk_match.group(5).splitlines() + for line in remain_text: + if line.startswith("-"): + output_diff_text.append(f"{old_idx} \t {line}") + old_idx += 1 + elif line.startswith("+"): + output_diff_text.append(f"\t {new_idx} {line}") + new_idx += 1 + else: + output_diff_text.append(f"{old_idx} {new_idx} {line}") + old_idx += 1 + new_idx += 1 + + output_diff_text = "\n".join(output_diff_text) list_diff.append( { "file": new_file, - "chunk": diff_text, + "chunk": output_diff_text, } ) return list_diff diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index 78d0aa5..b484170 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -193,7 +193,8 @@ SINGLE_CHUNK_SYSTEM_PROMPT = ( "- Provide the response in the following JSON format: " """[{{"lineNumber": int, "reviewComment": str}}] \n""" "- lineNumber is about the line number of the code that in new file. \n" - "- lineNumber can be cound at the front of each line. \n" + "- lineNumber can be found at the front of each line. \n" + "- At the first number is old line number, the second number is new line number. \n" "- Do not give positive comments or compliments. \n" "- Provide comments and suggestions ONLY if there is something to improve" "otherwise return an empty array. \n" -- 2.49.1 From 3e97b34e2eeab027d9a712d7b5034e0ae764e8fc Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 14:56:36 +0900 Subject: [PATCH 19/25] remove print for log --- .gitea/scripts/code_review.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 53c7a5e..0e3edf5 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -284,9 +284,7 @@ async def main() -> None: is_full_context=False, ) - print("diff: ", diff) parsed_diff = parse_diff(diff) - print("parsed_diff: ", parsed_diff) comments_task = asyncio.create_task( analyze_single_chunks(single_chunk_model, parsed_diff) ) -- 2.49.1 From 133437192d1b697889b6722d0d6355c355e95dd6 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 15:04:42 +0900 Subject: [PATCH 20/25] simplify regex pattern --- .gitea/scripts/code_review.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 0e3edf5..3a3246c 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -60,7 +60,7 @@ def parse_diff(diff: str) -> list[dict[str, Any]]: ) old_new_pattern = re.compile(r"(?m)^(---|\+\+\+)\s+(.*)$") hunk_pattern = re.compile( - r"@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)?(?=@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@|\Z)", + r"@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)?(?=@@|\Z)", re.MULTILINE | re.DOTALL, ) list_diff = [] @@ -90,10 +90,10 @@ def parse_diff(diff: str) -> list[dict[str, Any]]: remain_text = hunk_match.group(5).splitlines() for line in remain_text: if line.startswith("-"): - output_diff_text.append(f"{old_idx} \t {line}") + output_diff_text.append(f"{old_idx} None {line}") old_idx += 1 elif line.startswith("+"): - output_diff_text.append(f"\t {new_idx} {line}") + output_diff_text.append(f"None {new_idx} {line}") new_idx += 1 else: output_diff_text.append(f"{old_idx} {new_idx} {line}") -- 2.49.1 From 443c8271c06b7ed463047733e7692c07f50e804d Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 22:36:45 +0900 Subject: [PATCH 21/25] full context only for opened --- .gitea/scripts/code_review.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 3a3246c..127cebd 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -288,13 +288,16 @@ async def main() -> None: comments_task = asyncio.create_task( analyze_single_chunks(single_chunk_model, parsed_diff) ) - full_context_response_task = asyncio.create_task( - analyze_full_context(full_context_model, parsed_diff) - ) + + if EVENT_DATA["action"] == "opened": + full_context_response_task = asyncio.create_task( + analyze_full_context(full_context_model, parsed_diff) + ) + full_context_response = await full_context_response_task + else: + full_context_response = "" comments = await comments_task - full_context_response = await full_context_response_task - post_review(full_context_response, comments) -- 2.49.1 From b8fd0bd36128d64646af3be8e7f2ecea526ae154 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 22:37:05 +0900 Subject: [PATCH 22/25] change hunk for more descriptive name --- .gitea/scripts/code_review.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 127cebd..769ee74 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -59,7 +59,7 @@ def parse_diff(diff: str) -> list[dict[str, Any]]: r"(?s)diff --git a/(.+?) b/(.*?)\r?\n(.*?)(?=diff --git a/|$)", re.S ) old_new_pattern = re.compile(r"(?m)^(---|\+\+\+)\s+(.*)$") - hunk_pattern = re.compile( + chunk_range_pattern = re.compile( r"@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)?(?=@@|\Z)", re.MULTILINE | re.DOTALL, ) @@ -84,10 +84,10 @@ def parse_diff(diff: str) -> list[dict[str, Any]]: continue output_diff_text = [] - for hunk_match in hunk_pattern.finditer(diff_text): - old_idx = int(hunk_match.group(1)) - new_idx = int(hunk_match.group(3)) - remain_text = hunk_match.group(5).splitlines() + for chunk_range_match in chunk_range_pattern.finditer(diff_text): + old_idx = int(chunk_range_match.group(1)) + new_idx = int(chunk_range_match.group(3)) + remain_text = chunk_range_match.group(5).splitlines() for line in remain_text: if line.startswith("-"): output_diff_text.append(f"{old_idx} None {line}") -- 2.49.1 From 810b46b0b4015511b8814ea02aa719d427ae2fa0 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 23:03:50 +0900 Subject: [PATCH 23/25] adapt comment --- .gitea/scripts/code_review.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 769ee74..0a1683b 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -87,8 +87,7 @@ def parse_diff(diff: str) -> list[dict[str, Any]]: for chunk_range_match in chunk_range_pattern.finditer(diff_text): old_idx = int(chunk_range_match.group(1)) new_idx = int(chunk_range_match.group(3)) - remain_text = chunk_range_match.group(5).splitlines() - for line in remain_text: + for line in chunk_range_match.group(5).splitlines(): if line.startswith("-"): output_diff_text.append(f"{old_idx} None {line}") old_idx += 1 @@ -150,7 +149,7 @@ async def analyze_single_chunks( async def process_single_chunk(diff: dict[str, Any]): file = diff["file"] chunk = diff["chunk"] - response = await single_chunk_model.get_response_single_chunk( + response = single_chunk_model.get_response_single_chunk( file, title, description, chunk ) response = response.strip("`").lstrip("json").strip() or "[]" @@ -215,7 +214,7 @@ async def analyze_full_context( async def get_file_data(diff: dict[str, Any]): file = diff["file"] chunk = diff["chunk"] - content = await get_file_content(file) + content = get_file_content(file) if content is None: return None return f"File: {file}\n{content}\nDiff: {chunk}" @@ -230,7 +229,7 @@ async def analyze_full_context( title = EVENT_DATA["pull_request"]["title"] description = EVENT_DATA["pull_request"]["body"] - response = await full_context_model.get_response_full_context( + response = full_context_model.get_response_full_context( title, description, file_contents ) response = response.strip("`").lstrip("markdown").strip() -- 2.49.1 From 284e0735a71e6a28989d0fec6971f6a794af3bc4 Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 23:06:42 +0900 Subject: [PATCH 24/25] add await --- .gitea/scripts/code_review.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitea/scripts/code_review.py b/.gitea/scripts/code_review.py index 0a1683b..bb75b9b 100644 --- a/.gitea/scripts/code_review.py +++ b/.gitea/scripts/code_review.py @@ -149,7 +149,7 @@ async def analyze_single_chunks( async def process_single_chunk(diff: dict[str, Any]): file = diff["file"] chunk = diff["chunk"] - response = single_chunk_model.get_response_single_chunk( + response = await single_chunk_model.get_response_single_chunk( file, title, description, chunk ) response = response.strip("`").lstrip("json").strip() or "[]" @@ -229,7 +229,7 @@ async def analyze_full_context( title = EVENT_DATA["pull_request"]["title"] description = EVENT_DATA["pull_request"]["body"] - response = full_context_model.get_response_full_context( + response = await full_context_model.get_response_full_context( title, description, file_contents ) response = response.strip("`").lstrip("markdown").strip() -- 2.49.1 From c51f3060cd39b9cc687e6cf0f0a9cef22c30f44f Mon Sep 17 00:00:00 2001 From: Myeongseon Choi Date: Mon, 27 Jan 2025 23:24:28 +0900 Subject: [PATCH 25/25] change instruction --- .gitea/scripts/model.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitea/scripts/model.py b/.gitea/scripts/model.py index b484170..dbe344e 100644 --- a/.gitea/scripts/model.py +++ b/.gitea/scripts/model.py @@ -195,12 +195,17 @@ SINGLE_CHUNK_SYSTEM_PROMPT = ( "- lineNumber is about the line number of the code that in new file. \n" "- lineNumber can be found at the front of each line. \n" "- At the first number is old line number, the second number is new line number. \n" + "- If the line starts with `+`, it means the line is added. \n" + "- If the line starts with `-`, it means the line is deleted. \n" + "- Evaluate whether the code changes and additions are appropriate " + "and if the new code structure is suitable. \n" "- Do not give positive comments or compliments. \n" "- Provide comments and suggestions ONLY if there is something to improve" "otherwise return an empty array. \n" "- Write the comment in GitHub Markdown format. \n" "- Use the given description only for the overall context " "and only comment the code. \n" + "- Do not suggest type hint or naming convention. \n" "- IMPORTANT: NEVER suggest adding comments to the code. \n" ) SINGLE_CHUNK_USER_PROMPT = ( -- 2.49.1