2025-01-27 23:29:08 +09:00 · 2025-01-27 00:15:41 +09:00 · 2025-01-27 00:55:29 +09:00 · 2025-01-27 00:15:41 +09:00 · 2025-01-27 00:55:29 +09:00 · 2025-01-27 00:15:41 +09:00
5 changed files with 192 additions and 72 deletions
--- a/.gitea/scripts/code_review.py
+++ b/.gitea/scripts/code_review.py
@@ -1,5 +1,6 @@
 """Code Reviewer for Gitea."""

+import asyncio
 import fnmatch
 import json
 import os
@@ -7,6 +8,7 @@ import re
 from typing import Any

 import requests
+import aiohttp
 from model import Model

 ACCESS_TOKEN = os.getenv("ACCESS_TOKEN", "")
@@ -57,8 +59,8 @@ def parse_diff(diff: str) -> list[dict[str, Any]]:
         r"(?s)diff --git a/(.+?) b/(.*?)\r?\n(.*?)(?=diff --git a/|$)", re.S
     )
     old_new_pattern = re.compile(r"(?m)^(---|\+\+\+)\s+(.*)$")
-    hunk_pattern = re.compile(
-        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)(?=^@@ |$)",
+    chunk_range_pattern = re.compile(
+        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*?)(?=^@@ |\Z)",
         re.MULTILINE | re.DOTALL,
     )
     list_diff = []
@@ -77,33 +79,31 @@ def parse_diff(diff: str) -> list[dict[str, Any]]:
             print("Neglict deleted file")
             continue
-        new_file = new_file.lstrip("b/")
+        new_file = new_file.removeprefix("b/")
-
-        hunk_match = hunk_pattern.search(diff_text)
-        if hunk_match is None:
-            continue
-        old_idx = int(hunk_match.group(1))
-        new_idx = int(hunk_match.group(3))
-        remain_text = diff_text[hunk_match.end() + 1 :]
-        diff_text = []
-        for line in remain_text.splitlines():
-            if line.startswith("-"):
-                diff_text.append(f"{old_idx} {line}")
-                old_idx += 1
-            elif line.startswith("+"):
-                diff_text.append(f"{new_idx} {line}")
-                new_idx += 1
-            else:
-                diff_text.append(line)
-        diff_text = "\n".join(diff_text)
-
         if any(fnmatch.fnmatch(new_file, pattern) for pattern in EXCLUDE_PATTERNS):
             print(f"Exclude file {new_file}")
             continue

+        output_diff_text = []
+        for chunk_range_match in chunk_range_pattern.finditer(diff_text):
+            old_idx = int(chunk_range_match.group(1))
+            new_idx = int(chunk_range_match.group(3))
+            # group(5) starts with the "@@" header tail, not a file line: skip it.
+            for line in chunk_range_match.group(5).splitlines()[1:]:
+                if line.startswith("-"):
+                    output_diff_text.append(f"{old_idx} None {line}")
+                    old_idx += 1
+                elif line.startswith("+"):
+                    output_diff_text.append(f"None {new_idx} {line}")
+                    new_idx += 1
+                else:
+                    output_diff_text.append(f"{old_idx} {new_idx} {line}")
+                    old_idx += 1
+                    new_idx += 1
+        output_diff_text = "\n".join(output_diff_text)
         list_diff.append(
             {
                 "file": new_file,
-                "chunk": diff_text,
+                "chunk": output_diff_text,
             }
         )
     return list_diff
@@ -133,7 +133,7 @@ def create_comment(
    return comments


-def analyze_single_chunks(
+async def analyze_single_chunks(
    single_chunk_model: Model, parsed_diff: list[dict[str, Any]]
 ) -> list[dict[str, Any]]:
    """Analyze single chunks and create comments.
@@ -145,29 +145,33 @@ def analyze_single_chunks(
    Returns:
        list[dict[str, Any]]: comments for single chunk review
    """
-    comments = []
-    title = EVENT_DATA["pull_request"]["title"]
-    description = EVENT_DATA["pull_request"]["body"]
-    for diff in parsed_diff:
+
+    async def process_single_chunk(diff: dict[str, Any]):
        file = diff["file"]
        chunk = diff["chunk"]
-        response = single_chunk_model.get_response_single_chunk(
+        response = await single_chunk_model.get_response_single_chunk(
            file, title, description, chunk
        )
        response = response.strip("`").lstrip("json").strip() or "[]"

        try:
            response_json = json.loads(response)
-            new_comments = create_comment(file, response_json)
-            comments.extend(new_comments)
+            return create_comment(file, response_json)
        except json.JSONDecodeError:
            print(f"Failed to parse response: {response}")
-            continue
+            return []

+    title = EVENT_DATA["pull_request"]["title"]
+    description = EVENT_DATA["pull_request"]["body"]
+    tasks = [process_single_chunk(diff) for diff in parsed_diff]
+    results = await asyncio.gather(*tasks)
+
+    # Flatten the list of comments
+    comments = [comment for result in results for comment in result]
    return comments


-def get_file_content(file: str) -> str | None:
+async def get_file_content(file: str) -> str | None:
    """Get file content from Gitea.

    Args:
@@ -183,15 +187,18 @@ def get_file_content(file: str) -> str | None:
    url = f"{repo_url}/raw/{branch}%2F{replaced_file}?ref={branch}"

    try:
-        response = requests.get(url, headers=HEADERS)
-        response.raise_for_status()
-        return response.text
-    except requests.RequestException as e:
-        print(f"Failed to get file content: {e}")
-        return None
+        async with aiohttp.ClientSession(headers=HEADERS) as session:
+            async with session.get(url) as response:
+                response.raise_for_status()
+                return await response.text()
+    except aiohttp.ClientError as e:  # More specific exception handling
+        print(f"Network error fetching {file}: {e}")
+    except asyncio.TimeoutError:
+        print(f"Timeout fetching {file}")
+    return None


-def analyze_full_context(
+async def analyze_full_context(
    full_context_model: Model, parsed_diff: list[dict[str, Any]]
 ) -> str:
    """Analyze full context and create review.
@@ -203,22 +210,26 @@ def analyze_full_context(
     Returns:
         str: review for full context
     """
-    file_contents = []
-    for diff in parsed_diff:
+
+    async def get_file_data(diff: dict[str, Any]):
         file = diff["file"]
         chunk = diff["chunk"]
-        content = get_file_content(file)
+        content = await get_file_content(file)  # coroutine: must be awaited
         if content is None:
-            continue
-        file_contents.append(f"File: {file}")
-        file_contents.append(content)
-        file_contents.append(f"Diff: {chunk}")
+            return None
+        return f"File: {file}\n{content}\nDiff: {chunk}"
+
+    tasks = [get_file_data(diff) for diff in parsed_diff]
+    file_contents_list = await asyncio.gather(*tasks)
+
+    file_contents = [item for item in file_contents_list if item is not None]
+
     if not file_contents:
         return ""

     title = EVENT_DATA["pull_request"]["title"]
     description = EVENT_DATA["pull_request"]["body"]
-    response = full_context_model.get_response_full_context(
+    response = await full_context_model.get_response_full_context(
         title, description, file_contents
     )
-    response = response.strip("`").lstrip("markdown").strip()
+    response = response.strip("`").removeprefix("markdown").strip()
@@ -248,10 +259,10 @@ def post_review(
    response.raise_for_status()


-def main() -> None:
-    """Code Reviewer for Gitea."""
+async def main() -> None:
+    """Code Reviewer for Gitea: Asynchronous version."""
    if EVENT_DATA["action"] not in ["opened", "synchronized"]:
-        print("Unsupproted event.")
+        print("Unsupported event.")
        return

    diff = get_diff()
@@ -273,10 +284,21 @@ def main() -> None:
    )

    parsed_diff = parse_diff(diff)
-    comments = analyze_single_chunks(single_chunk_model, parsed_diff)
-    full_context_response = analyze_full_context(full_context_model, parsed_diff)
+    comments_task = asyncio.create_task(
+        analyze_single_chunks(single_chunk_model, parsed_diff)
+    )
+
+    if EVENT_DATA["action"] == "opened":
+        full_context_response_task = asyncio.create_task(
+            analyze_full_context(full_context_model, parsed_diff)
+        )
+        full_context_response = await full_context_response_task
+    else:
+        full_context_response = ""
+
+    comments = await comments_task
    post_review(full_context_response, comments)


 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
--- a/.gitea/scripts/model.py
+++ b/.gitea/scripts/model.py
@@ -4,8 +4,16 @@ from enum import Enum
 from typing import Any

 import google.generativeai as genai
-from anthropic import Anthropic
-from openai import OpenAI
+import typing_extensions as typing
+from anthropic import AsyncAnthropic
+from openai import AsyncOpenAI
+
+
+class GoogleReponse(typing.TypedDict):
+    """Schema of one review comment requested from the Google model as JSON."""
+
+    lineNumber: int  # line number the review comment refers to
+    reviewComment: str  # text of the review comment


 class ModelProvider(Enum):
@@ -79,16 +87,18 @@ class Model:
        """
        match self.provider:
            case ModelProvider.OPENAI:
-                return OpenAI(api_key=api_key)
+                return AsyncOpenAI(api_key=api_key)
            case ModelProvider.ANTHROPIC:
-                return Anthropic(api_key=api_key)
+                return AsyncAnthropic(api_key=api_key)
            case ModelProvider.GOOGLE:
                genai.configure(api_key=api_key)
-                return genai.GenerativeModel(model=self.model, api_key=api_key)
+                return genai.GenerativeModel(
+                    model_name=self.model, system_instruction=self.system_prompt
+                )
            case ModelProvider.DEEPSEEK:
-                return OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
+                return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com")

-    def request(self, prompt: str) -> str:
+    async def request(self, prompt: str) -> str:
        """Request the model to generate a response.

        Args:
@@ -99,7 +109,7 @@ class Model:
        """
        match self.provider:
            case ModelProvider.OPENAI | ModelProvider.DEEPSEEK:
-                response = self.session.chat.completions.create(
+                response = await self.session.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": self.system_prompt},
@@ -113,7 +123,7 @@ class Model:
                )
                return response.choices[0].message.content.strip()
            case ModelProvider.ANTHROPIC:
-                response = self.session.messages.create(
+                response = await self.session.messages.create(
                    model=self.model,
                    messages=[{"role": "user", "content": prompt}],
                    system=[
@@ -128,10 +138,16 @@ class Model:
                )
                return response.content[0].text.strip()
            case ModelProvider.GOOGLE:
-                response = self.session.generate_content(prompt)
+                response = await self.session.generate_content_async(
+                    prompt,
+                    generation_config=genai.GenerationConfig(
+                        response_mime_type="application/json",
+                        response_schema=list[GoogleReponse],
+                    ),
+                )
                return response.text.strip()

-    def get_response_single_chunk(
+    async def get_response_single_chunk(
        self, file: str, title: str, description: str, chunk: str
    ) -> str:
        """Get the response for a single chunk.
@@ -146,9 +162,9 @@ class Model:
            str: The response.
        """
        prompt = SINGLE_CHUNK_USER_PROMPT.format(file, title, description, chunk)
-        return self.request(prompt)
+        return await self.request(prompt)

-    def get_response_full_context(
+    async def get_response_full_context(
        self, title: str, description: str, file_contents: list[str]
    ) -> str:
        """Get the response for full context.
@@ -165,7 +181,7 @@ class Model:
            prompt = FULL_CONTEXT_USER_PROMPT.format(
                title, description, "\n".join(file_contents)
            )
-            return self.request(prompt)
+            return await self.request(prompt)
        except Exception as e:
            print(f"Error during full context response: {e}")
            print(prompt)
@@ -175,14 +191,21 @@ class Model:
 SINGLE_CHUNK_SYSTEM_PROMPT = (
     "Your task is to review pull requests. Instructions:\n"
     "- Provide the response in the following JSON format:  "
-    """[{{"lineNumber":  <line_number>, "reviewComment": "<review comment>"}}] \n"""
+    """[{{"lineNumber": int, "reviewComment": str}}] \n"""
     "- lineNumber is about the line number of the code that in new file. \n"
+    "- lineNumber can be found at the front of each line. \n"
+    "- The first number is the old line number, the second is the new line number. \n"
+    "- After the two numbers, `+` marks an added line (old number is None). \n"
+    "- After the two numbers, `-` marks a deleted line (new number is None). \n"
+    "- Evaluate whether the code changes and additions are appropriate "
+    "and if the new code structure is suitable. \n"
     "- Do not give positive comments or compliments. \n"
-    "- Provide comments and suggestions ONLY if there is something to improve"
+    "- Provide comments and suggestions ONLY if there is something to improve, "
     "otherwise return an empty array. \n"
     "- Write the comment in GitHub Markdown format. \n"
     "- Use the given description only for the overall context "
     "and only comment the code. \n"
+    "- Do not suggest type hint or naming convention. \n"
     "- IMPORTANT: NEVER suggest adding comments to the code. \n"
 )
 SINGLE_CHUNK_USER_PROMPT = (
--- a/.gitea/workflows/code-review.yml
+++ b/.gitea/workflows/code-review.yml
@@ -21,15 +21,15 @@ jobs:
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install requests py-gitea openai anthropic google-generativeai
+          pip install aiohttp requests py-gitea openai anthropic google-generativeai

      - name: Run Code Review
        env:
          ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }}
-          FULL_CONTEXT_MODEL: gpt-4o
-          FULL_CONTEXT_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          SINGLE_CHUNK_MODEL: gpt-4o
-          SINGLE_CHUNK_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          FULL_CONTEXT_MODEL: deepseek-reasoner
+          FULL_CONTEXT_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
+          SINGLE_CHUNK_MODEL: gemini-2.0-flash-exp
+          SINGLE_CHUNK_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          EXCLUDE: "*.yml,*.yaml"
        run: python .gitea/scripts/code_review.py

--- a/.gitea/workflows/python-lint.yml
+++ b/.gitea/workflows/python-lint.yml
@@ -0,0 +1,30 @@
+name: mint_ci
+
+on:
+  push:
+    branches: [ "main"]
+  pull_request:
+    types: [unlabeled, opened, synchronize, reopened]
+
+env:
+  PYTHON_VERSION: "3.12.3"  # NOTE(review): no step in this file reads this value; confirm it is needed
+
+jobs:
+  lint:
+    name: Check Python code using ruff
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        token: ${{ secrets.ACCESS_TOKEN }}
+    - uses: astral-sh/ruff-action@v1  # pass 1: selected E5/F4/F8/D/ANN rules
+      with:
+        version: 0.7.4
+        args: check . --select=E5,F4,F8,D400,D403,D417,D100,D102,D103,D101,ANN001,ANN201 --output-format=full --exclude='**/test_*.py','**/__init__.py' --force-exclude
+        changed-files: 'true'
+    - uses: astral-sh/ruff-action@v1  # pass 2: import-sorting rule (I) only
+      with:
+        version: 0.7.4
+        args: check . --select=I --output-format=full --force-exclude
+        changed-files: 'true'
+      continue-on-error: true  # a failure of pass 2 does not fail the job
--- a/.gitea/workflows/rust-lint.yml
+++ b/.gitea/workflows/rust-lint.yml
@@ -0,0 +1,45 @@
+on:
+  push:
+    branches: [main]
+  pull_request:
+    types: [unlabeled, opened, synchronize, reopened]
+  merge_group:
+
+name: Rust-lint
+
+
+jobs:
+  rust_tests:  # runs clippy, the workspace tests, and a cargo check
+    env:
+      RUST_BACKTRACE: full
+    name: Run rust tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+        with:
+          components: clippy
+      - uses: Swatinem/rust-cache@v2
+
+      - name: run clippy  # NOTE(review): confirm the excluded crate (rustpython_wasm) exists in this workspace
+        run: cargo clippy --workspace  --exclude rustpython_wasm -- -Dwarnings
+
+      - name: run rust tests
+        run: cargo test --workspace --verbose
+
+      - name: check compilation without threading  # NOTE(review): the command is a plain "cargo check" with no extra flags; confirm intent
+        run: cargo check
+
+
+  lint:
+    name: Check Rust code with rustfmt and clippy  # NOTE(review): this job only runs "cargo fmt --check"; clippy runs in rust_tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+        with:
+            components: rustfmt, clippy
+      - name: run rustfmt
+        run: cargo fmt --check
+
+