forked from Rust-related/RustPython
262 lines
8.9 KiB
Python
262 lines
8.9 KiB
Python
"""Model for code review."""
|
|
|
|
from enum import Enum
|
|
from typing import Any
|
|
|
|
import google.generativeai as genai
|
|
import typing_extensions as typing
|
|
from anthropic import AsyncAnthropic
|
|
from openai import AsyncOpenAI
|
|
|
|
|
|
class GoogleResponse(typing.TypedDict):
    """The response from Google model."""

    # Used as the element type of the structured-output schema passed to
    # Gemini (``response_schema=list[GoogleResponse]`` in ``Model.request``).
    # The key names mirror the JSON format requested in
    # SINGLE_CHUNK_SYSTEM_PROMPT, so keep them in sync with that prompt.

    # Line number (in the new file) that the review comment refers to.
    lineNumber: int
    # The review comment text for that line.
    reviewComment: str
|
|
|
|
|
|
class ModelProvider(Enum):
    """Vendors whose models can be used for reviews."""

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    DEEPSEEK = "deepseek"

    @classmethod
    def from_model(cls, model: str) -> "ModelProvider":
        """Resolve the provider that serves the given model name.

        The entries of ``PREFIX_TO_MODEL`` are checked in insertion order and
        the first key that is a prefix of ``model`` decides the provider.

        Args:
            model (str): The model name.

        Returns:
            ModelProvider: The provider mapped to the first matching prefix.

        Raises:
            ValueError: If no registered prefix matches ``model``.
        """
        candidates = (
            provider
            for prefix, provider in PREFIX_TO_MODEL.items()
            if model.startswith(prefix)
        )
        resolved = next(candidates, None)
        if resolved is None:
            raise ValueError(f"Unknown model: {model}")
        return resolved
|
|
|
|
|
|
# Model-name prefix -> provider lookup used by ``ModelProvider.from_model``.
# Prefixes are grouped per provider; the resulting dict keeps this order:
# gpt, o1, o3, claude, gemini, deepseek.
PREFIX_TO_MODEL = {
    prefix: provider
    for provider, prefixes in (
        (ModelProvider.OPENAI, ("gpt", "o1", "o3")),
        (ModelProvider.ANTHROPIC, ("claude",)),
        (ModelProvider.GOOGLE, ("gemini",)),
        (ModelProvider.DEEPSEEK, ("deepseek",)),
    )
    for prefix in prefixes
}
|
|
|
|
|
|
class Model:
|
|
"""The model class.
|
|
|
|
Attributes:
|
|
model (str): The model name.
|
|
api_key (str): The API key.
|
|
system_prompt (str): The system prompt.
|
|
max_tokens (int): The maximum tokens.
|
|
"""
|
|
|
|
def __init__( # noqa: D107
|
|
self,
|
|
model: str,
|
|
api_key: str,
|
|
is_full_context: bool,
|
|
max_tokens: int = 4196,
|
|
):
|
|
self.model = model
|
|
self.system_prompt = (
|
|
FULL_CONTEXT_SYSTEM_PROMPT
|
|
if is_full_context
|
|
else SINGLE_CHUNK_SYSTEM_PROMPT
|
|
)
|
|
self.max_tokens = max_tokens
|
|
self.provider = ModelProvider.from_model(model)
|
|
self.session = self.create_session(api_key)
|
|
|
|
def create_session(self, api_key: str) -> Any:
|
|
"""Create a session for the model.
|
|
|
|
Args:
|
|
api_key (str): The API key.
|
|
|
|
Returns:
|
|
Any: The session.
|
|
"""
|
|
match self.provider:
|
|
case ModelProvider.OPENAI:
|
|
return AsyncOpenAI(api_key=api_key)
|
|
case ModelProvider.ANTHROPIC:
|
|
return AsyncAnthropic(api_key=api_key)
|
|
case ModelProvider.GOOGLE:
|
|
genai.configure(api_key=api_key)
|
|
return genai.GenerativeModel(
|
|
model_name=self.model, system_instruction=self.system_prompt
|
|
)
|
|
case ModelProvider.DEEPSEEK:
|
|
return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com")
|
|
|
|
async def request(self, prompt: str) -> str:
|
|
"""Request the model to generate a response.
|
|
|
|
Args:
|
|
prompt (str): The prompt to generate a response for.
|
|
|
|
Returns:
|
|
str: The generated response.
|
|
"""
|
|
match self.provider:
|
|
case ModelProvider.OPENAI | ModelProvider.DEEPSEEK:
|
|
response = await self.session.chat.completions.create(
|
|
model=self.model,
|
|
messages=[
|
|
{"role": "system", "content": self.system_prompt},
|
|
{"role": "user", "content": prompt},
|
|
],
|
|
temperature=0.2,
|
|
max_tokens=self.max_tokens,
|
|
top_p=1,
|
|
frequency_penalty=0,
|
|
presence_penalty=0,
|
|
)
|
|
return response.choices[0].message.content.strip()
|
|
case ModelProvider.ANTHROPIC:
|
|
response = await self.session.messages.create(
|
|
model=self.model,
|
|
messages=[{"role": "user", "content": prompt}],
|
|
system=[
|
|
{
|
|
"type": "text",
|
|
"text": self.system_prompt,
|
|
"cache_control": {"type": "ephemeral"},
|
|
}
|
|
],
|
|
temperature=0.2,
|
|
max_tokens=self.max_tokens,
|
|
)
|
|
return response.content[0].text.strip()
|
|
case ModelProvider.GOOGLE:
|
|
response = await self.session.generate_content_async(
|
|
prompt,
|
|
generation_config=genai.GenerationConfig(
|
|
response_mime_type="application/json",
|
|
response_schema=list[GoogleResponse],
|
|
),
|
|
)
|
|
return response.text.strip()
|
|
|
|
async def get_response_single_chunk(
|
|
self, file: str, title: str, description: str, chunk: str
|
|
) -> str:
|
|
"""Get the response for a single chunk.
|
|
|
|
Args:
|
|
file (str): The file name.
|
|
title (str): The pull request title.
|
|
description (str): The pull request description.
|
|
chunk (str): The diff chunk.
|
|
|
|
Returns:
|
|
str: The response.
|
|
"""
|
|
prompt = SINGLE_CHUNK_USER_PROMPT.format(file, title, description, chunk)
|
|
return await self.request(prompt)
|
|
|
|
async def get_response_full_context(
|
|
self, title: str, description: str, file_contents: list[str]
|
|
) -> str:
|
|
"""Get the response for full context.
|
|
|
|
Args:
|
|
title (str): The pull request title.
|
|
description (str): The pull request description.
|
|
file_contents (list[str]): The file contents, diffs.
|
|
|
|
Returns:
|
|
str: The response.
|
|
"""
|
|
try:
|
|
prompt = FULL_CONTEXT_USER_PROMPT.format(
|
|
title, description, "\n".join(file_contents)
|
|
)
|
|
return await self.request(prompt)
|
|
except Exception as e:
|
|
print(f"Error during full context response: {e}")
|
|
print(prompt)
|
|
return None
|
|
|
|
|
|
# System prompt for per-chunk reviews. It is sent to the model verbatim (it is
# never passed through ``str.format``), so the braces of the JSON example are
# written singly; doubled braces would reach the model literally.
SINGLE_CHUNK_SYSTEM_PROMPT = (
    "Your task is to review pull requests. Instructions:\n"
    "- Provide the response in the following JSON format: "
    '[{"lineNumber": int, "reviewComment": str}] \n'
    "- lineNumber is about the line number of the code that in new file. \n"
    "- lineNumber can be found at the front of each line. \n"
    "- At the first number is old line number, the second number is new line number. \n"
    "- If the line starts with `+`, it means the line is added. \n"
    "- If the line starts with `-`, it means the line is deleted. \n"
    "- Evaluate whether the code changes and additions are appropriate "
    "and if the new code structure is suitable. \n"
    "- Do not give positive comments or compliments. \n"
    # The separator after "improve" keeps the two halves of the sentence apart.
    "- Provide comments and suggestions ONLY if there is something to improve, "
    "otherwise return an empty array. \n"
    "- Write the comment in GitHub Markdown format. \n"
    "- Use the given description only for the overall context "
    "and only comment the code. \n"
    "- Do not suggest type hint or naming convention. \n"
    "- IMPORTANT: NEVER suggest adding comments to the code. \n"
)
|
|
# User prompt template for a single diff chunk. Positional ``{}`` slots, in
# order: file name, pull request title, pull request description, diff chunk.
# Every line except the last is terminated by a space followed by a newline.
SINGLE_CHUNK_USER_PROMPT = " \n".join(
    (
        "Review the following code diff in the file {} and take the pull "
        "request title and description into account when writing the response.",
        "Pull request title: {}",
        "Pull request description:",
        "---",
        "{}",
        "---",
        "Git diff to review:",
        "```diff",
        "{}",
        "```",
    )
)
|
|
|
|
# System prompt for whole-PR summaries. Each tuple element below is one
# logical line of the prompt; a newline is appended to every one of them.
FULL_CONTEXT_SYSTEM_PROMPT = "".join(
    f"{line}\n"
    for line in (
        (
            "You are an experienced software engineer specializing in reviewing "
            "pull requests. Your task is to provide an overall code review "
            "summary for a PR. Focus on assessing the following aspects:"
        ),
        (
            "1. **Code Structure & Architecture:** Evaluate whether the code is "
            "well-organized, modular, and adheres to clean code principles. "
            "Suggest improvements if needed."
        ),
        (
            "2. **Refactoring Opportunities:** Identify areas where the code can "
            "be optimized or simplified without changing its behavior."
        ),
        (
            "3. **Potential Future Problems:** Highlight possible scalability, "
            "maintainability, or dependency issues that might arise in the "
            "future based on the current implementation."
        ),
        (
            "Be constructive and clear in your feedback. Avoid commenting on "
            "trivial issues or syntax errors—focus on high-level feedback."
        ),
        "Precise instructions:",
        "- Do not give positive comments or compliments.",
        (
            "- Provide comments and suggestions ONLY if there is something to "
            "improve, otherwise return an empty string."
        ),
        "- Write the comment in GitHub Markdown format.",
        "- Do not start with 'markdown' or '```markdown'.",
        "- IMPORTANT: Give example code block or pseudo code if you can.",
    )
)
|
|
|
|
# User prompt template for whole-PR reviews. Positional ``{}`` slots, in
# order: pull request title, pull request description, code to review.
# Every line except the last is terminated by a space followed by a newline.
FULL_CONTEXT_USER_PROMPT = " \n".join(
    (
        "Review the following code and take the pull request title "
        "and description into account when writing the response.",
        "Pull request title: {}",
        "Pull request description:",
        "---",
        "{}",
        "---",
        "Code to review:",
        "{}",
    )
)
|