"""Model for code review.""" from enum import Enum from typing import Any import google.generativeai as genai import typing_extensions as typing from anthropic import AsyncAnthropic from openai import AsyncOpenAI class GoogleResponse(typing.TypedDict): """The response from Google model.""" lineNumber: int reviewComment: str class ModelProvider(Enum): """The model provider.""" OPENAI = "openai" ANTHROPIC = "anthropic" GOOGLE = "google" DEEPSEEK = "deepseek" @classmethod def from_model(cls, model: str) -> "ModelProvider": """Get the model provider from the model name. Args: model (str): The model name. Returns: ModelProvider: The model provider. """ for prefix, provider in PREFIX_TO_MODEL.items(): if model.startswith(prefix): return provider raise ValueError(f"Unknown model: {model}") PREFIX_TO_MODEL = { "gpt": ModelProvider.OPENAI, "o1": ModelProvider.OPENAI, "o3": ModelProvider.OPENAI, "claude": ModelProvider.ANTHROPIC, "gemini": ModelProvider.GOOGLE, "deepseek": ModelProvider.DEEPSEEK, } class Model: """The model class. Attributes: model (str): The model name. api_key (str): The API key. system_prompt (str): The system prompt. max_tokens (int): The maximum tokens. """ def __init__( # noqa: D107 self, model: str, api_key: str, is_full_context: bool, max_tokens: int = 4196, ): self.model = model self.system_prompt = ( FULL_CONTEXT_SYSTEM_PROMPT if is_full_context else SINGLE_CHUNK_SYSTEM_PROMPT ) self.max_tokens = max_tokens self.provider = ModelProvider.from_model(model) self.session = self.create_session(api_key) def create_session(self, api_key: str) -> Any: """Create a session for the model. Args: api_key (str): The API key. Returns: Any: The session. """ match self.provider: case ModelProvider.OPENAI: return AsyncOpenAI(api_key=api_key) case ModelProvider.ANTHROPIC: return AsyncAnthropic(api_key=api_key) case ModelProvider.GOOGLE: genai.configure(api_key=api_key) return genai.GenerativeModel( model_name=self.model, system_instruction=self.system_prompt ) case ModelProvider.DEEPSEEK: return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com") async def request(self, prompt: str) -> str: """Request the model to generate a response. Args: prompt (str): The prompt to generate a response for. Returns: str: The generated response. """ match self.provider: case ModelProvider.OPENAI | ModelProvider.DEEPSEEK: response = await self.session.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": self.system_prompt}, {"role": "user", "content": prompt}, ], temperature=0.2, max_tokens=self.max_tokens, top_p=1, frequency_penalty=0, presence_penalty=0, ) return response.choices[0].message.content.strip() case ModelProvider.ANTHROPIC: response = await self.session.messages.create( model=self.model, messages=[{"role": "user", "content": prompt}], system=[ { "type": "text", "text": self.system_prompt, "cache_control": {"type": "ephemeral"}, } ], temperature=0.2, max_tokens=self.max_tokens, ) return response.content[0].text.strip() case ModelProvider.GOOGLE: response = await self.session.generate_content_async( prompt, generation_config=genai.GenerationConfig( response_mime_type="application/json", response_schema=list[GoogleResponse], ), ) return response.text.strip() async def get_response_single_chunk( self, file: str, title: str, description: str, chunk: str ) -> str: """Get the response for a single chunk. Args: file (str): The file name. title (str): The pull request title. description (str): The pull request description. chunk (str): The diff chunk. Returns: str: The response. """ prompt = SINGLE_CHUNK_USER_PROMPT.format(file, title, description, chunk) return await self.request(prompt) async def get_response_full_context( self, title: str, description: str, file_contents: list[str] ) -> str: """Get the response for full context. Args: title (str): The pull request title. description (str): The pull request description. file_contents (list[str]): The file contents, diffs. Returns: str: The response. """ try: prompt = FULL_CONTEXT_USER_PROMPT.format( title, description, "\n".join(file_contents) ) return await self.request(prompt) except Exception as e: print(f"Error during full context response: {e}") print(prompt) return None SINGLE_CHUNK_SYSTEM_PROMPT = ( "Your task is to review pull requests. Instructions:\n" "- Provide the response in the following JSON format: " """[{{"lineNumber": int, "reviewComment": str}}] \n""" "- lineNumber is about the line number of the code that in new file. \n" "- lineNumber can be found at the front of each line. \n" "- At the first number is old line number, the second number is new line number. \n" "- If the line starts with `+`, it means the line is added. \n" "- If the line starts with `-`, it means the line is deleted. \n" "- Evaluate whether the code changes and additions are appropriate " "and if the new code structure is suitable. \n" "- Do not give positive comments or compliments. \n" "- Provide comments and suggestions ONLY if there is something to improve" "otherwise return an empty array. \n" "- Write the comment in GitHub Markdown format. \n" "- Use the given description only for the overall context " "and only comment the code. \n" "- Do not suggest type hint or naming convention. \n" "- IMPORTANT: NEVER suggest adding comments to the code. \n" ) SINGLE_CHUNK_USER_PROMPT = ( "Review the following code diff in the file " "{} and take the pull request title and description into account " "when writing the response. \n" "Pull request title: {} \n" "Pull request description: \n" "--- \n" "{} \n" "--- \n" "Git diff to review: \n" "```diff \n" "{} \n" "```" ) FULL_CONTEXT_SYSTEM_PROMPT = ( "You are an experienced software engineer specializing in reviewing pull " "requests. Your task is to provide an overall code review summary for a PR. " "Focus on assessing the following aspects:\n" "1. **Code Structure & Architecture:** " "Evaluate whether the code is well-organized, modular, " "and adheres to clean code principles. Suggest improvements if needed.\n" "2. **Refactoring Opportunities:** " "Identify areas where the code can be optimized or simplified without changing " "its behavior.\n" "3. **Potential Future Problems:** " "Highlight possible scalability, maintainability, or dependency issues that might " "arise in the future based on the current implementation.\n" "Be constructive and clear in your feedback. Avoid commenting on trivial issues " "or syntax errors—focus on high-level feedback.\n" "Precise instructions:\n" "- Do not give positive comments or compliments.\n" "- Provide comments and suggestions ONLY if there is something to improve, " "otherwise return an empty string.\n" "- Write the comment in GitHub Markdown format.\n" "- Do not start with 'markdown' or '```markdown'.\n" "- IMPORTANT: Give example code block or pseudo code if you can.\n" ) FULL_CONTEXT_USER_PROMPT = ( "Review the following code and take the pull request title " "and description into account when writing the response. \n" "Pull request title: {} \n" "Pull request description: \n" "--- \n" "{} \n" "--- \n" "Code to review: \n" "{}" )