Make the Endpoint class the main argument of the ollama client functions
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from ollama_client import ollama_delete
|
from ollama_client import ollama_delete, Endpoint
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = ArgumentParser(description="Cleanup models: delete models from ollama endpoints")
|
parser = ArgumentParser(description="Cleanup models: delete models from ollama endpoints")
|
||||||
@@ -14,7 +14,8 @@ def main():
|
|||||||
|
|
||||||
for api_stub in api_base:
|
for api_stub in api_base:
|
||||||
try:
|
try:
|
||||||
ollama_delete(api_stub, model_name)
|
endpoint = Endpoint(store_name=model_name, api_name=model_name, key="", url=f"{api_stub}/v1/chat/completions")
|
||||||
|
ollama_delete(endpoint)
|
||||||
print(f"Model {model_name} removed from {api_stub}.")
|
print(f"Model {model_name} removed from {api_stub}.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# the server is not available
|
# the server is not available
|
||||||
|
|||||||
@@ -160,7 +160,8 @@ def main():
|
|||||||
raise Exception("The --allmodels option cannot be used in combination with --endpoint.")
|
raise Exception("The --allmodels option cannot be used in combination with --endpoint.")
|
||||||
|
|
||||||
# loop over all models provided by ollama and run those which are missing in benchmark.json
|
# loop over all models provided by ollama and run those which are missing in benchmark.json
|
||||||
models = ollama_list()
|
local_endpoint = Endpoint(store_name=model, api_name=model, key="", url=f"{api_base[0]}/v1/chat/completions")
|
||||||
|
models = ollama_list(local_endpoint)
|
||||||
print(f"Found {len(models)} models in ollama.")
|
print(f"Found {len(models)} models in ollama.")
|
||||||
for model in models:
|
for model in models:
|
||||||
# in every loop we load the benchmark.json again because it might have been updated
|
# in every loop we load the benchmark.json again because it might have been updated
|
||||||
|
|||||||
@@ -28,41 +28,27 @@ class Endpoint:
|
|||||||
def get_ollama_url_stub(self) -> str:
|
def get_ollama_url_stub(self) -> str:
|
||||||
"""Get the base URL for the ollama API"""
|
"""Get the base URL for the ollama API"""
|
||||||
return urllib3.util.url.parse_url(self.url)._replace(path='').url
|
return urllib3.util.url.parse_url(self.url)._replace(path='').url
|
||||||
|
|
||||||
def get_ollama_pull_url(self) -> str:
|
|
||||||
"""Get the URL for the ollama pull command"""
|
|
||||||
return urllib3.util.url.parse_url(self.url)._replace(path='/api/pull').url
|
|
||||||
|
|
||||||
def get_ollama_delete_url(self) -> str:
|
|
||||||
"""Get the URL for the ollama delete command"""
|
|
||||||
return urllib3.util.url.parse_url(self.url)._replace(path='/api/delete').url
|
|
||||||
|
|
||||||
def get_ollama_ls_url(self) -> str:
|
|
||||||
"""Get the URL for the ollama list command"""
|
|
||||||
return urllib3.util.url.parse_url(self.url)._replace(path='/api/tags').url
|
|
||||||
|
|
||||||
def get_ollama_ps_url(self) -> str:
|
|
||||||
"""Get the URL for the ollama ps command"""
|
|
||||||
return urllib3.util.url.parse_url(self.url)._replace(path='/api/ps').url
|
|
||||||
|
|
||||||
def ollama_pull(api_base='http://localhost:11434', model='llama3.2:latest') -> bool:
|
def ollama_pull(endpoint: Endpoint) -> bool:
|
||||||
|
api_base = endpoint.get_ollama_url_stub()
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
response = requests.request("POST", f"{api_base}/api/pull", verify=False,
|
response = requests.request("POST", f"{api_base}/api/pull", verify=False,
|
||||||
headers={'Accept': 'application/json', 'Content-Type': 'application/json'},
|
headers={'Accept': 'application/json', 'Content-Type': 'application/json'},
|
||||||
json={"model": model, "stream": False})
|
json={"model": endpoint.api_name, "stream": False})
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
return not data.get("error", False)
|
return not data.get("error", False)
|
||||||
|
|
||||||
def ollama_delete(api_base='http://localhost:11434', model='llama3.2:latest') -> bool:
|
def ollama_delete(endpoint: Endpoint) -> bool:
|
||||||
|
api_base = endpoint.get_ollama_url_stub()
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
|
|
||||||
response = requests.request("DELETE", f"{api_base}/api/delete", verify=False,
|
response = requests.request("DELETE", f"{api_base}/api/delete", verify=False,
|
||||||
headers={'Accept': 'application/json', 'Content-Type': 'application/json'},
|
headers={'Accept': 'application/json', 'Content-Type': 'application/json'},
|
||||||
json={"model": model})
|
json={"model": endpoint.api_name})
|
||||||
return response.status_code == 200
|
return response.status_code == 200
|
||||||
|
|
||||||
def ollama_list(api_base='http://localhost:11434') -> dict:
|
def ollama_list(endpoint: Endpoint) -> dict:
|
||||||
|
api_base = endpoint.get_ollama_url_stub()
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
response = requests.get(f"{api_base}/api/tags", verify=False)
|
response = requests.get(f"{api_base}/api/tags", verify=False)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
@@ -78,13 +64,13 @@ def ollama_list(api_base='http://localhost:11434') -> dict:
|
|||||||
def ollama_pull_endpoint(endpoint: Endpoint) -> Endpoint:
|
def ollama_pull_endpoint(endpoint: Endpoint) -> Endpoint:
|
||||||
# check if the endpoint servers are online and the model is available
|
# check if the endpoint servers are online and the model is available
|
||||||
# we do not catch exceptions here, because that shall be done in calling code
|
# we do not catch exceptions here, because that shall be done in calling code
|
||||||
api_base = endpoint.get_ollama_url_stub()
|
list = ollama_list(endpoint)
|
||||||
list = ollama_list(api_base)
|
|
||||||
if endpoint.api_name in list: return endpoint
|
if endpoint.api_name in list: return endpoint
|
||||||
|
|
||||||
# pull the model if it is not available
|
# pull the model if it is not available
|
||||||
|
api_base = endpoint.get_ollama_url_stub()
|
||||||
print(f"Model {endpoint.api_name} is not available on server {api_base}. Pulling the model...")
|
print(f"Model {endpoint.api_name} is not available on server {api_base}. Pulling the model...")
|
||||||
ollama_pull(api_base, endpoint.api_name)
|
ollama_pull(endpoint)
|
||||||
print(f"Model {endpoint.api_name} is now available on server {api_base}.")
|
print(f"Model {endpoint.api_name} is now available on server {api_base}.")
|
||||||
return endpoint
|
return endpoint
|
||||||
|
|
||||||
@@ -468,7 +454,7 @@ def main():
|
|||||||
base64_image = base64.b64encode(image_file.read()).decode('utf-8')
|
base64_image = base64.b64encode(image_file.read()).decode('utf-8')
|
||||||
|
|
||||||
# access the ollama API
|
# access the ollama API
|
||||||
models_dict = ollama_list(api_base[0])
|
models_dict = ollama_list(endpoints[0])
|
||||||
for (model, attr) in models_dict.items():
|
for (model, attr) in models_dict.items():
|
||||||
print(f"Model: {model}: {attr}")
|
print(f"Model: {model}: {attr}")
|
||||||
try:
|
try:
|
||||||
|
|||||||
15
test.py
15
test.py
@@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from benchmark import read_benchmark, write_benchmark
|
from benchmark import read_benchmark, write_benchmark
|
||||||
from ollama_client import ollama_list
|
from ollama_client import ollama_list, Endpoint
|
||||||
|
|
||||||
def test(api_base, endpoint_name, model_name, language, overwrite_existing, overwrite_failed, max_problem_number=100, think=False, no_think=False):
|
def test(api_base, endpoint_name, model_name, language, overwrite_existing, overwrite_failed, max_problem_number=100, think=False, no_think=False):
|
||||||
# call inference.py
|
# call inference.py
|
||||||
@@ -33,6 +33,7 @@ def test(api_base, endpoint_name, model_name, language, overwrite_existing, over
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = ArgumentParser(description="Run the complete pipeline to execute solutions and store results in a JSON file.")
|
parser = ArgumentParser(description="Run the complete pipeline to execute solutions and store results in a JSON file.")
|
||||||
|
parser.add_argument('--api', action='append', help="Specify (multiple) backend OpenAI API endpoints (i.e. ollama); can be used multiple times")
|
||||||
parser.add_argument('--api_base', required=False, default='http://localhost:11434', help='API base URL for the LLM, default is http://localhost:11434')
|
parser.add_argument('--api_base', required=False, default='http://localhost:11434', help='API base URL for the LLM, default is http://localhost:11434')
|
||||||
parser.add_argument('--allmodels', action='store_true', help='loop over all models provided by ollama and run those which are missing in benchmark.json')
|
parser.add_argument('--allmodels', action='store_true', help='loop over all models provided by ollama and run those which are missing in benchmark.json')
|
||||||
parser.add_argument('--model', required=False, default='llama3.2:latest', help='Name of the model to use, default is llama3.2:latest')
|
parser.add_argument('--model', required=False, default='llama3.2:latest', help='Name of the model to use, default is llama3.2:latest')
|
||||||
@@ -48,7 +49,7 @@ def main():
|
|||||||
parser.add_argument('--nall', action='store_true', help='all problems')
|
parser.add_argument('--nall', action='store_true', help='all problems')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
api_base = args.api_base
|
api_base = args.api if args.api else args.api_base.split(",") if "," in args.api_base else [args.api_base]
|
||||||
model_name = args.model
|
model_name = args.model
|
||||||
max_problem_number = 100
|
max_problem_number = 100
|
||||||
if args.n100: max_problem_number = 100
|
if args.n100: max_problem_number = 100
|
||||||
@@ -61,7 +62,8 @@ def main():
|
|||||||
|
|
||||||
# find models to test
|
# find models to test
|
||||||
models = []
|
models = []
|
||||||
model_dict = ollama_list()
|
local_endpoint = Endpoint(store_name=model_name, api_name=model_name, key="", url=f"{api_base[0]}/v1/chat/completions")
|
||||||
|
model_dict = ollama_list(local_endpoint)
|
||||||
if args.allmodels:
|
if args.allmodels:
|
||||||
if endpoint_name:
|
if endpoint_name:
|
||||||
raise Exception("The --allmodels option cannot be used in combination with --endpoint.")
|
raise Exception("The --allmodels option cannot be used in combination with --endpoint.")
|
||||||
@@ -93,7 +95,7 @@ def main():
|
|||||||
# add metadata to benchmark.json
|
# add metadata to benchmark.json
|
||||||
if not model_benchmark_name in benchmark or not bench_name in benchmark[model_benchmark_name] or overwrite_existing or overwrite_failed:
|
if not model_benchmark_name in benchmark or not bench_name in benchmark[model_benchmark_name] or overwrite_existing or overwrite_failed:
|
||||||
# run the model; this writes a new entry to benchmark.json
|
# run the model; this writes a new entry to benchmark.json
|
||||||
test(api_base, endpoint_name, model, language, overwrite_existing, overwrite_failed, max_problem_number, think = args.think, no_think = args.no_think)
|
test(",".join(api_base), endpoint_name, model, language, overwrite_existing, overwrite_failed, max_problem_number, think = args.think, no_think = args.no_think)
|
||||||
# load benchmark.json again because the test has updated it
|
# load benchmark.json again because the test has updated it
|
||||||
benchmark = read_benchmark()
|
benchmark = read_benchmark()
|
||||||
# because testing can be interrupted, there is no guarantee that the entry is present
|
# because testing can be interrupted, there is no guarantee that the entry is present
|
||||||
@@ -102,18 +104,21 @@ def main():
|
|||||||
# check if attributes parameter_size and quantization_level are present in benchmark.json
|
# check if attributes parameter_size and quantization_level are present in benchmark.json
|
||||||
parameter_size = model_dict.get(model,{}).get('parameter_size', None)
|
parameter_size = model_dict.get(model,{}).get('parameter_size', None)
|
||||||
if parameter_size:
|
if parameter_size:
|
||||||
|
if parameter_size.endswith("B"):
|
||||||
|
parameter_size = parameter_size[:-1]
|
||||||
try:
|
try:
|
||||||
parameter_size = float(parameter_size)
|
parameter_size = float(parameter_size)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
print(f"Warning: Could not convert parameter_size '{parameter_size}' to float for model {model}")
|
print(f"Warning: Could not convert parameter_size '{parameter_size}' to float for model {model}")
|
||||||
parameter_size = None
|
parameter_size = None
|
||||||
quantization_level = model_dict.get(model,{}).get('parameter_size', None)
|
quantization_level = model_dict.get(model,{}).get('quantization_level', None)
|
||||||
if quantization_level:
|
if quantization_level:
|
||||||
try:
|
try:
|
||||||
quantization_level = int(quantization_level)
|
quantization_level = int(quantization_level)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
print(f"Warning: Could not convert quantization_level '{quantization_level}' to int for model {model}")
|
print(f"Warning: Could not convert quantization_level '{quantization_level}' to int for model {model}")
|
||||||
quantization_level = None
|
quantization_level = None
|
||||||
|
if not quantization_level and model.endswith("Q4_K_M"): quantization_level = 4
|
||||||
if not '_parameter_size' in entry and parameter_size: entry['_parameter_size'] = parameter_size
|
if not '_parameter_size' in entry and parameter_size: entry['_parameter_size'] = parameter_size
|
||||||
if not '_quantization_level' in entry and quantization_level: entry['_quantization_level'] = quantization_level
|
if not '_quantization_level' in entry and quantization_level: entry['_quantization_level'] = quantization_level
|
||||||
entry = dict(sorted(entry.items(), key=lambda item: item[0]))
|
entry = dict(sorted(entry.items(), key=lambda item: item[0]))
|
||||||
|
|||||||
Reference in New Issue
Block a user