enhanced code extraction for thinking models and recalculated some benchmarks
This commit is contained in:
@@ -32,7 +32,7 @@ The "Economic Score" is the average performance per bytes of model size (times 1
|
||||
| hf.co/bartowski/Sky-T1-32B-Preview-GGUF:Q4_K_M | 16.40 | 10.33 | 63 | 32.8 | 4 | 32 | 12.72 | 11.67 | 7.25 | 2.89 |
|
||||
| athene-v2:72b-q4_K_M | | 10.29 | 28 | 72.7 | 4 | 128 | 14.07 | 11.19 | 6.55 | 0.0 |
|
||||
| hf.co/bartowski/Sky-T1-32B-Preview-GGUF:Q8_0 | | 10.28 | 31 | 32.8 | 8 | 32 | 12.76 | 10.75 | 8.04 | 3.43 |
|
||||
| GPT-o1-Preview | | 10.23 | 2 | 300.0 | 16 | 32 | 15.86 | | | |
|
||||
| GPT-o1-Preview | | 10.22 | 2 | 300.0 | 16 | 32 | 15.86 | | | |
|
||||
| qwen2.5:72b-instruct-q4_K_M | | 9.78 | 27 | 72.7 | 4 | 128 | 14.02 | 9.1 | 5.97 | 2.46 |
|
||||
| qwen2.5:72b-instruct-q8_0 | | 9.77 | 13 | 72.7 | 8 | 128 | 12.98 | 10.5 | 5.41 | 3.49 |
|
||||
| qwen2.5-coder:32b-instruct-q4_K_M | 16.40 | 9.77 | 60 | 32.8 | 4 | 32 | 14.05 | 8.82 | 6.41 | 2.2 |
|
||||
@@ -66,6 +66,7 @@ The "Economic Score" is the average performance per bytes of model size (times 1
|
||||
| qwen3:30b-a3b-q4_K_M-no_think | | 6.31 | 41 | 30.5 | 4 | 128 | 8.22 | 8.01 | | |
|
||||
| vanilj/Phi-4:Q8_0 | | 6.13 | 42 | 14.7 | 8 | 16 | 9.06 | 5.73 | 3.52 | 0.84 |
|
||||
| yi-coder:9b-chat-q4_K_M | 4.40 | 5.97 | 136 | 8.8 | 4 | 128 | 7.44 | 6.04 | 5.76 | 0.34 |
|
||||
| qwq:32b-preview-q8_0 | | 5.97 | 18 | 32.8 | 8 | 32 | 10.15 | 3.11 | 3.88 | 1.97 |
|
||||
| cogito:70b-v1-preview-llama-q4_K_M | | 5.97 | 17 | 70.6 | 4 | 128 | 7.7 | 7.26 | 3.54 | 0.0 |
|
||||
| hf.co/bartowski/Anubis-70B-v1-GGUF:Q4_K_M | | 5.83 | 17 | 70.6 | 4 | 128 | 8.07 | 6.49 | 2.59 | 1.36 |
|
||||
| llama3.1:70b-instruct-q8_0 | | 5.81 | 8 | 70.6 | 8 | 128 | 8.19 | 5.36 | 3.8 | 1.7 |
|
||||
@@ -82,7 +83,6 @@ The "Economic Score" is the average performance per bytes of model size (times 1
|
||||
| tulu3:70b-q4_K_M | | 5.21 | 15 | 70.6 | 4 | 128 | 7.31 | 4.68 | 3.35 | 2.15 |
|
||||
| aravhawk/llama4:scout-q4_K_M | | 5.17 | 10 | 107.8 | 4 | | 6.78 | 5.64 | 3.55 | 0.59 |
|
||||
| hf.co/ozone-ai/0x-lite-Q4_K_M-GGUF:latest | | 5.14 | 69 | 14.8 | 4 | 32 | 7.66 | 4.52 | 3.32 | 0.56 |
|
||||
| qwq:32b-preview-q8_0 | | 5.09 | 16 | 32.8 | 8 | 32 | 9.68 | 2.94 | 1.39 | 0.54 |
|
||||
| command-a:111b-03-2025-q4_K_M | | 5.06 | 9 | 111.1 | 4 | 256 | 6.33 | 5.17 | 4.21 | 1.32 |
|
||||
| hf.co/bartowski/Qwen2.5-14B-Instruct-1M-GGUF:Q4_K_M | | 4.94 | 67 | 14.8 | 4 | 986 | 7.75 | 4.02 | 2.74 | 0.81 |
|
||||
| falcon3:10b-instruct-q4_K_M | | 4.88 | 95 | 10.3 | 4 | 32 | 6.62 | 5.77 | 2.19 | 0.6 |
|
||||
@@ -103,7 +103,7 @@ The "Economic Score" is the average performance per bytes of model size (times 1
|
||||
| mistral-small:24b-instruct-2501-q4_K_M | | 3.98 | 34 | 23.6 | 4 | 32 | 6.32 | 2.89 | 2.47 | 0.93 |
|
||||
| qwen2.5-coder:7b-instruct-q4_K_M | 3.80 | 3.98 | 105 | 7.6 | 4 | 32 | 4.76 | 4.7 | 2.87 | 0.87 |
|
||||
| hf.co/bartowski/Tesslate_Tessa-Rust-T1-7B-GGUF:Q4_K_M | | 3.94 | 104 | 7.62 | 4 | 32 | 6.14 | 3.2 | 2.28 | 0.73 |
|
||||
| hf.co/bartowski/open-thoughts_OpenThinker-32B-GGUF:Q4_K_M | | 3.89 | 24 | 32.8 | 4 | 32 | 4.2 | 4.22 | 3.22 | 2.95 |
|
||||
| hf.co/bartowski/open-thoughts_OpenThinker-32B-GGUF:Q4_K_M | | 3.90 | 24 | 32.8 | 4 | 32 | 4.2 | 4.22 | 3.28 | 2.95 |
|
||||
| gemma3:27b | | 3.73 | 27 | 27.4 | 4 | 128 | 7.15 | 0.14 | 3.8 | 0.64 |
|
||||
| hf.co/bartowski/HelpingAI_Helpingai3-raw-GGUF:Q4_K_M | | 3.70 | 72 | 10.3 | 4 | 32 | 5.88 | 3.82 | 0.92 | 0.13 |
|
||||
| gemma2:27b-instruct-q8_0 | | 3.65 | 13 | 27.2 | 8 | 8 | 5.18 | 3.3 | 2.47 | 0.98 |
|
||||
@@ -137,13 +137,13 @@ The "Economic Score" is the average performance per bytes of model size (times 1
|
||||
| phi3:14b-medium-128k-instruct-q8_0 | | 2.24 | 16 | 14.0 | 8 | 128 | 4.21 | 1.55 | 0.42 | 0.04 |
|
||||
| hf.co/bartowski/THUDM_GLM-Z1-32B-0414-GGUF:Q4_K_M | | 2.24 | 14 | 32.6 | 4 | 32 | 3.13 | 1.58 | 2.0 | 1.1 |
|
||||
| llama3.1:8b-instruct-q8_0 | | 2.03 | 25 | 8.0 | 8 | 128 | 3.26 | 1.78 | 0.94 | 0.09 |
|
||||
| qwq:32b-q4_K_M | | 2.02 | 12 | 32.8 | 4 | 128 | 2.51 | 1.75 | 1.32 | 2.25 |
|
||||
| tulu3:8b-q8_0 | | 2.01 | 25 | 8.0 | 8 | 128 | 3.91 | 1.06 | 0.42 | 0.49 |
|
||||
| hf.co/bartowski/open-r1_OlympicCoder-7B-GGUF:Q4_K_M | | 1.92 | 55 | 7.0 | 4 | 32 | 2.43 | 2.86 | 0.28 | 0.33 |
|
||||
| hf.co/bartowski/Yi-1.5-6B-Chat-GGUF:Q8_0 | | 1.87 | 31 | 6.06 | 8 | 4 | 3.92 | 0.92 | 0.13 | 0.0 |
|
||||
| hf.co/bartowski/Yi-1.5-6B-Chat-GGUF:Q4_K_M | | 1.81 | 60 | 6.06 | 4 | 4 | 3.71 | 0.87 | 0.32 | 0.0 |
|
||||
| deepseek-llm:67b-chat-q4_K_M | | 1.79 | 5 | 67.0 | 4 | 4 | 2.94 | 1.63 | 0.5 | 0.23 |
|
||||
| hf.co/katanemo/Arch-Function-3B.gguf:Q4_K_M | 1.54 | 1.76 | 114 | 3.09 | 4 | 32 | 2.81 | 1.55 | 0.66 | 0.43 |
|
||||
| qwq:32b-q4_K_M | | 1.74 | 11 | 32.8 | 4 | 128 | 1.82 | 1.75 | 1.32 | 2.23 |
|
||||
| qwen2.5:3b-instruct-q4_K_M | | 1.70 | 110 | 3.1 | 4 | 128 | 2.95 | 1.35 | 0.56 | 0.05 |
|
||||
| deepseek-r1:70b-llama-distill-q4_K_M | | 1.69 | 5 | 70.6 | 4 | 128 | 2.17 | 2.49 | 0.19 | 0.41 |
|
||||
| deepseek-r1:14b-qwen-distill-q4_K_M | | 1.69 | 23 | 14.8 | 4 | 128 | 2.6 | 1.02 | 1.19 | 1.02 |
|
||||
|
||||
@@ -265,6 +265,15 @@
|
||||
"python-100": 10.23,
|
||||
"rust-100": 4.52
|
||||
},
|
||||
"qwq:32b-preview-q8_0": {
|
||||
"_context_size": 32,
|
||||
"_parameter_size": 32.8,
|
||||
"_quantization_level": 8,
|
||||
"clojure-100": 1.97,
|
||||
"java-100": 3.11,
|
||||
"python-100": 10.15,
|
||||
"rust-100": 3.88
|
||||
},
|
||||
"GPT-3.5-Turbo": {
|
||||
"_context_size": 16,
|
||||
"_parameter_size": 175.0,
|
||||
@@ -301,15 +310,6 @@
|
||||
"python-100": 9.7,
|
||||
"rust-100": 4.55
|
||||
},
|
||||
"qwq:32b-preview-q8_0": {
|
||||
"_context_size": 32,
|
||||
"_parameter_size": 32.8,
|
||||
"_quantization_level": 8,
|
||||
"clojure-100": 0.54,
|
||||
"java-100": 2.94,
|
||||
"python-100": 9.68,
|
||||
"rust-100": 1.39
|
||||
},
|
||||
"phi4:14b": {
|
||||
"_context_size": 16,
|
||||
"_parameter_size": 14.7,
|
||||
@@ -988,7 +988,7 @@
|
||||
"clojure-100": 2.95,
|
||||
"java-100": 4.22,
|
||||
"python-100": 4.2,
|
||||
"rust-100": 3.22
|
||||
"rust-100": 3.28
|
||||
},
|
||||
"yi:9b-chat-v1.5-q4_K_M": {
|
||||
"_context_size": 4,
|
||||
@@ -1187,6 +1187,15 @@
|
||||
"python-100": 2.53,
|
||||
"rust-100": 0.3
|
||||
},
|
||||
"qwq:32b-q4_K_M": {
|
||||
"_context_size": 128,
|
||||
"_parameter_size": 32.8,
|
||||
"_quantization_level": 4,
|
||||
"clojure-100": 2.25,
|
||||
"java-100": 1.75,
|
||||
"python-100": 2.51,
|
||||
"rust-100": 1.32
|
||||
},
|
||||
"qwen2-math:7b-instruct-q8_0": {
|
||||
"_context_size": 4,
|
||||
"_parameter_size": 7.6,
|
||||
@@ -1394,15 +1403,6 @@
|
||||
"python-100": 1.83,
|
||||
"rust-100": 0.24
|
||||
},
|
||||
"qwq:32b-q4_K_M": {
|
||||
"_context_size": 128,
|
||||
"_parameter_size": 32.8,
|
||||
"_quantization_level": 4,
|
||||
"clojure-100": 2.23,
|
||||
"java-100": 1.75,
|
||||
"python-100": 1.82,
|
||||
"rust-100": 1.32
|
||||
},
|
||||
"hf.co/bartowski/google_gemma-3-4b-it-qat-GGUF:Q4_0": {
|
||||
"_context_size": 128,
|
||||
"_parameter_size": 3.88,
|
||||
@@ -2168,4 +2168,4 @@
|
||||
"_parameter_size": 235.1,
|
||||
"_quantization_level": 4
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -25,7 +25,40 @@ def get_extension(language):
|
||||
else:
|
||||
raise Exception(f"Unsupported language: {language}")
|
||||
|
||||
thinking_remove_tags = [
|
||||
["<|begin_of_thought|>", "<|end_of_thought|>"],
|
||||
["<think>", "</think>"],
|
||||
["<think>", "<|im_start|>"],
|
||||
["<thinking>", "</thinking>"],
|
||||
["<thought>", "</thought>"],
|
||||
["<Thought>", "</Thought>"],
|
||||
["<reason>", "</reason>"],
|
||||
["<reasoning>", "</reasoning>"]
|
||||
]
|
||||
|
||||
thinking_keep_tags = [
|
||||
["<|begin_of_solution|>", "<|end_of_solution|>"]
|
||||
]
|
||||
|
||||
def extract_code_block(markdown_content, language, extension):
|
||||
# remove thinking parts from the markdown content
|
||||
for tag_pair in thinking_remove_tags:
|
||||
start_tag, end_tag = tag_pair
|
||||
start = markdown_content.find(start_tag)
|
||||
if start != -1:
|
||||
end = markdown_content.find(end_tag, start)
|
||||
if end != -1:
|
||||
# remove everything from the beginning of the text to the end of the thought
|
||||
markdown_content = markdown_content[end + len(end_tag):]
|
||||
for tag_pair in thinking_keep_tags:
|
||||
start_tag, end_tag = tag_pair
|
||||
start = markdown_content.find(start_tag)
|
||||
if start != -1:
|
||||
end = markdown_content.find(end_tag, start)
|
||||
if end != -1:
|
||||
# now we want to keep what is between the two tags
|
||||
markdown_content = markdown_content[start + len(start_tag):end]
|
||||
|
||||
# Regular expression to find code blocks between triple backticks
|
||||
code_block_pattern = re.compile(r'```(.*?)```', re.DOTALL)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user