mirror of
https://github.com/jafioti/luminal.git
synced 2026-06-01 21:49:47 +09:00
* Refine Luminal graph rewrite handling * Generalize Metal scatter reuse and Qwen validation * Add Qwen safetensor size accounting * Fix Modal example imports for shared output validation * Clarify Luminal contributor guidance * Revert direct shard loading from qwen metal * Remove qwen Metal CI job * Add Metal Llama 1B CI and restore safe profiling timeouts * Fix duplicate Metal ops and tests * Fix Metal pipeline compilation on llama * Run llama Metal CI on xlarge runners * Resample search generations after timeout failures
49 lines
1.3 KiB
Python
49 lines
1.3 KiB
Python
import os
|
|
import subprocess
|
|
import sys
|
|
|
|
|
|
def run_and_capture(command: list[str], *, cwd: str, env: dict[str, str]) -> str:
    """Run ``command``, echoing its output live while also capturing it.

    The child's stderr is merged into its stdout, so one interleaved stream
    is both forwarded to this process's stdout and returned to the caller.

    Args:
        command: Program and arguments, passed to ``subprocess.Popen`` as a
            list (no shell involved).
        cwd: Working directory for the child process.
        env: Environment mapping for the child process.

    Returns:
        Everything the child wrote, decoded as UTF-8 with undecodable bytes
        replaced.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero
            status; the captured text is attached as ``output``.
    """
    # The context manager closes the pipe and reaps the child on every exit
    # path, so neither a file descriptor nor a zombie process is left behind.
    with subprocess.Popen(
        command,
        cwd=cwd,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    ) as process:
        # Always true with stdout=PIPE; kept to narrow the Optional type.
        assert process.stdout is not None

        chunks: list[bytes] = []
        try:
            # read1 performs at most one underlying read, so output is
            # forwarded as it arrives instead of waiting for a full 4096 bytes.
            while chunk := process.stdout.read1(4096):
                sys.stdout.buffer.write(chunk)
                sys.stdout.buffer.flush()
                chunks.append(chunk)
        except BaseException:
            # Echoing failed or we were interrupted: do not leave the child
            # running in the background while the exception propagates.
            process.kill()
            raise

        return_code = process.wait()

    output = b"".join(chunks).decode("utf-8", errors="replace")
    if return_code:
        raise subprocess.CalledProcessError(return_code, command, output=output)
    return output
|
|
|
|
|
|
def main():
    """Run the Llama 1B Metal example and validate what it printed.

    The repository root is taken from ``GITHUB_WORKSPACE`` when set and falls
    back to the current working directory. The ``ci`` directory under that
    root is put at the front of ``sys.path`` so ``example_output`` can be
    imported from there.

    Raises:
        AssertionError: If the captured output lacks either the ``TTFT:`` or
            the ``TPOT:`` marker.
        subprocess.CalledProcessError: Propagated from ``run_and_capture``
            when the cargo command exits with a non-zero status.
    """
    workspace = os.environ.get("GITHUB_WORKSPACE", os.getcwd())
    ci_dir = os.path.join(workspace, "ci")
    sys.path.insert(0, ci_dir)
    # Imported here because the module only becomes importable once the ci
    # directory has been added to sys.path above.
    from example_output import validate_output

    cargo_command = [
        "cargo",
        "run",
        "--release",
        "-p",
        "luminal_metal",
        "--example",
        "llama_1b",
    ]
    captured = run_and_capture(cargo_command, cwd=workspace, env=os.environ.copy())

    # Both markers must be present in the captured output.
    for marker in ("TTFT:", "TPOT:"):
        if marker not in captured:
            raise AssertionError("Llama 1B Metal example did not complete generation")

    validate_output("llama", captured)
|
|
|
|
|
|
# Only run the check when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|