Update policies and attempt logs on main.

Sync workflow rules for merge-time gating and micro-tuning limits, and bring recent failed-attempt diagnostics into the README without including train.py or experiment artifacts.

Made-with: Cursor
This commit is contained in:
demian3b
2026-04-16 23:21:21 +09:00
parent 12b2fac462
commit 0fc754b38e
4 changed files with 105 additions and 39 deletions

View File

@@ -6,3 +6,4 @@ repos:
entry: python scripts/precommit_performance_gate.py
language: system
pass_filenames: false
stages: [pre-commit, post-merge]

View File

@@ -39,6 +39,15 @@ Make overfitting robust and measurable, targeting `mean_rmsd_100 <= 1.0`.
- Pure hyperparameter sweeps (LR, batch size, seed, minor weight nudges) are not treated as standalone attempts.
- Hyperparameter changes are allowed only as supporting details within a larger conceptual change.
## Micro-tuning cap per strategy
- For each new strategy (new research-level concept), micro-tuning is capped at **5 runs**.
- Micro-tuning includes LR/seed/batch/clip/time-power/weight nudges that do not change the core concept.
- After 5 micro-tuning runs for that strategy, stop tuning it and either:
- promote the best result from that strategy, or
- declare the strategy exhausted in `README.md` and move to a new independent strategy.
- Do not reset this counter by branching or renaming; count is per strategy idea.
## Non-negotiable flow-matching rule
- Time conditioning in training must be random for every sample (middle-time flow supervision).

View File

@@ -84,3 +84,11 @@ This repository is intentionally pinned to CUDA 12.6 PyTorch wheels and matching
- 2026-04-16: Branch `attempt/geodesic-stability-next`: stress-tested geodesic+residual variants; best observed metric reached `mean_rmsd_100=2.388103` (`--rotation-loss geodesic --gcn-residual --epochs=280 --batch-size=24 --lr=7e-4 --seed=1`), with occasional NaN instability in nearby runs.
- 2026-04-16: Stabilization-only update: added non-finite guards/clamps in geodesic loss, Kabsch RMSD, and training loss fallback to reduce NaN-caused crashes during long geodesic sweeps.
- 2026-04-16: Policy update in `GUIDELINES.md`: when a branch obtains a strict best `mean_rmsd_100`, integration into `main` is mandatory before continuing new branch experiments.
- 2026-04-16: Hook policy update: `train-performance-gate` now runs at both commit-time and `post-merge`, and enforces main-branch merge-time validation/refresh when merged diff includes `train.py`.
- 2026-04-16: Attempt AB (trajectory-instability hypothesis): added `--omega-max-norm` clipping to stabilize geodesic+residual rotation outputs and reduce NaN-prone spikes; run with `--omega-max-norm 3.0` reached `mean_rmsd_100=2.436618` (more stable but worse than branch best `2.388103`).
- 2026-04-16: Strategy S1 (hybrid rotation loss, capped at 5 micro-tuning runs) completed: alpha sweep (`0.7/0.5/0.3`) then lr/seed tuning on best alpha; best S1 result was `mean_rmsd_100=2.439254` (no new best), strategy marked exhausted.
- 2026-04-16: Strategy S2 (rotation-weight curriculum, capped at 5 micro-tuning runs) completed: best run used `--rotation-weight-start 1.0 --rotation-weight-warmup-epochs 120` and reached `mean_rmsd_100=2.417450` (no new best), strategy marked exhausted.
- 2026-04-16: Multi-GPU parallel sweep (GPU0/1/2) around residual-geodesic schedules produced `2.394431`, `2.420601`, and `2.450024`; no update over branch best `2.388103`.
- 2026-04-16: Follow-up parallel sweep (GPU0/1/2) with direct best-axis reruns produced `2.430481`, `2.412036`, and `2.720380`; observed heavy seed sensitivity and intermittent fallback-to-1000 behavior on unstable seeds.
- 2026-04-16: Continued parallel sweep with rotation curriculum variants (`start=0.85/0.95` and lower-lr schedule) produced `2.450391`, `2.457748`, and `2.426384`; no improvement over branch best `2.388103`.
- 2026-04-16: Deep schedule parallel sweep (`epochs=320~380`, `start=1.0` with warmup variants, multi-seed) produced `2.464117`, `2.410706`, and `2.419527`; still worse than branch best `2.388103` and showed late-epoch fallback instability in some runs.

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import json
import os
import shutil
import subprocess
import sys
@@ -23,6 +24,11 @@ def staged_files() -> set[str]:
return {line for line in out.splitlines() if line}
def changed_files_since(base_ref: str, head_ref: str = "HEAD") -> set[str]:
    """Return the set of paths reported as changed between two revisions.

    Passes ``diff --name-only <base_ref>..<head_ref>`` to the module's
    ``git`` helper (defined elsewhere in this file) and keeps every
    non-empty line of its output.

    Args:
        base_ref: Revision on the left-hand side of the ``..`` range.
        head_ref: Revision on the right-hand side; defaults to ``HEAD``.
    """
    revision_range = f"{base_ref}..{head_ref}"
    listing = git("diff", "--name-only", revision_range)
    # filter(None, ...) drops the empty strings, exactly like an `if line` test.
    return set(filter(None, listing.splitlines()))
def read_json(path: Path) -> dict:
if not path.exists():
raise FileNotFoundError(path)
@@ -72,7 +78,87 @@ def is_main_branch() -> bool:
return current_branch() == "main"
def get_orig_head() -> str | None:
    """Resolve ``ORIG_HEAD`` through the ``git`` helper.

    Returns:
        Whatever ``git("rev-parse", "--verify", "ORIG_HEAD")`` returns, or
        ``None`` when that call raises ``subprocess.CalledProcessError``
        (for example when no ``ORIG_HEAD`` ref exists).
    """
    try:
        resolved = git("rev-parse", "--verify", "ORIG_HEAD")
    except subprocess.CalledProcessError:
        resolved = None
    return resolved
def read_best_from_ref(ref: str) -> float:
    """Read ``best_mean_rmsd_100`` from ``BEST_PRACTICE.json`` at a git ref.

    Every situation in which a usable previous best cannot be obtained is
    reported as ``float("inf")``, so callers can treat the result uniformly
    as "no previous best recorded":

    * ``git show <ref>:BEST_PRACTICE.json`` fails (``CalledProcessError``),
    * the content is not valid JSON,
    * the JSON document is not an object,
    * the ``best_mean_rmsd_100`` entry is missing, null, or not numeric.

    Args:
        ref: Revision whose ``BEST_PRACTICE.json`` should be inspected.

    Returns:
        The recorded best value as a float, or ``float("inf")``.
    """
    no_previous_best = float("inf")

    try:
        raw = git("show", f"{ref}:BEST_PRACTICE.json")
    except subprocess.CalledProcessError:
        return no_previous_best

    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return no_previous_best

    # A top-level list/string/number has no .get(); treat it like a
    # malformed file instead of crashing the hook with AttributeError.
    if not isinstance(parsed, dict):
        return no_previous_best

    try:
        return float(parsed.get("best_mean_rmsd_100", no_previous_best))
    except (TypeError, ValueError):
        # null or non-numeric value stored under the key.
        return no_previous_best
def run_main_gate(previous_best: float, mode: str) -> int:
    """Validate the latest evaluation report and refresh the best artifacts.

    Steps, in order:

    1. Load the report at ``LATEST`` and require ``num_runs == 100``.
    2. Require ``mean_rmsd_100`` to be strictly lower than ``previous_best``.
    3. Write a new best report to ``BEST``, copy ``LATEST_MODEL`` over
       ``BEST_MODEL``, and run ``scripts/update_best_artifacts.py``.
    4. Only when ``mode == "pre-commit"``: ``git add`` the refreshed files.

    Args:
        previous_best: Best ``mean_rmsd_100`` the new result must beat.
        mode: ``"post-merge"`` selects the ``[post-merge]`` message prefix;
            any other value uses ``[pre-commit]``. Staging happens only for
            the exact value ``"pre-commit"``.

    Returns:
        ``0`` on success; otherwise the value returned by ``fail(...)``.
    """
    prefix = "[pre-commit]" if mode != "post-merge" else "[post-merge]"

    report = read_json(LATEST)
    candidate_rmsd = float(report["mean_rmsd_100"])
    run_count = int(report["num_runs"])

    if run_count != 100:
        return fail(f"{prefix} num_runs must be 100, got {run_count}.")

    is_strict_improvement = candidate_rmsd < previous_best
    if not is_strict_improvement:
        return fail(
            f"{prefix} No improvement: latest={candidate_rmsd:.6f}, previous_best={previous_best:.6f}. "
            "main integration with train.py requires strict improvement."
        )

    # Key order is kept as-is because it determines the layout of the
    # written JSON document.
    best_report = {
        "best_mean_rmsd_100": candidate_rmsd,
        "num_runs": run_count,
        "timestamp_utc": report.get("timestamp_utc", ""),
        "command": report.get("command", ""),
        "notes": "Auto-updated by pre-commit from reports/latest_eval.json.",
        "updated_by_commit": "pending",
        "best_train_mse": report.get("best_train_mse"),
    }
    write_json(BEST, best_report)
    shutil.copy2(LATEST_MODEL, BEST_MODEL)

    refresh_script = ROOT / "scripts" / "update_best_artifacts.py"
    subprocess.check_call([sys.executable, str(refresh_script)], cwd=ROOT)

    if mode == "pre-commit":
        # Stage the refreshed files so they become part of the commit in
        # progress; the post-merge path leaves them unstaged.
        paths_to_stage = (
            str(BEST),
            "reports/trajectories",
            "artifacts/best_model.pt",
            "artifacts/latest_eval_best_model.pt",
        )
        for target in paths_to_stage:
            subprocess.check_call(["git", "add", target], cwd=ROOT)

    summary = (
        f"{prefix} PASS: improved mean_rmsd_100 {previous_best:.6f} -> {candidate_rmsd:.6f}; "
        "BEST_PRACTICE.json and best artifacts refreshed"
    )
    print(summary, file=sys.stderr)
    return 0
def main() -> int:
hook_stage = os.environ.get("PRE_COMMIT_HOOK_STAGE", "pre-commit")
if hook_stage == "post-merge":
if not is_main_branch():
return 0
orig_head = get_orig_head()
if not orig_head:
return 0
changed = changed_files_since(orig_head, "HEAD")
if "train.py" not in changed:
return 0
train_src = (ROOT / "train.py").read_text(encoding="utf-8")
try:
enforce_random_time_requirement(train_src)
except RuntimeError as e:
return fail(f"[post-merge] {e}")
if "reports/latest_eval.json" not in changed:
return fail("[post-merge] train.py merged to main: reports/latest_eval.json must be included.")
if "README.md" not in changed:
return fail("[post-merge] train.py merged to main: README.md attempt log update is required.")
if not LATEST_MODEL.exists():
return fail("[post-merge] Missing artifacts/latest_eval_best_model.pt from merged training run.")
previous_best = read_best_from_ref(orig_head)
return run_main_gate(previous_best=previous_best, mode="post-merge")
staged = staged_files()
if "train.py" not in staged:
return 0
@@ -84,7 +170,6 @@ def main() -> int:
enforce_random_time_requirement(train_src)
except RuntimeError as e:
return fail(str(e))
if not is_main_branch():
print(
"[pre-commit] train.py staged on a feature branch: "
@@ -92,51 +177,14 @@ def main() -> int:
file=sys.stderr,
)
return 0
if "reports/latest_eval.json" not in staged:
return fail("train.py changed: stage reports/latest_eval.json too.")
if "README.md" not in staged:
return fail("train.py changed: stage README.md with attempt log update.")
if not LATEST_MODEL.exists():
return fail("Missing artifacts/latest_eval_best_model.pt from training run.")
latest = read_json(LATEST)
latest_rmsd = float(latest["mean_rmsd_100"])
latest_runs = int(latest["num_runs"])
if latest_runs != 100:
return fail(f"num_runs must be 100, got {latest_runs}.")
previous_best = read_head_best()
if not (latest_rmsd < previous_best):
return fail(
f"No improvement: latest={latest_rmsd:.6f}, previous_best={previous_best:.6f}. "
"train.py commits require strict improvement."
)
best_report = {
"best_mean_rmsd_100": latest_rmsd,
"num_runs": latest_runs,
"timestamp_utc": latest.get("timestamp_utc", ""),
"command": latest.get("command", ""),
"notes": "Auto-updated by pre-commit from reports/latest_eval.json.",
"updated_by_commit": "pending",
"best_train_mse": latest.get("best_train_mse"),
}
write_json(BEST, best_report)
subprocess.check_call(["git", "add", str(BEST)], cwd=ROOT)
shutil.copy2(LATEST_MODEL, BEST_MODEL)
subprocess.check_call([sys.executable, str(ROOT / "scripts" / "update_best_artifacts.py")], cwd=ROOT)
subprocess.check_call(["git", "add", "reports/trajectories"], cwd=ROOT)
subprocess.check_call(["git", "add", "artifacts/best_model.pt"], cwd=ROOT)
subprocess.check_call(["git", "add", "artifacts/latest_eval_best_model.pt"], cwd=ROOT)
print(
f"[pre-commit] PASS: improved mean_rmsd_100 {previous_best:.6f} -> {latest_rmsd:.6f}; "
"BEST_PRACTICE.json auto-updated",
file=sys.stderr,
)
return 0
return run_main_gate(previous_best=previous_best, mode="pre-commit")
if __name__ == "__main__":