QoL and metric value inverter
This commit is contained in:
@@ -21,12 +21,15 @@ class PipelineResult:
|
||||
|
||||
total_generated: int = 0
|
||||
pass1_valid: int = 0
|
||||
pass1_blocked: int = 0
|
||||
pass1_failed: int = 0
|
||||
pass1_conditional: int = 0
|
||||
pass2_estimated: int = 0
|
||||
pass2_failed: int = 0
|
||||
pass3_scored: int = 0
|
||||
pass3_above_threshold: int = 0
|
||||
pass3_failed: int = 0
|
||||
pass4_reviewed: int = 0
|
||||
pass4_failed: int = 0
|
||||
pass5_human_reviewed: int = 0
|
||||
top_results: list[dict] = field(default_factory=list)
|
||||
|
||||
@@ -77,7 +80,7 @@ class Pipeline:
|
||||
run_id,
|
||||
combos_pass1=result.pass1_valid
|
||||
+ result.pass1_conditional
|
||||
+ result.pass1_blocked,
|
||||
+ result.pass1_failed,
|
||||
combos_pass2=result.pass2_estimated,
|
||||
combos_pass3=result.pass3_scored,
|
||||
combos_pass4=result.pass4_reviewed,
|
||||
@@ -142,22 +145,22 @@ class Pipeline:
|
||||
# ── Pass 1: Constraint Resolution ────────────────
|
||||
if 1 in passes and existing_pass < 1:
|
||||
cr: ConstraintResult = self.resolver.resolve(combo)
|
||||
if cr.status == "blocked":
|
||||
combo.status = "blocked"
|
||||
if cr.status == "p1_fail":
|
||||
combo.status = "p1_fail"
|
||||
combo.block_reason = "; ".join(cr.violations)
|
||||
self.repo.update_combination_status(
|
||||
combo.id, "blocked", combo.block_reason
|
||||
combo.id, "p1_fail", combo.block_reason
|
||||
)
|
||||
# Save a result row so blocked combos appear in results
|
||||
# Save a result row so failed combos appear in results
|
||||
self.repo.save_result(
|
||||
combo.id,
|
||||
domain.id,
|
||||
composite_score=0.0,
|
||||
pass_reached=1,
|
||||
)
|
||||
result.pass1_blocked += 1
|
||||
result.pass1_failed += 1
|
||||
self._update_run_counters(run_id, result, current_pass=1)
|
||||
continue # blocked — skip remaining passes
|
||||
continue # p1_fail — skip remaining passes
|
||||
else:
|
||||
combo.status = "valid"
|
||||
self.repo.update_combination_status(combo.id, "valid")
|
||||
@@ -168,16 +171,16 @@ class Pipeline:
|
||||
|
||||
self._update_run_counters(run_id, result, current_pass=1)
|
||||
elif 1 in passes:
|
||||
# Already pass1'd — check if it was blocked
|
||||
if combo.status == "blocked":
|
||||
result.pass1_blocked += 1
|
||||
# Already pass1'd — check if it failed
|
||||
if combo.status.endswith("_fail"):
|
||||
result.pass1_failed += 1
|
||||
continue
|
||||
else:
|
||||
result.pass1_valid += 1
|
||||
else:
|
||||
# Pass 1 not requested; check if blocked from a prior run
|
||||
if combo.status == "blocked":
|
||||
result.pass1_blocked += 1
|
||||
# Pass 1 not requested; check if failed from a prior run
|
||||
if combo.status.endswith("_fail"):
|
||||
result.pass1_failed += 1
|
||||
continue
|
||||
|
||||
# ── Pass 2: Physics Estimation ───────────────────
|
||||
@@ -207,6 +210,21 @@ class Pipeline:
|
||||
combo.id, domain.id, estimate_dicts
|
||||
)
|
||||
|
||||
# Check for all-zero estimates → p2_fail
|
||||
if raw_metrics and all(v == 0.0 for v in raw_metrics.values()):
|
||||
combo.status = "p2_fail"
|
||||
combo.block_reason = "All metric estimates are zero"
|
||||
self.repo.update_combination_status(
|
||||
combo.id, "p2_fail", combo.block_reason
|
||||
)
|
||||
self.repo.save_result(
|
||||
combo.id, domain.id,
|
||||
composite_score=0.0, pass_reached=2,
|
||||
)
|
||||
result.pass2_failed += 1
|
||||
self._update_run_counters(run_id, result, current_pass=2)
|
||||
continue
|
||||
|
||||
result.pass2_estimated += 1
|
||||
self._update_run_counters(run_id, result, current_pass=2)
|
||||
elif 2 in passes:
|
||||
@@ -249,6 +267,26 @@ class Pipeline:
|
||||
existing_result["human_notes"] if existing_result else None
|
||||
)
|
||||
|
||||
if sr.composite_score < score_threshold:
|
||||
self.repo.save_result(
|
||||
combo.id, domain.id,
|
||||
sr.composite_score, pass_reached=3,
|
||||
novelty_flag=novelty_flag,
|
||||
human_notes=human_notes,
|
||||
)
|
||||
combo.status = "p3_fail"
|
||||
combo.block_reason = (
|
||||
f"Composite score {sr.composite_score:.4f} "
|
||||
f"below threshold {score_threshold}"
|
||||
)
|
||||
self.repo.update_combination_status(
|
||||
combo.id, "p3_fail", combo.block_reason
|
||||
)
|
||||
result.pass3_failed += 1
|
||||
result.pass3_scored += 1
|
||||
self._update_run_counters(run_id, result, current_pass=3)
|
||||
continue
|
||||
|
||||
self.repo.save_result(
|
||||
combo.id,
|
||||
domain.id,
|
||||
@@ -260,8 +298,7 @@ class Pipeline:
|
||||
self.repo.update_combination_status(combo.id, "scored")
|
||||
|
||||
result.pass3_scored += 1
|
||||
if sr.composite_score >= score_threshold:
|
||||
result.pass3_above_threshold += 1
|
||||
result.pass3_above_threshold += 1
|
||||
|
||||
self._update_run_counters(run_id, result, current_pass=3)
|
||||
elif 3 in passes and existing_pass >= 3:
|
||||
@@ -294,33 +331,49 @@ class Pipeline:
|
||||
for s in db_scores
|
||||
if s["normalized_score"] is not None
|
||||
}
|
||||
review: str | None = None
|
||||
review_result: tuple[str, bool] | None = None
|
||||
try:
|
||||
review = self.llm.review_plausibility(
|
||||
review_result = self.llm.review_plausibility(
|
||||
description, score_dict
|
||||
)
|
||||
except LLMRateLimitError as exc:
|
||||
self._wait_for_rate_limit(run_id, exc.retry_after)
|
||||
try:
|
||||
review = self.llm.review_plausibility(
|
||||
review_result = self.llm.review_plausibility(
|
||||
description, score_dict
|
||||
)
|
||||
except LLMRateLimitError:
|
||||
pass # still limited; skip, retry next run
|
||||
if review is not None:
|
||||
self.repo.save_result(
|
||||
combo.id,
|
||||
domain.id,
|
||||
cur_result["composite_score"],
|
||||
pass_reached=4,
|
||||
novelty_flag=cur_result.get("novelty_flag"),
|
||||
llm_review=review,
|
||||
human_notes=cur_result.get("human_notes"),
|
||||
)
|
||||
self.repo.update_combination_status(
|
||||
combo.id, "llm_reviewed"
|
||||
)
|
||||
result.pass4_reviewed += 1
|
||||
if review_result is not None:
|
||||
review_text, plausible = review_result
|
||||
if not plausible:
|
||||
self.repo.save_result(
|
||||
combo.id, domain.id,
|
||||
cur_result["composite_score"],
|
||||
pass_reached=4,
|
||||
novelty_flag=cur_result.get("novelty_flag"),
|
||||
llm_review=review_text,
|
||||
human_notes=cur_result.get("human_notes"),
|
||||
)
|
||||
combo.status = "p4_fail"
|
||||
combo.block_reason = "LLM deemed implausible"
|
||||
self.repo.update_combination_status(
|
||||
combo.id, "p4_fail", combo.block_reason
|
||||
)
|
||||
result.pass4_failed += 1
|
||||
else:
|
||||
self.repo.save_result(
|
||||
combo.id, domain.id,
|
||||
cur_result["composite_score"],
|
||||
pass_reached=4,
|
||||
novelty_flag=cur_result.get("novelty_flag"),
|
||||
llm_review=review_text,
|
||||
human_notes=cur_result.get("human_notes"),
|
||||
)
|
||||
self.repo.update_combination_status(
|
||||
combo.id, "llm_reviewed"
|
||||
)
|
||||
result.pass4_reviewed += 1
|
||||
self._update_run_counters(
|
||||
run_id, result, current_pass=4
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user