Add async pipeline with progress monitoring, resumability, and result transparency
The pipeline engine is rewritten around a combo-first loop: each combination is processed through all requested passes before moving on to the next, with an incremental DB save after every step (crash-safe). Blocked combos now get result rows, so they appear on the results page with their constraint-violation reasons. A new pipeline_runs table tracks the run lifecycle (pending/running/completed/failed/cancelled). The web route launches the pipeline in a background thread with its own DB connection, and an HTMX polling partial shows live progress with a per-pass breakdown. Also: a status guard prevents the reviewed -> scored downgrade; save_combination loads the existing status on dedup so that resume works correctly; per-metric scores show domain bounds, units, and position bars; ensure_metric backfills units on existing rows. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import sqlite3
|
||||
from datetime import datetime, timezone
|
||||
from typing import Sequence
|
||||
|
||||
from physcom.models.entity import Dependency, Entity
|
||||
@@ -170,6 +172,11 @@ class Repository:
|
||||
"INSERT OR IGNORE INTO metrics (name, unit, description) VALUES (?, ?, ?)",
|
||||
(name, unit, description),
|
||||
)
|
||||
if unit:
|
||||
self.conn.execute(
|
||||
"UPDATE metrics SET unit = ? WHERE name = ? AND (unit IS NULL OR unit = '')",
|
||||
(unit, name),
|
||||
)
|
||||
row = self.conn.execute("SELECT id FROM metrics WHERE name = ?", (name,)).fetchone()
|
||||
self.conn.commit()
|
||||
return row["id"]
|
||||
@@ -181,7 +188,7 @@ class Repository:
|
||||
)
|
||||
domain.id = cur.lastrowid
|
||||
for mb in domain.metric_bounds:
|
||||
metric_id = self.ensure_metric(mb.metric_name)
|
||||
metric_id = self.ensure_metric(mb.metric_name, unit=mb.unit)
|
||||
mb.metric_id = metric_id
|
||||
self.conn.execute(
|
||||
"""INSERT INTO domain_metric_weights
|
||||
@@ -233,10 +240,13 @@ class Repository:
|
||||
combination.hash = self.compute_hash(entity_ids)
|
||||
|
||||
existing = self.conn.execute(
|
||||
"SELECT id FROM combinations WHERE hash = ?", (combination.hash,)
|
||||
"SELECT id, status, block_reason FROM combinations WHERE hash = ?",
|
||||
(combination.hash,),
|
||||
).fetchone()
|
||||
if existing:
|
||||
combination.id = existing["id"]
|
||||
combination.status = existing["status"]
|
||||
combination.block_reason = existing["block_reason"]
|
||||
return combination
|
||||
|
||||
cur = self.conn.execute(
|
||||
@@ -255,6 +265,13 @@ class Repository:
|
||||
def update_combination_status(
|
||||
self, combo_id: int, status: str, block_reason: str | None = None
|
||||
) -> None:
|
||||
# Don't downgrade 'reviewed' to 'scored' — preserve human review state
|
||||
if status == "scored":
|
||||
row = self.conn.execute(
|
||||
"SELECT status FROM combinations WHERE id = ?", (combo_id,)
|
||||
).fetchone()
|
||||
if row and row["status"] == "reviewed":
|
||||
return
|
||||
self.conn.execute(
|
||||
"UPDATE combinations SET status = ?, block_reason = ? WHERE id = ?",
|
||||
(status, block_reason, combo_id),
|
||||
@@ -327,7 +344,7 @@ class Repository:
|
||||
def get_combination_scores(self, combo_id: int, domain_id: int) -> list[dict]:
|
||||
"""Return per-metric scores for a combination in a domain."""
|
||||
rows = self.conn.execute(
|
||||
"""SELECT cs.*, m.name as metric_name
|
||||
"""SELECT cs.*, m.name as metric_name, m.unit as metric_unit
|
||||
FROM combination_scores cs
|
||||
JOIN metrics m ON cs.metric_id = m.id
|
||||
WHERE cs.combination_id = ? AND cs.domain_id = ?""",
|
||||
@@ -335,12 +352,52 @@ class Repository:
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
def count_combinations_by_status(self) -> dict[str, int]:
|
||||
rows = self.conn.execute(
|
||||
"SELECT status, COUNT(*) as cnt FROM combinations GROUP BY status"
|
||||
).fetchall()
|
||||
def count_combinations_by_status(self, domain_name: str | None = None) -> dict[str, int]:
    """Tally combinations per status value.

    With a truthy ``domain_name`` only combinations that have a
    ``combination_results`` row for that domain are counted; otherwise every
    row in ``combinations`` is counted.

    Returns a mapping of status string to the number of combinations.
    """
    if not domain_name:
        cursor = self.conn.execute(
            "SELECT status, COUNT(*) as cnt FROM combinations GROUP BY status"
        )
    else:
        cursor = self.conn.execute(
            """SELECT c.status, COUNT(*) as cnt
               FROM combination_results cr
               JOIN combinations c ON cr.combination_id = c.id
               JOIN domains d ON cr.domain_id = d.id
               WHERE d.name = ?
               GROUP BY c.status""",
            (domain_name,),
        )

    tallies: dict[str, int] = {}
    for record in cursor.fetchall():
        tallies[record["status"]] = record["cnt"]
    return tallies
|
||||
|
||||
def get_pipeline_summary(self, domain_name: str) -> dict | None:
    """Summarise the stored results of one domain.

    Returns ``None`` when the domain has no ``combination_results`` rows.
    Otherwise returns a dict with ``total_results``, ``avg_score``,
    ``max_score``, ``min_score``, ``last_pass`` and ``blocked``.
    """
    stats = self.conn.execute(
        """SELECT COUNT(*) as total,
                  AVG(cr.composite_score) as avg_score,
                  MAX(cr.composite_score) as max_score,
                  MIN(cr.composite_score) as min_score,
                  MAX(cr.pass_reached) as last_pass
           FROM combination_results cr
           JOIN domains d ON cr.domain_id = d.id
           WHERE d.name = ?""",
        (domain_name,),
    ).fetchone()

    has_results = bool(stats) and stats["total"] != 0
    if not has_results:
        return None

    # The blocked tally is taken from the combinations table as a whole;
    # this query is not restricted to the requested domain.
    blocked_row = self.conn.execute(
        "SELECT COUNT(*) as cnt FROM combinations WHERE status = 'blocked'"
    ).fetchone()

    summary = {"total_results": stats["total"]}
    for column in ("avg_score", "max_score", "min_score", "last_pass"):
        summary[column] = stats[column]
    summary["blocked"] = blocked_row["cnt"] if blocked_row else 0
    return summary
|
||||
|
||||
def get_result(self, combo_id: int, domain_id: int) -> dict | None:
|
||||
"""Return a single combination_result row."""
|
||||
row = self.conn.execute(
|
||||
@@ -412,3 +469,88 @@ class Repository:
|
||||
"pass_reached": r["pass_reached"],
|
||||
})
|
||||
return results
|
||||
|
||||
# ── Pipeline Runs ────────────────────────────────────────
|
||||
|
||||
def create_pipeline_run(self, domain_id: int, config: dict) -> int:
    """Insert a ``pipeline_runs`` row in the 'pending' state and commit.

    ``config`` is stored as JSON text and ``created_at`` as the current UTC
    time in ISO-8601 form. Returns the id of the new row.
    """
    config_json = json.dumps(config)
    created_at = datetime.now(timezone.utc).isoformat()
    cursor = self.conn.execute(
        """INSERT INTO pipeline_runs (domain_id, status, config, created_at)
           VALUES (?, 'pending', ?, ?)""",
        (domain_id, config_json, created_at),
    )
    self.conn.commit()
    return cursor.lastrowid
|
||||
|
||||
def update_pipeline_run(self, run_id: int, **fields) -> None:
    """Update the given columns of one ``pipeline_runs`` row and commit.

    Args:
        run_id: id of the row to update.
        **fields: column name -> new value. With no fields the call is a
            no-op (nothing is executed or committed).

    Raises:
        ValueError: if a field name is not a plain identifier. Column names
            cannot be bound as SQL parameters and are therefore spliced into
            the statement text; rejecting anything else keeps a
            dict-unpacked key from injecting SQL.
    """
    if not fields:
        return

    invalid = [name for name in fields if not name.isidentifier()]
    if invalid:
        raise ValueError(f"Invalid pipeline_runs column name(s): {invalid!r}")

    # Identifiers are double-quoted so a column that happens to share a name
    # with an SQL keyword still parses; values always go through placeholders.
    set_clause = ", ".join(f'"{name}" = ?' for name in fields)
    self.conn.execute(
        f"UPDATE pipeline_runs SET {set_clause} WHERE id = ?",
        (*fields.values(), run_id),
    )
    self.conn.commit()
|
||||
|
||||
def get_pipeline_run(self, run_id: int) -> dict | None:
    """Fetch one ``pipeline_runs`` row by id as a plain dict, or ``None`` if absent."""
    cursor = self.conn.execute(
        "SELECT * FROM pipeline_runs WHERE id = ?", (run_id,)
    )
    record = cursor.fetchone()
    if not record:
        return None
    return dict(record)
|
||||
|
||||
def list_pipeline_runs(self, domain_id: int | None = None) -> list[dict]:
    """List pipeline runs, newest ``created_at`` first.

    Every returned dict holds the ``pipeline_runs`` columns plus a
    ``domain_name`` taken from the joined ``domains`` row. When ``domain_id``
    is not ``None`` only runs of that domain are returned.
    """
    if domain_id is None:
        cursor = self.conn.execute(
            """SELECT pr.*, d.name as domain_name
               FROM pipeline_runs pr
               JOIN domains d ON pr.domain_id = d.id
               ORDER BY pr.created_at DESC"""
        )
    else:
        cursor = self.conn.execute(
            """SELECT pr.*, d.name as domain_name
               FROM pipeline_runs pr
               JOIN domains d ON pr.domain_id = d.id
               WHERE pr.domain_id = ?
               ORDER BY pr.created_at DESC""",
            (domain_id,),
        )
    return list(map(dict, cursor.fetchall()))
|
||||
|
||||
def get_combo_pass_reached(self, combo_id: int, domain_id: int) -> int | None:
    """Look up ``pass_reached`` for a combination/domain pair.

    Returns ``None`` when there is no ``combination_results`` row for the pair.
    """
    record = self.conn.execute(
        """SELECT pass_reached FROM combination_results
           WHERE combination_id = ? AND domain_id = ?""",
        (combo_id, domain_id),
    ).fetchone()
    if not record:
        return None
    return record["pass_reached"]
|
||||
|
||||
def save_raw_estimates(
    self, combo_id: int, domain_id: int, estimates: list[dict]
) -> None:
    """Store raw (pass 2) metric estimates, leaving ``normalized_score`` NULL.

    Each entry of ``estimates`` is a dict with the keys ``metric_id``,
    ``raw_value``, ``estimation_method`` and ``confidence``. Rows are written
    with INSERT OR REPLACE and committed once after all of them are written.
    """
    insert_sql = """INSERT OR REPLACE INTO combination_scores
           (combination_id, domain_id, metric_id, raw_value, normalized_score,
            estimation_method, confidence)
           VALUES (?, ?, ?, ?, NULL, ?, ?)"""
    for estimate in estimates:
        params = (
            combo_id,
            domain_id,
            estimate["metric_id"],
            estimate["raw_value"],
            estimate["estimation_method"],
            estimate["confidence"],
        )
        self.conn.execute(insert_sql, params)
    self.conn.commit()
|
||||
|
||||
def get_existing_result(self, combo_id: int, domain_id: int) -> dict | None:
    """Fetch the whole ``combination_results`` row for a combination/domain pair.

    Returns the row as a plain dict, or ``None`` when no row exists. Used by
    the pipeline's resume logic.
    """
    cursor = self.conn.execute(
        """SELECT * FROM combination_results
           WHERE combination_id = ? AND domain_id = ?""",
        (combo_id, domain_id),
    )
    record = cursor.fetchone()
    if not record:
        return None
    return dict(record)
|
||||
|
||||
@@ -91,11 +91,29 @@ CREATE TABLE IF NOT EXISTS combination_results (
|
||||
UNIQUE(combination_id, domain_id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS pipeline_runs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
domain_id INTEGER NOT NULL REFERENCES domains(id),
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
config TEXT,
|
||||
total_combos INTEGER DEFAULT 0,
|
||||
combos_pass1 INTEGER DEFAULT 0,
|
||||
combos_pass2 INTEGER DEFAULT 0,
|
||||
combos_pass3 INTEGER DEFAULT 0,
|
||||
combos_pass4 INTEGER DEFAULT 0,
|
||||
current_pass INTEGER,
|
||||
error_message TEXT,
|
||||
started_at TIMESTAMP,
|
||||
completed_at TIMESTAMP,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_deps_entity ON dependencies(entity_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_deps_category_key ON dependencies(category, key);
|
||||
CREATE INDEX IF NOT EXISTS idx_combo_status ON combinations(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_scores_combo_domain ON combination_scores(combination_id, domain_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_results_domain_score ON combination_results(domain_id, composite_score DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_pipeline_runs_domain ON pipeline_runs(domain_id);
|
||||
"""
|
||||
|
||||
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
"""Multi-pass pipeline orchestrator."""
|
||||
"""Multi-pass pipeline orchestrator with incremental saves and resumability."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from physcom.db.repository import Repository
|
||||
from physcom.engine.combinator import generate_combinations
|
||||
from physcom.engine.constraint_resolver import ConstraintResolver, ConstraintResult
|
||||
from physcom.engine.scorer import Scorer
|
||||
from physcom.llm.base import LLMProvider
|
||||
from physcom.llm.prompts import PHYSICS_ESTIMATION_PROMPT, PLAUSIBILITY_REVIEW_PROMPT
|
||||
from physcom.models.combination import Combination, ScoredResult
|
||||
from physcom.models.domain import Domain
|
||||
|
||||
@@ -23,12 +23,17 @@ class PipelineResult:
|
||||
pass1_blocked: int = 0
|
||||
pass1_conditional: int = 0
|
||||
pass2_estimated: int = 0
|
||||
pass3_scored: int = 0
|
||||
pass3_above_threshold: int = 0
|
||||
pass4_reviewed: int = 0
|
||||
pass5_human_reviewed: int = 0
|
||||
top_results: list[dict] = field(default_factory=list)
|
||||
|
||||
|
||||
class CancelledError(Exception):
    """Raised when a pipeline run is cancelled.

    ``Pipeline._check_cancelled`` raises it when the stored status of the run
    is ``"cancelled"``.
    """
|
||||
|
||||
|
||||
def _describe_combination(combo: Combination) -> str:
|
||||
"""Build a natural-language description of a combination."""
|
||||
parts = [f"{e.dimension}: {e.name}" for e in combo.entities]
|
||||
@@ -53,158 +58,281 @@ class Pipeline:
|
||||
self.scorer = scorer
|
||||
self.llm = llm
|
||||
|
||||
def _check_cancelled(self, run_id: int | None) -> None:
|
||||
"""Raise CancelledError if the run has been cancelled."""
|
||||
if run_id is None:
|
||||
return
|
||||
run = self.repo.get_pipeline_run(run_id)
|
||||
if run and run["status"] == "cancelled":
|
||||
raise CancelledError("Pipeline run cancelled")
|
||||
|
||||
def _update_run_counters(
|
||||
self, run_id: int | None, result: PipelineResult, current_pass: int
|
||||
) -> None:
|
||||
"""Update pipeline_run progress counters in the DB."""
|
||||
if run_id is None:
|
||||
return
|
||||
self.repo.update_pipeline_run(
|
||||
run_id,
|
||||
combos_pass1=result.pass1_valid
|
||||
+ result.pass1_conditional
|
||||
+ result.pass1_blocked,
|
||||
combos_pass2=result.pass2_estimated,
|
||||
combos_pass3=result.pass3_scored,
|
||||
combos_pass4=result.pass4_reviewed,
|
||||
current_pass=current_pass,
|
||||
)
|
||||
|
||||
def run(
|
||||
self,
|
||||
domain: Domain,
|
||||
dimensions: list[str],
|
||||
score_threshold: float = 0.1,
|
||||
passes: list[int] | None = None,
|
||||
run_id: int | None = None,
|
||||
) -> PipelineResult:
|
||||
if passes is None:
|
||||
passes = [1, 2, 3, 4, 5]
|
||||
|
||||
result = PipelineResult()
|
||||
|
||||
# Mark run as running (unless already cancelled)
|
||||
if run_id is not None:
|
||||
run_record = self.repo.get_pipeline_run(run_id)
|
||||
if run_record and run_record["status"] == "cancelled":
|
||||
result.top_results = self.repo.get_top_results(domain.name, limit=20)
|
||||
return result
|
||||
self.repo.update_pipeline_run(
|
||||
run_id,
|
||||
status="running",
|
||||
started_at=datetime.now(timezone.utc).isoformat(),
|
||||
)
|
||||
|
||||
# Generate all combinations
|
||||
combos = generate_combinations(self.repo, dimensions)
|
||||
result.total_generated = len(combos)
|
||||
|
||||
# Save all combinations to DB
|
||||
# Save all combinations to DB (also loads status for existing combos)
|
||||
for combo in combos:
|
||||
self.repo.save_combination(combo)
|
||||
|
||||
# ── Pass 1: Constraint Resolution ───────────────────────
|
||||
valid_combos: list[Combination] = []
|
||||
if 1 in passes:
|
||||
valid_combos = self._pass1_constraints(combos, result)
|
||||
else:
|
||||
valid_combos = combos
|
||||
if run_id is not None:
|
||||
self.repo.update_pipeline_run(run_id, total_combos=len(combos))
|
||||
|
||||
# ── Pass 2: Physics Estimation ──────────────────────────
|
||||
estimated: list[tuple[Combination, dict[str, float]]] = []
|
||||
if 2 in passes:
|
||||
estimated = self._pass2_estimation(valid_combos, domain, result)
|
||||
else:
|
||||
# Skip estimation, use zeros
|
||||
estimated = [(c, {}) for c in valid_combos]
|
||||
# Prepare metric lookup
|
||||
metric_names = [mb.metric_name for mb in domain.metric_bounds]
|
||||
bounds_by_name = {mb.metric_name: mb for mb in domain.metric_bounds}
|
||||
|
||||
# ── Pass 3: Scoring & Ranking ───────────────────────────
|
||||
scored: list[tuple[Combination, ScoredResult]] = []
|
||||
if 3 in passes:
|
||||
scored = self._pass3_scoring(estimated, domain, score_threshold, result)
|
||||
# ── Combo-first loop ─────────────────────────────────────
|
||||
try:
|
||||
for combo in combos:
|
||||
self._check_cancelled(run_id)
|
||||
|
||||
# ── Pass 4: LLM Review ──────────────────────────────────
|
||||
if 4 in passes and self.llm:
|
||||
self._pass4_llm_review(scored, domain, result)
|
||||
# Check existing progress for this combo in this domain
|
||||
existing_pass = self.repo.get_combo_pass_reached(
|
||||
combo.id, domain.id
|
||||
) or 0
|
||||
|
||||
# ── Save results after scoring ─────────────────────────
|
||||
if 3 in passes:
|
||||
max_pass = max(p for p in passes if p <= 5)
|
||||
for combo, sr in scored:
|
||||
self.repo.save_result(
|
||||
combo.id, domain.id, sr.composite_score,
|
||||
pass_reached=max_pass,
|
||||
novelty_flag=sr.novelty_flag,
|
||||
llm_review=sr.llm_review,
|
||||
# Load existing result to preserve human review data
|
||||
existing_result = self.repo.get_existing_result(
|
||||
combo.id, domain.id
|
||||
)
|
||||
self.repo.update_combination_status(combo.id, "scored")
|
||||
|
||||
# Collect top results
|
||||
# ── Pass 1: Constraint Resolution ────────────────
|
||||
if 1 in passes and existing_pass < 1:
|
||||
cr: ConstraintResult = self.resolver.resolve(combo)
|
||||
if cr.status == "blocked":
|
||||
combo.status = "blocked"
|
||||
combo.block_reason = "; ".join(cr.violations)
|
||||
self.repo.update_combination_status(
|
||||
combo.id, "blocked", combo.block_reason
|
||||
)
|
||||
# Save a result row so blocked combos appear in results
|
||||
self.repo.save_result(
|
||||
combo.id,
|
||||
domain.id,
|
||||
composite_score=0.0,
|
||||
pass_reached=1,
|
||||
)
|
||||
result.pass1_blocked += 1
|
||||
self._update_run_counters(run_id, result, current_pass=1)
|
||||
continue # blocked — skip remaining passes
|
||||
elif cr.status == "conditional":
|
||||
combo.status = "valid"
|
||||
self.repo.update_combination_status(combo.id, "valid")
|
||||
result.pass1_conditional += 1
|
||||
else:
|
||||
combo.status = "valid"
|
||||
self.repo.update_combination_status(combo.id, "valid")
|
||||
result.pass1_valid += 1
|
||||
|
||||
self._update_run_counters(run_id, result, current_pass=1)
|
||||
elif 1 in passes:
|
||||
# Already pass1'd — check if it was blocked
|
||||
if combo.status == "blocked":
|
||||
result.pass1_blocked += 1
|
||||
continue
|
||||
else:
|
||||
result.pass1_valid += 1
|
||||
else:
|
||||
# Pass 1 not requested; check if blocked from a prior run
|
||||
if combo.status == "blocked":
|
||||
result.pass1_blocked += 1
|
||||
continue
|
||||
|
||||
# ── Pass 2: Physics Estimation ───────────────────
|
||||
raw_metrics: dict[str, float] = {}
|
||||
if 2 in passes and existing_pass < 2:
|
||||
description = _describe_combination(combo)
|
||||
if self.llm:
|
||||
raw_metrics = self.llm.estimate_physics(
|
||||
description, metric_names
|
||||
)
|
||||
else:
|
||||
raw_metrics = self._stub_estimate(combo, metric_names)
|
||||
|
||||
# Save raw estimates immediately (crash-safe)
|
||||
estimate_dicts = []
|
||||
for mname, rval in raw_metrics.items():
|
||||
mb = bounds_by_name.get(mname)
|
||||
if mb and mb.metric_id:
|
||||
estimate_dicts.append({
|
||||
"metric_id": mb.metric_id,
|
||||
"raw_value": rval,
|
||||
"estimation_method": "llm" if self.llm else "stub",
|
||||
"confidence": 1.0,
|
||||
})
|
||||
if estimate_dicts:
|
||||
self.repo.save_raw_estimates(
|
||||
combo.id, domain.id, estimate_dicts
|
||||
)
|
||||
|
||||
result.pass2_estimated += 1
|
||||
self._update_run_counters(run_id, result, current_pass=2)
|
||||
elif 2 in passes:
|
||||
# Already estimated — reload raw values from DB
|
||||
existing_scores = self.repo.get_combination_scores(
|
||||
combo.id, domain.id
|
||||
)
|
||||
raw_metrics = {
|
||||
s["metric_name"]: s["raw_value"] for s in existing_scores
|
||||
}
|
||||
result.pass2_estimated += 1
|
||||
else:
|
||||
# Pass 2 not requested, use empty metrics
|
||||
raw_metrics = {}
|
||||
|
||||
# ── Pass 3: Scoring & Ranking ────────────────────
|
||||
if 3 in passes and existing_pass < 3:
|
||||
sr = self.scorer.score_combination(combo, raw_metrics)
|
||||
|
||||
# Persist per-metric scores with normalized values
|
||||
score_dicts = []
|
||||
for s in sr.scores:
|
||||
mb = bounds_by_name.get(s.metric_name)
|
||||
if mb and mb.metric_id:
|
||||
score_dicts.append({
|
||||
"metric_id": mb.metric_id,
|
||||
"raw_value": s.raw_value,
|
||||
"normalized_score": s.normalized_score,
|
||||
"estimation_method": s.estimation_method,
|
||||
"confidence": s.confidence,
|
||||
})
|
||||
if score_dicts:
|
||||
self.repo.save_scores(combo.id, domain.id, score_dicts)
|
||||
|
||||
# Preserve existing human data
|
||||
novelty_flag = (
|
||||
existing_result["novelty_flag"] if existing_result else None
|
||||
)
|
||||
human_notes = (
|
||||
existing_result["human_notes"] if existing_result else None
|
||||
)
|
||||
|
||||
self.repo.save_result(
|
||||
combo.id,
|
||||
domain.id,
|
||||
sr.composite_score,
|
||||
pass_reached=3,
|
||||
novelty_flag=novelty_flag,
|
||||
human_notes=human_notes,
|
||||
)
|
||||
self.repo.update_combination_status(combo.id, "scored")
|
||||
|
||||
result.pass3_scored += 1
|
||||
if sr.composite_score >= score_threshold:
|
||||
result.pass3_above_threshold += 1
|
||||
|
||||
self._update_run_counters(run_id, result, current_pass=3)
|
||||
elif 3 in passes and existing_pass >= 3:
|
||||
# Already scored — count it
|
||||
result.pass3_scored += 1
|
||||
if existing_result and existing_result["composite_score"] is not None:
|
||||
if existing_result["composite_score"] >= score_threshold:
|
||||
result.pass3_above_threshold += 1
|
||||
|
||||
# ── Pass 4: LLM Review ───────────────────────────
|
||||
if 4 in passes and self.llm:
|
||||
cur_pass = self.repo.get_combo_pass_reached(
|
||||
combo.id, domain.id
|
||||
) or 0
|
||||
if cur_pass < 4:
|
||||
cur_result = self.repo.get_existing_result(
|
||||
combo.id, domain.id
|
||||
)
|
||||
if (
|
||||
cur_result
|
||||
and cur_result["composite_score"] is not None
|
||||
and cur_result["composite_score"] >= score_threshold
|
||||
):
|
||||
description = _describe_combination(combo)
|
||||
db_scores = self.repo.get_combination_scores(
|
||||
combo.id, domain.id
|
||||
)
|
||||
score_dict = {
|
||||
s["metric_name"]: s["normalized_score"]
|
||||
for s in db_scores
|
||||
if s["normalized_score"] is not None
|
||||
}
|
||||
review = self.llm.review_plausibility(
|
||||
description, score_dict
|
||||
)
|
||||
|
||||
self.repo.save_result(
|
||||
combo.id,
|
||||
domain.id,
|
||||
cur_result["composite_score"],
|
||||
pass_reached=4,
|
||||
novelty_flag=cur_result.get("novelty_flag"),
|
||||
llm_review=review,
|
||||
human_notes=cur_result.get("human_notes"),
|
||||
)
|
||||
result.pass4_reviewed += 1
|
||||
self._update_run_counters(
|
||||
run_id, result, current_pass=4
|
||||
)
|
||||
|
||||
except CancelledError:
|
||||
if run_id is not None:
|
||||
self.repo.update_pipeline_run(
|
||||
run_id,
|
||||
status="cancelled",
|
||||
completed_at=datetime.now(timezone.utc).isoformat(),
|
||||
)
|
||||
result.top_results = self.repo.get_top_results(domain.name, limit=20)
|
||||
return result
|
||||
|
||||
# Mark run as completed
|
||||
if run_id is not None:
|
||||
self.repo.update_pipeline_run(
|
||||
run_id,
|
||||
status="completed",
|
||||
completed_at=datetime.now(timezone.utc).isoformat(),
|
||||
)
|
||||
|
||||
result.top_results = self.repo.get_top_results(domain.name, limit=20)
|
||||
return result
|
||||
|
||||
def _pass1_constraints(
|
||||
self, combos: list[Combination], result: PipelineResult
|
||||
) -> list[Combination]:
|
||||
valid = []
|
||||
for combo in combos:
|
||||
cr: ConstraintResult = self.resolver.resolve(combo)
|
||||
if cr.status == "blocked":
|
||||
combo.status = "blocked"
|
||||
combo.block_reason = "; ".join(cr.violations)
|
||||
self.repo.update_combination_status(
|
||||
combo.id, "blocked", combo.block_reason
|
||||
)
|
||||
result.pass1_blocked += 1
|
||||
elif cr.status == "conditional":
|
||||
combo.status = "valid"
|
||||
self.repo.update_combination_status(combo.id, "valid")
|
||||
valid.append(combo)
|
||||
result.pass1_conditional += 1
|
||||
else:
|
||||
combo.status = "valid"
|
||||
self.repo.update_combination_status(combo.id, "valid")
|
||||
valid.append(combo)
|
||||
result.pass1_valid += 1
|
||||
return valid
|
||||
|
||||
def _pass2_estimation(
    self,
    combos: list[Combination],
    domain: Domain,
    result: PipelineResult,
) -> list[tuple[Combination, dict[str, float]]]:
    """Estimate raw metric values for every combination.

    The metric names come from ``domain.metric_bounds``. When an LLM provider
    is configured it produces the estimates; otherwise the stub estimator is
    used. Returns ``(combination, raw_metrics)`` pairs in input order and
    increments ``result.pass2_estimated`` once per combination.
    """
    wanted_metrics = [bound.metric_name for bound in domain.metric_bounds]
    pairs = []

    for combo in combos:
        summary = _describe_combination(combo)
        if not self.llm:
            # Stub estimation: derive from dependencies where possible
            values = self._stub_estimate(combo, wanted_metrics)
        else:
            values = self.llm.estimate_physics(summary, wanted_metrics)
        pairs.append((combo, values))
        result.pass2_estimated += 1

    return pairs
|
||||
|
||||
def _pass3_scoring(
|
||||
self,
|
||||
estimated: list[tuple[Combination, dict[str, float]]],
|
||||
domain: Domain,
|
||||
threshold: float,
|
||||
result: PipelineResult,
|
||||
) -> list[tuple[Combination, ScoredResult]]:
|
||||
scored = []
|
||||
for combo, raw_metrics in estimated:
|
||||
sr = self.scorer.score_combination(combo, raw_metrics)
|
||||
if sr.composite_score >= threshold:
|
||||
scored.append((combo, sr))
|
||||
result.pass3_above_threshold += 1
|
||||
# Persist per-metric scores
|
||||
score_dicts = []
|
||||
bounds_by_name = {mb.metric_name: mb for mb in domain.metric_bounds}
|
||||
for s in sr.scores:
|
||||
mb = bounds_by_name.get(s.metric_name)
|
||||
if mb and mb.metric_id:
|
||||
score_dicts.append({
|
||||
"metric_id": mb.metric_id,
|
||||
"raw_value": s.raw_value,
|
||||
"normalized_score": s.normalized_score,
|
||||
"estimation_method": s.estimation_method,
|
||||
"confidence": s.confidence,
|
||||
})
|
||||
if score_dicts:
|
||||
self.repo.save_scores(combo.id, domain.id, score_dicts)
|
||||
|
||||
# Sort by composite score descending
|
||||
scored.sort(key=lambda x: x[1].composite_score, reverse=True)
|
||||
return scored
|
||||
|
||||
def _pass4_llm_review(
    self,
    scored: list[tuple[Combination, ScoredResult]],
    domain: Domain,
    result: PipelineResult,
) -> None:
    """Attach an LLM plausibility review to every scored combination.

    For each pair the combination description and its normalized per-metric
    scores are passed to the LLM; the returned review is stored on the scored
    result and ``result.pass4_reviewed`` is incremented. ``domain`` is
    accepted but not used here.
    """
    for combo, scored_result in scored:
        summary = _describe_combination(combo)
        normalized_by_metric = {}
        for metric_score in scored_result.scores:
            normalized_by_metric[metric_score.metric_name] = (
                metric_score.normalized_score
            )
        scored_result.llm_review = self.llm.review_plausibility(
            summary, normalized_by_metric
        )
        result.pass4_reviewed += 1
|
||||
|
||||
def _stub_estimate(
|
||||
self, combo: Combination, metric_names: list[str]
|
||||
) -> dict[str, float]:
|
||||
@@ -223,24 +351,21 @@ class Pipeline:
|
||||
|
||||
# Rough speed estimate: F=ma -> v proportional to power/mass
|
||||
if "speed" in raw and mass_kg > 0:
|
||||
# Very rough: speed ~ power / (mass * drag_coeff)
|
||||
raw["speed"] = min(force_watts / mass_kg * 0.5, 300000)
|
||||
|
||||
if "cost_efficiency" in raw:
|
||||
# Lower force = cheaper per km (roughly)
|
||||
raw["cost_efficiency"] = max(0.01, 2.0 - force_watts / 100000)
|
||||
|
||||
if "safety" in raw:
|
||||
raw["safety"] = 0.5 # default mid-range
|
||||
raw["safety"] = 0.5
|
||||
|
||||
if "availability" in raw:
|
||||
raw["availability"] = 0.5
|
||||
|
||||
if "range_fuel" in raw:
|
||||
# More power = more range (very rough)
|
||||
raw["range_fuel"] = min(force_watts * 0.01, 1e10)
|
||||
|
||||
if "range_degradation" in raw:
|
||||
raw["range_degradation"] = 365 # 1 year default
|
||||
raw["range_degradation"] = 365
|
||||
|
||||
return raw
|
||||
|
||||
@@ -13,6 +13,7 @@ class MetricBound:
|
||||
weight: float # 0.0–1.0
|
||||
norm_min: float # Below this → score 0
|
||||
norm_max: float # Above this → score 1
|
||||
unit: str = ""
|
||||
metric_id: int | None = None
|
||||
|
||||
|
||||
|
||||
@@ -243,11 +243,11 @@ URBAN_COMMUTING = Domain(
|
||||
name="urban_commuting",
|
||||
description="Daily travel within a city, 1-50km range",
|
||||
metric_bounds=[
|
||||
MetricBound("speed", weight=0.25, norm_min=5, norm_max=120),
|
||||
MetricBound("cost_efficiency", weight=0.25, norm_min=0.01, norm_max=2.0),
|
||||
MetricBound("safety", weight=0.25, norm_min=0.0, norm_max=1.0),
|
||||
MetricBound("availability", weight=0.15, norm_min=0.0, norm_max=1.0),
|
||||
MetricBound("range_fuel", weight=0.10, norm_min=5, norm_max=500),
|
||||
MetricBound("speed", weight=0.25, norm_min=5, norm_max=120, unit="km/h"),
|
||||
MetricBound("cost_efficiency", weight=0.25, norm_min=0.01, norm_max=2.0, unit="$/km"),
|
||||
MetricBound("safety", weight=0.25, norm_min=0.0, norm_max=1.0, unit="0-1"),
|
||||
MetricBound("availability", weight=0.15, norm_min=0.0, norm_max=1.0, unit="0-1"),
|
||||
MetricBound("range_fuel", weight=0.10, norm_min=5, norm_max=500, unit="km"),
|
||||
],
|
||||
)
|
||||
|
||||
@@ -255,11 +255,11 @@ INTERPLANETARY = Domain(
|
||||
name="interplanetary_travel",
|
||||
description="Travel between planets within a solar system",
|
||||
metric_bounds=[
|
||||
MetricBound("speed", weight=0.30, norm_min=1000, norm_max=300000),
|
||||
MetricBound("range_fuel", weight=0.30, norm_min=1e6, norm_max=1e10),
|
||||
MetricBound("safety", weight=0.20, norm_min=0.0, norm_max=1.0),
|
||||
MetricBound("cost_efficiency", weight=0.10, norm_min=1e3, norm_max=1e9),
|
||||
MetricBound("range_degradation", weight=0.10, norm_min=100, norm_max=36500),
|
||||
MetricBound("speed", weight=0.30, norm_min=1000, norm_max=300000, unit="km/s"),
|
||||
MetricBound("range_fuel", weight=0.30, norm_min=1e6, norm_max=1e10, unit="km"),
|
||||
MetricBound("safety", weight=0.20, norm_min=0.0, norm_max=1.0, unit="0-1"),
|
||||
MetricBound("cost_efficiency", weight=0.10, norm_min=1e3, norm_max=1e9, unit="$/km"),
|
||||
MetricBound("range_degradation", weight=0.10, norm_min=100, norm_max=36500, unit="days"),
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user