Add pluggable LLM support with Gemini provider

- Add LLMProvider registry (llm/registry.py) that builds a provider from
  env vars (LLM_PROVIDER, GEMINI_API_KEY, GEMINI_MODEL)
- Add GeminiLLMProvider using the google-genai SDK
- Wire build_llm_provider() into CLI and web pipeline route (replacing llm=None)
- Wrap pass 2 and pass 4 LLM calls in per-combo try/except so API errors
  no longer abort the whole run: pass 2 falls back to the stub estimate,
  and pass 4 skips the affected combo
- Add gemini optional dep to pyproject.toml; Dockerfile installs [web,gemini]
- Document env vars in .env.example and README
- Lower requires-python to >=3.10 to match installed system Python

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-18 22:04:35 -06:00
parent f1b3c75190
commit 20dae0dce3
10 changed files with 204 additions and 40 deletions

View File

@@ -184,9 +184,12 @@ class Pipeline:
if 2 in passes and existing_pass < 2:
description = _describe_combination(combo)
if self.llm:
raw_metrics = self.llm.estimate_physics(
description, metric_names
)
try:
raw_metrics = self.llm.estimate_physics(
description, metric_names
)
except Exception:
raw_metrics = self._stub_estimate(combo, metric_names)
else:
raw_metrics = self._stub_estimate(combo, metric_names)
@@ -284,32 +287,34 @@ class Pipeline:
and cur_result["composite_score"] is not None
and cur_result["composite_score"] >= score_threshold
):
description = _describe_combination(combo)
db_scores = self.repo.get_combination_scores(
combo.id, domain.id
)
score_dict = {
s["metric_name"]: s["normalized_score"]
for s in db_scores
if s["normalized_score"] is not None
}
review = self.llm.review_plausibility(
description, score_dict
)
self.repo.save_result(
combo.id,
domain.id,
cur_result["composite_score"],
pass_reached=4,
novelty_flag=cur_result.get("novelty_flag"),
llm_review=review,
human_notes=cur_result.get("human_notes"),
)
result.pass4_reviewed += 1
self._update_run_counters(
run_id, result, current_pass=4
)
try:
description = _describe_combination(combo)
db_scores = self.repo.get_combination_scores(
combo.id, domain.id
)
score_dict = {
s["metric_name"]: s["normalized_score"]
for s in db_scores
if s["normalized_score"] is not None
}
review = self.llm.review_plausibility(
description, score_dict
)
self.repo.save_result(
combo.id,
domain.id,
cur_result["composite_score"],
pass_reached=4,
novelty_flag=cur_result.get("novelty_flag"),
llm_review=review,
human_notes=cur_result.get("human_notes"),
)
result.pass4_reviewed += 1
self._update_run_counters(
run_id, result, current_pass=4
)
except Exception:
pass # skip this combo; don't abort the run
except CancelledError:
if run_id is not None: