Handle LLM rate limits gracefully — stop and resume rather than fail

- Add LLMRateLimitError to llm/base.py (provider-agnostic)
- GeminiLLMProvider raises it on 429/RESOURCE_EXHAUSTED responses
- Pipeline catches it, marks the run completed (not failed), and returns
  partial results — already-reviewed combos are saved, and re-running
  pass 4 resumes from where it left off

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-18 22:36:13 -06:00
parent 20dae0dce3
commit ee885b2390
3 changed files with 65 additions and 42 deletions

View File

@@ -9,7 +9,7 @@ from physcom.db.repository import Repository
from physcom.engine.combinator import generate_combinations from physcom.engine.combinator import generate_combinations
from physcom.engine.constraint_resolver import ConstraintResolver, ConstraintResult from physcom.engine.constraint_resolver import ConstraintResolver, ConstraintResult
from physcom.engine.scorer import Scorer from physcom.engine.scorer import Scorer
from physcom.llm.base import LLMProvider from physcom.llm.base import LLMProvider, LLMRateLimitError
from physcom.models.combination import Combination, ScoredResult from physcom.models.combination import Combination, ScoredResult
from physcom.models.domain import Domain from physcom.models.domain import Domain
@@ -184,12 +184,9 @@ class Pipeline:
if 2 in passes and existing_pass < 2: if 2 in passes and existing_pass < 2:
description = _describe_combination(combo) description = _describe_combination(combo)
if self.llm: if self.llm:
try: raw_metrics = self.llm.estimate_physics(
raw_metrics = self.llm.estimate_physics( description, metric_names
description, metric_names )
)
except Exception:
raw_metrics = self._stub_estimate(combo, metric_names)
else: else:
raw_metrics = self._stub_estimate(combo, metric_names) raw_metrics = self._stub_estimate(combo, metric_names)
@@ -287,34 +284,31 @@ class Pipeline:
and cur_result["composite_score"] is not None and cur_result["composite_score"] is not None
and cur_result["composite_score"] >= score_threshold and cur_result["composite_score"] >= score_threshold
): ):
try: description = _describe_combination(combo)
description = _describe_combination(combo) db_scores = self.repo.get_combination_scores(
db_scores = self.repo.get_combination_scores( combo.id, domain.id
combo.id, domain.id )
) score_dict = {
score_dict = { s["metric_name"]: s["normalized_score"]
s["metric_name"]: s["normalized_score"] for s in db_scores
for s in db_scores if s["normalized_score"] is not None
if s["normalized_score"] is not None }
} review = self.llm.review_plausibility(
review = self.llm.review_plausibility( description, score_dict
description, score_dict )
) self.repo.save_result(
self.repo.save_result( combo.id,
combo.id, domain.id,
domain.id, cur_result["composite_score"],
cur_result["composite_score"], pass_reached=4,
pass_reached=4, novelty_flag=cur_result.get("novelty_flag"),
novelty_flag=cur_result.get("novelty_flag"), llm_review=review,
llm_review=review, human_notes=cur_result.get("human_notes"),
human_notes=cur_result.get("human_notes"), )
) result.pass4_reviewed += 1
result.pass4_reviewed += 1 self._update_run_counters(
self._update_run_counters( run_id, result, current_pass=4
run_id, result, current_pass=4 )
)
except Exception:
pass # skip this combo; don't abort the run
except CancelledError: except CancelledError:
if run_id is not None: if run_id is not None:
@@ -325,6 +319,17 @@ class Pipeline:
) )
result.top_results = self.repo.get_top_results(domain.name, limit=20) result.top_results = self.repo.get_top_results(domain.name, limit=20)
return result return result
except LLMRateLimitError:
# Rate limit hit — save progress and let the user re-run to continue.
# Already-reviewed combos are persisted; resumability skips them next time.
if run_id is not None:
self.repo.update_pipeline_run(
run_id,
status="completed",
completed_at=datetime.now(timezone.utc).isoformat(),
)
result.top_results = self.repo.get_top_results(domain.name, limit=20)
return result
# Mark run as completed # Mark run as completed
if run_id is not None: if run_id is not None:

View File

@@ -5,6 +5,14 @@ from __future__ import annotations
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
class LLMRateLimitError(Exception):
    """Raised by a provider when the API rate limit is exceeded.

    Provider-agnostic: any LLMProvider implementation should raise this
    (e.g. on an HTTP 429 / RESOURCE_EXHAUSTED response) instead of leaking
    provider-specific errors. The pipeline catches it to stop gracefully,
    persist progress, and let the user re-run to continue from where it
    left off.
    """
class LLMProvider(ABC): class LLMProvider(ABC):
"""Abstract LLM interface for physics estimation and plausibility review.""" """Abstract LLM interface for physics estimation and plausibility review."""

View File

@@ -5,7 +5,7 @@ from __future__ import annotations
import json import json
import re import re
from physcom.llm.base import LLMProvider from physcom.llm.base import LLMProvider, LLMRateLimitError
from physcom.llm.prompts import PHYSICS_ESTIMATION_PROMPT, PLAUSIBILITY_REVIEW_PROMPT from physcom.llm.prompts import PHYSICS_ESTIMATION_PROMPT, PLAUSIBILITY_REVIEW_PROMPT
@@ -29,9 +29,14 @@ class GeminiLLMProvider(LLMProvider):
description=combination_description, description=combination_description,
metrics=", ".join(metrics), metrics=", ".join(metrics),
) )
response = self._client.models.generate_content( try:
model=self._model, contents=prompt response = self._client.models.generate_content(
) model=self._model, contents=prompt
)
except Exception as exc:
if "429" in str(exc) or "RESOURCE_EXHAUSTED" in str(exc):
raise LLMRateLimitError(str(exc)) from exc
raise
return self._parse_json(response.text, metrics) return self._parse_json(response.text, metrics)
def review_plausibility( def review_plausibility(
@@ -42,9 +47,14 @@ class GeminiLLMProvider(LLMProvider):
description=combination_description, description=combination_description,
scores=scores_str, scores=scores_str,
) )
response = self._client.models.generate_content( try:
model=self._model, contents=prompt response = self._client.models.generate_content(
) model=self._model, contents=prompt
)
except Exception as exc:
if "429" in str(exc) or "RESOURCE_EXHAUSTED" in str(exc):
raise LLMRateLimitError(str(exc)) from exc
raise
return response.text.strip() return response.text.strip()
def _parse_json(self, text: str, metrics: list[str]) -> dict[str, float]: def _parse_json(self, text: str, metrics: list[str]) -> dict[str, float]: