Handle LLM rate limits gracefully — stop and resume rather than fail
- Add LLMRateLimitError to llm/base.py (provider-agnostic)
- GeminiLLMProvider raises it on 429/RESOURCE_EXHAUSTED responses
- Pipeline catches it, marks the run completed (not failed), and returns partial results — already-reviewed combos are saved, and re-running pass 4 resumes from where it left off

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,6 +5,14 @@ from __future__ import annotations
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class LLMRateLimitError(Exception):
    """Signals that the LLM provider's API rate limit was hit.

    Providers raise this instead of a provider-specific error so the
    pipeline can stop gracefully; a subsequent re-run picks up from
    where the previous one stopped.
    """
|
||||
|
||||
|
||||
class LLMProvider(ABC):
|
||||
"""Abstract LLM interface for physics estimation and plausibility review."""
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import re
|
||||
|
||||
from physcom.llm.base import LLMProvider
|
||||
from physcom.llm.base import LLMProvider, LLMRateLimitError
|
||||
from physcom.llm.prompts import PHYSICS_ESTIMATION_PROMPT, PLAUSIBILITY_REVIEW_PROMPT
|
||||
|
||||
|
||||
@@ -29,9 +29,14 @@ class GeminiLLMProvider(LLMProvider):
|
||||
description=combination_description,
|
||||
metrics=", ".join(metrics),
|
||||
)
|
||||
response = self._client.models.generate_content(
|
||||
model=self._model, contents=prompt
|
||||
)
|
||||
try:
|
||||
response = self._client.models.generate_content(
|
||||
model=self._model, contents=prompt
|
||||
)
|
||||
except Exception as exc:
|
||||
if "429" in str(exc) or "RESOURCE_EXHAUSTED" in str(exc):
|
||||
raise LLMRateLimitError(str(exc)) from exc
|
||||
raise
|
||||
return self._parse_json(response.text, metrics)
|
||||
|
||||
def review_plausibility(
|
||||
@@ -42,9 +47,14 @@ class GeminiLLMProvider(LLMProvider):
|
||||
description=combination_description,
|
||||
scores=scores_str,
|
||||
)
|
||||
response = self._client.models.generate_content(
|
||||
model=self._model, contents=prompt
|
||||
)
|
||||
try:
|
||||
response = self._client.models.generate_content(
|
||||
model=self._model, contents=prompt
|
||||
)
|
||||
except Exception as exc:
|
||||
if "429" in str(exc) or "RESOURCE_EXHAUSTED" in str(exc):
|
||||
raise LLMRateLimitError(str(exc)) from exc
|
||||
raise
|
||||
return response.text.strip()
|
||||
|
||||
def _parse_json(self, text: str, metrics: list[str]) -> dict[str, float]:
|
||||
|
||||
Reference in New Issue
Block a user