Handle LLM rate limits gracefully — stop and resume rather than fail
- Add LLMRateLimitError to llm/base.py (provider-agnostic)
- GeminiLLMProvider raises it on 429/RESOURCE_EXHAUSTED responses
- Pipeline catches it, marks the run completed (not failed), and returns partial results — already-reviewed combos are saved, and re-running pass 4 resumes from where it left off

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,6 +5,14 @@ from __future__ import annotations
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class LLMRateLimitError(Exception):
    """Signals that the LLM provider's API rate limit was hit.

    Providers raise this instead of a provider-specific error so the
    pipeline can stop gracefully; a subsequent re-run picks up from
    where the previous one stopped.
    """
|
||||
|
||||
|
||||
class LLMProvider(ABC):
|
||||
"""Abstract LLM interface for physics estimation and plausibility review."""
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import re
|
||||
|
||||
from physcom.llm.base import LLMProvider
|
||||
from physcom.llm.base import LLMProvider, LLMRateLimitError
|
||||
from physcom.llm.prompts import PHYSICS_ESTIMATION_PROMPT, PLAUSIBILITY_REVIEW_PROMPT
|
||||
|
||||
|
||||
@@ -29,9 +29,14 @@ class GeminiLLMProvider(LLMProvider):
|
||||
description=combination_description,
|
||||
metrics=", ".join(metrics),
|
||||
)
|
||||
response = self._client.models.generate_content(
|
||||
model=self._model, contents=prompt
|
||||
)
|
||||
try:
|
||||
response = self._client.models.generate_content(
|
||||
model=self._model, contents=prompt
|
||||
)
|
||||
except Exception as exc:
|
||||
if "429" in str(exc) or "RESOURCE_EXHAUSTED" in str(exc):
|
||||
raise LLMRateLimitError(str(exc)) from exc
|
||||
raise
|
||||
return self._parse_json(response.text, metrics)
|
||||
|
||||
def review_plausibility(
|
||||
@@ -42,9 +47,14 @@ class GeminiLLMProvider(LLMProvider):
|
||||
description=combination_description,
|
||||
scores=scores_str,
|
||||
)
|
||||
response = self._client.models.generate_content(
|
||||
model=self._model, contents=prompt
|
||||
)
|
||||
try:
|
||||
response = self._client.models.generate_content(
|
||||
model=self._model, contents=prompt
|
||||
)
|
||||
except Exception as exc:
|
||||
if "429" in str(exc) or "RESOURCE_EXHAUSTED" in str(exc):
|
||||
raise LLMRateLimitError(str(exc)) from exc
|
||||
raise
|
||||
return response.text.strip()
|
||||
|
||||
def _parse_json(self, text: str, metrics: list[str]) -> dict[str, float]:
|
||||
|
||||
Reference in New Issue
Block a user