From ee885b2390832fa49bc092f3df4c694a2c4b7c42 Mon Sep 17 00:00:00 2001
From: Andrew Simonson <asimonson1125@gmail.com>
Date: Wed, 18 Feb 2026 22:36:13 -0600
Subject: [PATCH] =?UTF-8?q?Handle=20LLM=20rate=20limits=20gracefully=20?=
 =?UTF-8?q?=E2=80=94=20stop=20and=20resume=20rather=20than=20fail?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

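- Add LLMRateLimitError to llm/base.py (provider-agnostic)
- GeminiLLMProvider raises it on 429/RESOURCE_EXHAUSTED responses
- Pipeline catches it, marks the run completed (not failed), and returns
  partial results — already-reviewed combos are saved, and re-running
  pass 4 resumes from where it left off
- Remove the blanket `except Exception` handlers around the pass 2 and
  pass 4 LLM calls so the rate-limit error can reach the pipeline-level
  handler; as a consequence, other LLM errors are no longer caught at
  the call site (previously: stub-estimate fallback in pass 2, silent
  skip in pass 4)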

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/physcom/engine/pipeline.py      | 75 +++++++++++++++--------------
 src/physcom/llm/base.py             |  8 +++
 src/physcom/llm/providers/gemini.py | 24 ++++++---
 3 files changed, 65 insertions(+), 42 deletions(-)

diff --git a/src/physcom/engine/pipeline.py b/src/physcom/engine/pipeline.py
index b2b86ff..d3580ad 100644
--- a/src/physcom/engine/pipeline.py
+++ b/src/physcom/engine/pipeline.py
@@ -9,7 +9,7 @@ from physcom.db.repository import Repository
 from physcom.engine.combinator import generate_combinations
 from physcom.engine.constraint_resolver import ConstraintResolver, ConstraintResult
 from physcom.engine.scorer import Scorer
-from physcom.llm.base import LLMProvider
+from physcom.llm.base import LLMProvider, LLMRateLimitError
 from physcom.models.combination import Combination, ScoredResult
 from physcom.models.domain import Domain
 
@@ -184,12 +184,9 @@ class Pipeline:
                 if 2 in passes and existing_pass < 2:
                     description = _describe_combination(combo)
                     if self.llm:
-                        try:
-                            raw_metrics = self.llm.estimate_physics(
-                                description, metric_names
-                            )
-                        except Exception:
-                            raw_metrics = self._stub_estimate(combo, metric_names)
+                        raw_metrics = self.llm.estimate_physics(
+                            description, metric_names
+                        )
                     else:
                         raw_metrics = self._stub_estimate(combo, metric_names)
 
@@ -287,34 +284,31 @@ class Pipeline:
                             and cur_result["composite_score"] is not None
                             and cur_result["composite_score"] >= score_threshold
                         ):
-                            try:
-                                description = _describe_combination(combo)
-                                db_scores = self.repo.get_combination_scores(
-                                    combo.id, domain.id
-                                )
-                                score_dict = {
-                                    s["metric_name"]: s["normalized_score"]
-                                    for s in db_scores
-                                    if s["normalized_score"] is not None
-                                }
-                                review = self.llm.review_plausibility(
-                                    description, score_dict
-                                )
-                                self.repo.save_result(
-                                    combo.id,
-                                    domain.id,
-                                    cur_result["composite_score"],
-                                    pass_reached=4,
-                                    novelty_flag=cur_result.get("novelty_flag"),
-                                    llm_review=review,
-                                    human_notes=cur_result.get("human_notes"),
-                                )
-                                result.pass4_reviewed += 1
-                                self._update_run_counters(
-                                    run_id, result, current_pass=4
-                                )
-                            except Exception:
-                                pass  # skip this combo; don't abort the run
+                            description = _describe_combination(combo)
+                            db_scores = self.repo.get_combination_scores(
+                                combo.id, domain.id
+                            )
+                            score_dict = {
+                                s["metric_name"]: s["normalized_score"]
+                                for s in db_scores
+                                if s["normalized_score"] is not None
+                            }
+                            review = self.llm.review_plausibility(
+                                description, score_dict
+                            )
+                            self.repo.save_result(
+                                combo.id,
+                                domain.id,
+                                cur_result["composite_score"],
+                                pass_reached=4,
+                                novelty_flag=cur_result.get("novelty_flag"),
+                                llm_review=review,
+                                human_notes=cur_result.get("human_notes"),
+                            )
+                            result.pass4_reviewed += 1
+                            self._update_run_counters(
+                                run_id, result, current_pass=4
+                            )
 
         except CancelledError:
             if run_id is not None:
@@ -325,6 +319,17 @@ class Pipeline:
                 )
             result.top_results = self.repo.get_top_results(domain.name, limit=20)
             return result
+        except LLMRateLimitError:
+            # Rate limit hit — save progress and let the user re-run to continue.
+            # Already-reviewed combos are persisted; resumability skips them next time.
+            if run_id is not None:
+                self.repo.update_pipeline_run(
+                    run_id,
+                    status="completed",
+                    completed_at=datetime.now(timezone.utc).isoformat(),
+                )
+            result.top_results = self.repo.get_top_results(domain.name, limit=20)
+            return result
 
         # Mark run as completed
         if run_id is not None:
diff --git a/src/physcom/llm/base.py b/src/physcom/llm/base.py
index 495301d..d8efb4e 100644
--- a/src/physcom/llm/base.py
+++ b/src/physcom/llm/base.py
@@ -5,6 +5,14 @@ from __future__ import annotations
 from abc import ABC, abstractmethod
 
 
+class LLMRateLimitError(Exception):
+    """Raised by an LLM provider when its API rate limit is exceeded.
+
+    Providers wrap the vendor error (e.g. 429 / RESOURCE_EXHAUSTED) in this
+    type so callers can stop, keep partial results, and be re-run later.
+    """
+
+
 class LLMProvider(ABC):
     """Abstract LLM interface for physics estimation and plausibility review."""
 
diff --git a/src/physcom/llm/providers/gemini.py b/src/physcom/llm/providers/gemini.py
index 2fe8284..81748f6 100644
--- a/src/physcom/llm/providers/gemini.py
+++ b/src/physcom/llm/providers/gemini.py
@@ -5,7 +5,7 @@ from __future__ import annotations
 import json
 import re
 
-from physcom.llm.base import LLMProvider
+from physcom.llm.base import LLMProvider, LLMRateLimitError
 from physcom.llm.prompts import PHYSICS_ESTIMATION_PROMPT, PLAUSIBILITY_REVIEW_PROMPT
 
 
@@ -29,9 +29,14 @@ class GeminiLLMProvider(LLMProvider):
             description=combination_description,
             metrics=", ".join(metrics),
         )
-        response = self._client.models.generate_content(
-            model=self._model, contents=prompt
-        )
+        try:
+            response = self._client.models.generate_content(
+                model=self._model, contents=prompt
+            )
+        except Exception as exc:
+            if "429" in str(exc) or "RESOURCE_EXHAUSTED" in str(exc):
+                raise LLMRateLimitError(str(exc)) from exc
+            raise
         return self._parse_json(response.text, metrics)
 
     def review_plausibility(
@@ -42,9 +47,14 @@ class GeminiLLMProvider(LLMProvider):
             description=combination_description,
             scores=scores_str,
         )
-        response = self._client.models.generate_content(
-            model=self._model, contents=prompt
-        )
+        try:
+            response = self._client.models.generate_content(
+                model=self._model, contents=prompt
+            )
+        except Exception as exc:
+            if "429" in str(exc) or "RESOURCE_EXHAUSTED" in str(exc):
+                raise LLMRateLimitError(str(exc)) from exc
+            raise
         return response.text.strip()
 
     def _parse_json(self, text: str, metrics: list[str]) -> dict[str, float]: