add API and Openrouter

2026-03-19 23:18:12 +00:00 · 2026-03-19 23:18:12 +00:00 · 4baaaa438c
commit 4baaaa438c
parent 363421c61c
11 changed files with 702 additions and 4 deletions
--- a/README.md
+++ b/README.md
@ -31,6 +31,15 @@ pip install -e .
 In order to use the framework, you need to have access to an LLM.
 Please follow the instructions in the [Controller README](graph_of_thoughts/controller/README.md) to configure the LLM of your choice.
 ### OpenRouter and OpenAI-compatible HTTP API
 1. Install API extras: `pip install "graph_of_thoughts[api]"` (or `pip install -e ".[api]"` from a source checkout).
 2. Copy [`graph_of_thoughts/language_models/config.openrouter.example.yaml`](graph_of_thoughts/language_models/config.openrouter.example.yaml) to `config.openrouter.yaml`, add your [OpenRouter](https://openrouter.ai/) keys and model ids, and either place it in `graph_of_thoughts/language_models/` or point `OPENROUTER_CONFIG` at your file.
 3. Run the server: `got-openrouter-api` (or `python -m graph_of_thoughts.api`).
 4. Call `POST /v1/chat/completions` with a standard OpenAI-style JSON body (`messages`, optional `model`, `temperature`, `max_tokens`). The server runs a small Graph of Operations (generate multiple candidates, score, keep the best) via OpenRouter.
 Details: [Language models README](graph_of_thoughts/language_models/README.md).
 ## Quick Start
 The following code snippet shows how to use the framework to solve the sorting problem for a list of 32 numbers using a CoT-like approach.  
--- a/graph_of_thoughts/api/__init__.py
+++ b/graph_of_thoughts/api/__init__.py
@ -0,0 +1 @@
 """HTTP API helpers (FastAPI) for running Graph of Thoughts with OpenRouter."""
--- a/graph_of_thoughts/api/__main__.py
+++ b/graph_of_thoughts/api/__main__.py
@ -0,0 +1,4 @@
 from graph_of_thoughts.api.app import run
 # Start the HTTP API server (via ``run`` from ``graph_of_thoughts.api.app``)
 # when this module is executed as a script.
 if __name__ == "__main__":
    run()
--- a/graph_of_thoughts/api/app.py
+++ b/graph_of_thoughts/api/app.py
@ -0,0 +1,192 @@
 # Copyright (c) 2023 ETH Zurich.
 #                    All rights reserved.
 #
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 from __future__ import annotations
 import logging
 import os
 import time
 import uuid
 from typing import Any, Dict, List, Optional
 from graph_of_thoughts.api.got_openai_pipeline import (
    ChatCompletionParser,
    ChatCompletionPrompter,
    build_default_chat_graph,
    extract_assistant_text,
    format_chat_messages,
 )
 from graph_of_thoughts.controller import Controller
 from graph_of_thoughts.language_models.openrouter import (
    OpenRouter,
    OpenRouterBadRequestError,
    OpenRouterRateLimitError,
 )
 # The HTTP layer needs FastAPI and Pydantic. If either import fails, re-raise
 # the ImportError with an install hint while keeping the original error chained.
 try:
    from fastapi import FastAPI, HTTPException
    from fastapi.responses import JSONResponse
    from pydantic import BaseModel, Field
 except ImportError as e:
    raise ImportError(
        "FastAPI and Pydantic are required for the HTTP API. "
        'Install with: pip install "graph_of_thoughts[api]"'
    ) from e
 # One chat turn of the request body. Both fields are required strings; the
 # handler forwards them unchanged to ``format_chat_messages``.
 class ChatMessage(BaseModel):
    # Speaker label of the turn; it is upper-cased when the prompt text is built.
    role: str
    # Text of the turn.
    content: str
 # Request body accepted by ``POST /v1/chat/completions``.
 class ChatCompletionRequest(BaseModel):
    # Optional model id; when set it is passed to the language model as a
    # per-request override and echoed back in the response.
    model: Optional[str] = None
    # Conversation turns; required.
    messages: List[ChatMessage]
    # Optional sampling overrides; ``None`` means "use the configured value".
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    # Streaming is not implemented: the handler answers ``stream=true`` with HTTP 400.
    stream: Optional[bool] = False
    # Only a single choice is supported, so the allowed range is exactly 1..1.
    # The handler additionally rejects any value other than 1.
    n: Optional[int] = Field(default=1, ge=1, le=1)
 def _get_config_path() -> str:
    return os.environ.get(
        "OPENROUTER_CONFIG",
        os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            "language_models",
            "config.openrouter.yaml",
        ),
    )
 def _run_controller(lm: OpenRouter, user_text: str) -> str:
    """Run the default chat graph on ``user_text`` and return the selected answer.

    A graph with three candidates is built, executed through a ``Controller``
    with the chat prompter/parser pair, and the text of the final thought is
    extracted with ``extract_assistant_text``.
    """
    operations_graph = build_default_chat_graph(num_candidates=3)
    prompter = ChatCompletionPrompter()
    parser = ChatCompletionParser()
    # The conversation text is the only entry of the initial problem state.
    initial_state = {"input": user_text}
    controller = Controller(lm, operations_graph, prompter, parser, initial_state)
    controller.run()
    final_thoughts = controller.get_final_thoughts()
    return extract_assistant_text(final_thoughts)
 # ASGI application object. ``run`` hands it to uvicorn by import path
 # ("graph_of_thoughts.api.app:app"), so the module-level name must stay ``app``.
 app = FastAPI(
    title="Graph of Thoughts (OpenRouter)",
    version="0.1.0",
    description="OpenAI-compatible chat completions backed by Graph of Operations + OpenRouter.",
 )
@app.on_event("startup")
 def _startup() -> None:
    """Configure logging at application startup; the level comes from ``LOG_LEVEL`` (default ``INFO``)."""
    logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
@app.get("/v1/models")
 def list_models() -> Dict[str, Any]:
    """List the model ids found in the OpenRouter config in OpenAI ``/v1/models`` shape.

    Returns an empty list when the config file does not exist. A single
    string under ``models`` is treated as a one-element list.
    """
    config_path = _get_config_path()
    if not os.path.isfile(config_path):
        return {"object": "list", "data": []}
    from graph_of_thoughts.language_models.openrouter import load_openrouter_config
    configured = load_openrouter_config(config_path).get("models") or []
    model_ids = [configured] if isinstance(configured, str) else configured
    entries = []
    for model_id in model_ids:
        entries.append(
            {
                "id": model_id,
                "object": "model",
                # Creation time is just the time of this call.
                "created": int(time.time()),
                "owned_by": "openrouter",
            }
        )
    return {"object": "list", "data": entries}
@app.post("/v1/chat/completions")
 def chat_completions(body: ChatCompletionRequest) -> JSONResponse:
    """Answer an OpenAI-style chat completion request via the default graph.

    The request messages are flattened into one prompt text, the graph is run
    with a fresh ``OpenRouter`` instance, and the selected candidate is
    returned as a single ``chat.completion`` choice.

    Raises:
        HTTPException: 400 for ``stream=true``, ``n != 1`` or an OpenRouter
            bad-request error; 429 for an OpenRouter rate-limit error; 500
            when the config file is missing.
    """
    # Reject request shapes this endpoint cannot serve before touching the config.
    if body.stream:
        raise HTTPException(
            status_code=400,
            detail="stream=true is not supported; use stream=false.",
        )
    if body.n != 1:
        raise HTTPException(status_code=400, detail="Only n=1 is supported.")
    config_path = _get_config_path()
    if not os.path.isfile(config_path):
        missing = f"OpenRouter config not found at {config_path}. Set OPENROUTER_CONFIG."
        raise HTTPException(status_code=500, detail=missing)
    lm = OpenRouter(config_path=config_path)
    conversation = [
        {"role": message.role, "content": message.content}
        for message in body.messages
    ]
    user_text = format_chat_messages(conversation)
    overrides = {
        "model": body.model,
        "temperature": body.temperature,
        "max_tokens": body.max_tokens,
    }
    try:
        lm.set_request_overrides(**overrides)
        try:
            answer = _run_controller(lm, user_text)
        finally:
            # Overrides are cleared whether or not the run succeeded.
            lm.clear_request_overrides()
    except OpenRouterRateLimitError as rate_limited:
        raise HTTPException(status_code=429, detail=str(rate_limited)) from rate_limited
    except OpenRouterBadRequestError as bad_request:
        raise HTTPException(status_code=400, detail=str(bad_request)) from bad_request
    # Report the requested model if given, else what the language model recorded,
    # else the first configured model, else a fixed placeholder.
    fallback_model = lm.models[0] if lm.models else "openrouter"
    reported_model = (
        body.model or lm.generation_model_id or lm.last_model_id or fallback_model
    )
    choice = {
        "index": 0,
        "message": {"role": "assistant", "content": answer},
        "finish_reason": "stop",
    }
    prompt_tokens = lm.prompt_tokens
    completion_tokens = lm.completion_tokens
    usage = {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }
    return JSONResponse(
        content={
            "id": f"chatcmpl-{uuid.uuid4().hex}",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": reported_model,
            "choices": [choice],
            "usage": usage,
        }
    )
 def run() -> None:
    """Start uvicorn serving ``graph_of_thoughts.api.app:app``.

    Settings come from the environment: ``HOST`` (default ``0.0.0.0``),
    ``PORT`` (default ``8000``) and ``RELOAD`` (enabled for ``1``, ``true``
    or ``yes``, case-insensitive).
    """
    import uvicorn
    bind_host = os.environ.get("HOST", "0.0.0.0")
    bind_port = int(os.environ.get("PORT", "8000"))
    reload_flag = os.environ.get("RELOAD", "").lower()
    auto_reload = reload_flag in ("1", "true", "yes")
    uvicorn.run(
        "graph_of_thoughts.api.app:app",
        host=bind_host,
        port=bind_port,
        reload=auto_reload,
    )
 # Allow starting the server by executing this module directly.
 if __name__ == "__main__":
    run()
--- a/graph_of_thoughts/api/got_openai_pipeline.py
+++ b/graph_of_thoughts/api/got_openai_pipeline.py
@ -0,0 +1,123 @@
 # Copyright (c) 2023 ETH Zurich.
 #                    All rights reserved.
 #
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 from __future__ import annotations
 import json
 import re
 from typing import Any, Dict, List, Union
 from graph_of_thoughts.operations import GraphOfOperations, operations
 from graph_of_thoughts.operations.thought import Thought
 from graph_of_thoughts.parser import Parser
 from graph_of_thoughts.prompter import Prompter
 def format_chat_messages(messages: List[Dict[str, str]]) -> str:
    """Flatten chat messages into one prompt text.

    Each message becomes ``"<ROLE>:\\n<content>"`` with the role upper-cased,
    and the messages are separated by a blank line.

    Args:
        messages: Mappings with optional ``role`` and ``content`` keys. A
            missing or ``None`` role is treated as ``"user"``; a missing or
            ``None`` content is treated as empty text; any other non-string
            value is converted with ``str``.

    Returns:
        The joined text, or ``""`` for an empty message list.
    """
    parts: List[str] = []
    for m in messages:
        role = m.get("role", "user")
        if role is None:
            role = "user"
        content = m.get("content", "")
        if content is None:
            # A null content must not show up as the literal text "None".
            content = ""
        elif not isinstance(content, str):
            content = str(content)
        parts.append(f"{str(role).upper()}:\n{content}")
    return "\n\n".join(parts)
 class ChatCompletionPrompter(Prompter):
    """Prompt builder for the generate → score → keep-best chat graph.

    Only generation and scoring prompts are implemented; the aggregation,
    improve and validation hooks raise ``RuntimeError``.
    """
    def generate_prompt(self, num_branches: int, **kwargs: Any) -> str:
        """Return the prompt asking for one candidate answer to ``kwargs["input"]``."""
        conversation = kwargs.get("input", "")
        instruction = (
            "You are a careful assistant. Read the conversation below and produce "
            "one candidate answer for the USER's latest needs."
        )
        closing = "Reply with your answer only, no preamble."
        return f"{instruction}\n\n{conversation}\n\n{closing}"
    def score_prompt(self, state_dicts: List[Dict], **kwargs: Any) -> str:
        """Return one prompt that asks for a JSON array of 0-10 scores, one per candidate."""
        header = [
            "You evaluate candidate answers for the same problem. "
            "Score each candidate from 0 (worst) to 10 (best) on correctness, "
            "completeness, and relevance.",
            "",
            "Return ONLY a JSON array of numbers, one score per candidate in order, e.g. [7, 5, 9].",
            "",
        ]
        # Candidates are numbered from 0 in the order of ``state_dicts``.
        candidate_blocks = [
            f"Candidate {index}:\n{state.get('candidate', '')}\n"
            for index, state in enumerate(state_dicts)
        ]
        return "\n".join(header + candidate_blocks)
    def aggregation_prompt(self, state_dicts: List[Dict], **kwargs: Any) -> str:
        """Not supported by this pipeline; always raises ``RuntimeError``."""
        raise RuntimeError("aggregation_prompt is not used by the chat completion pipeline")
    def improve_prompt(self, **kwargs: Any) -> str:
        """Not supported by this pipeline; always raises ``RuntimeError``."""
        raise RuntimeError("improve_prompt is not used by the chat completion pipeline")
    def validation_prompt(self, **kwargs: Any) -> str:
        """Not supported by this pipeline; always raises ``RuntimeError``."""
        raise RuntimeError("validation_prompt is not used by the chat completion pipeline")
 class ChatCompletionParser(Parser):
    """Parser for the generate → score → keep-best chat graph.

    Only generation and scoring answers are parsed; the aggregation, improve
    and validation hooks raise ``RuntimeError``.
    """
    def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:
        """Turn each response text into a state with a stripped ``candidate`` and its ``branch_index``."""
        out: List[Dict] = []
        for i, t in enumerate(texts):
            out.append({"candidate": (t or "").strip(), "branch_index": i})
        return out
    def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]:
        """Return exactly one score per state, read from the first response text.

        Missing scores are padded with ``0.0`` and surplus scores are dropped.
        """
        raw = texts[0] if texts else ""
        scores = self._scores_from_text(raw, len(states))
        if len(scores) < len(states):
            scores.extend([0.0] * (len(states) - len(scores)))
        return scores[: len(states)]
    def _scores_from_text(self, raw: str, n: int) -> List[float]:
        """Extract up to ``n`` scores from a model reply.

        Three strategies are tried in order:
        1. the whole reply is a JSON array;
        2. the reply contains a bracketed JSON array of exactly ``n`` numbers
           (handles replies that wrap the array in prose or a code fence);
        3. the first ``n`` numbers found anywhere in the reply.
        """
        raw = raw.strip()
        try:
            data = json.loads(raw)
            if isinstance(data, list):
                return [float(x) for x in data]
        except (json.JSONDecodeError, ValueError, TypeError):
            pass
        # Prefer an embedded array of the expected length over a blind number
        # scan, so digits in surrounding prose ("the 3 scores: [7, 5, 9]") are
        # not mistaken for scores.
        for fragment in re.findall(r"\[[^\[\]]*\]", raw):
            try:
                embedded = [float(x) for x in json.loads(fragment)]
            except (json.JSONDecodeError, ValueError, TypeError):
                continue
            if len(embedded) == n:
                return embedded
        nums = re.findall(r"-?\d+(?:\.\d+)?", raw)
        return [float(x) for x in nums[:n]]
    def parse_aggregation_answer(
        self, states: List[Dict], texts: List[str]
    ) -> Union[Dict, List[Dict]]:
        """Not supported by this pipeline; always raises ``RuntimeError``."""
        raise RuntimeError("parse_aggregation_answer is not used")
    def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict:
        """Not supported by this pipeline; always raises ``RuntimeError``."""
        raise RuntimeError("parse_improve_answer is not used")
    def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool:
        """Not supported by this pipeline; always raises ``RuntimeError``."""
        raise RuntimeError("parse_validation_answer is not used")
 def build_default_chat_graph(num_candidates: int = 3) -> GraphOfOperations:
    """Build the three-step chat graph: generate, score, keep the best.

    Args:
        num_candidates: Number of responses requested from the single
            generation prompt.

    Returns:
        A graph with ``Generate``, ``Score`` (combined scoring) and
        ``KeepBestN(1)`` appended in that order.
    """
    graph = GraphOfOperations()
    pipeline = (
        operations.Generate(
            num_branches_prompt=1, num_branches_response=num_candidates
        ),
        operations.Score(combined_scoring=True),
        operations.KeepBestN(1),
    )
    for operation in pipeline:
        graph.append_operation(operation)
    return graph
 def extract_assistant_text(final_thoughts_list: List[List[Thought]]) -> str:
    """Return the ``candidate`` text of the first thought in the first list.

    ``final_thoughts_list`` holds one list of thoughts per leaf operation.
    An empty outer list, an empty first list, a falsy state or a missing
    ``candidate`` key all yield ``""``.
    """
    first_leaf = final_thoughts_list[0] if final_thoughts_list else None
    if not first_leaf:
        return ""
    best_state = first_leaf[0].state or {}
    return str(best_state.get("candidate", ""))
--- a/graph_of_thoughts/language_models/README.md
+++ b/graph_of_thoughts/language_models/README.md
@ -4,6 +4,7 @@ The Language Models module is responsible for managing the large language models
 Currently, the framework supports the following LLMs:
 - GPT-4 / GPT-3.5 (Remote - OpenAI API)
 - OpenRouter (Remote - [OpenRouter](https://openrouter.ai/) OpenAI-compatible API, multi-key / multi-model rotation)
 - LLaMA-2 (Local - HuggingFace Transformers)
 The following sections describe how to instantiate individual LLMs and how to add new LLMs to the framework.
@ -28,12 +29,26 @@ The following sections describe how to instantiate individual LLMs and how to ad
 - Instantiate the language model based on the selected configuration key (predefined / custom).
 ```python
-lm = controller.ChatGPT(
+from graph_of_thoughts.language_models import ChatGPT
 lm = ChatGPT(
    "path/to/config.json",
    model_name=<configuration key>
 )
 ```
 ### OpenRouter
 - Copy `config.openrouter.example.yaml` (or `.json`) to `config.openrouter.yaml` next to this module, or pass an explicit path.
 - Set `api_keys` (list) and `models` (list). Each request picks a **random** key and a **random** model (uniform over the lists). If the HTTP API passes a `model` field, that model id is used for that request instead of a random one.
 - Optional: `http_referer` and `x_title` for OpenRouter attribution headers (see [OpenRouter docs](https://openrouter.ai/docs)).
 - HTTP **429** responses trigger exponential backoff and further rotation; **400** responses are retried a limited number of times with a new key/model pair, then surfaced as an error.
 ```python
 from graph_of_thoughts.language_models import OpenRouter
 lm = OpenRouter("/path/to/config.openrouter.yaml")
 ```
 ### LLaMA-2
 - Requires local hardware to run inference and a HuggingFace account.
 - Adjust the predefined `llama7b-hf`, `llama13b-hf` or `llama70b-hf` configurations or create a new configuration with a unique key.
@ -50,7 +65,9 @@ lm = controller.ChatGPT(
 - Instantiate the language model based on the selected configuration key (predefined / custom).
 ```python
-lm = controller.Llama2HF(
+from graph_of_thoughts.language_models import Llama2HF
 lm = Llama2HF(
    "path/to/config.json",
    model_name=<configuration key>
 )
--- a/graph_of_thoughts/language_models/__init__.py
+++ b/graph_of_thoughts/language_models/__init__.py
@ -1,3 +1,10 @@
 from .abstract_language_model import AbstractLanguageModel
 from .chatgpt import ChatGPT
 from .llamachat_hf import Llama2HF
 from .openrouter import (
    OpenRouter,
    OpenRouterBadRequestError,
    OpenRouterError,
    OpenRouterRateLimitError,
    load_openrouter_config,
 )
--- a/graph_of_thoughts/language_models/config.openrouter.example.json
+++ b/graph_of_thoughts/language_models/config.openrouter.example.json
@ -0,0 +1,21 @@
 {
  "base_url": "https://openrouter.ai/api/v1",
  "api_keys": [
    "sk-or-v1-replace-me-1",
    "sk-or-v1-replace-me-2"
  ],
  "models": [
    "openai/gpt-4o-mini",
    "anthropic/claude-3.5-haiku"
  ],
  "temperature": 0.7,
  "max_tokens": 4096,
  "stop": null,
  "prompt_token_cost": 0.0,
  "response_token_cost": 0.0,
  "max_retries_429": 8,
  "max_retries_400": 3,
  "base_backoff_seconds": 1.0,
  "http_referer": "",
  "x_title": ""
 }
--- a/graph_of_thoughts/language_models/config.openrouter.example.yaml
+++ b/graph_of_thoughts/language_models/config.openrouter.example.yaml
@ -0,0 +1,29 @@
 # Copy to config.openrouter.yaml (or set path explicitly) and fill in keys.
 # Per chat request, an API key and model are chosen at random (uniform) from the lists.
 base_url: https://openrouter.ai/api/v1
 api_keys:
  - sk-or-v1-replace-me-1
  - sk-or-v1-replace-me-2
 models:
  - openai/gpt-4o-mini
  - anthropic/claude-3.5-haiku
 temperature: 0.7
 max_tokens: 4096
 stop: null
 # Optional cost accounting (set to 0 if unknown)
 prompt_token_cost: 0.0
 response_token_cost: 0.0
 # Retries after HTTP 429 / 400 (each retry uses a fresh random key + model)
 max_retries_429: 8
 max_retries_400: 3
 base_backoff_seconds: 1.0
 # Optional OpenRouter attribution headers (recommended by OpenRouter)
 http_referer: ""
 x_title: ""
--- a/graph_of_thoughts/language_models/openrouter.py
+++ b/graph_of_thoughts/language_models/openrouter.py
@ -0,0 +1,287 @@
 # Copyright (c) 2023 ETH Zurich.
 #                    All rights reserved.
 #
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 from __future__ import annotations
 import json
 import logging
 import os
 import random
 import time
 from typing import Any, Dict, List, Optional, Union
 import yaml
 from openai import APIStatusError, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from .abstract_language_model import AbstractLanguageModel
 class OpenRouterError(Exception):
    """Base class for the errors raised by the OpenRouter integration in this module."""
 class OpenRouterBadRequestError(OpenRouterError):
    """Raised by ``OpenRouter.chat`` when HTTP 400 responses persist after ``max_retries_400`` retries."""
 class OpenRouterRateLimitError(OpenRouterError):
    """Raised by ``OpenRouter.chat`` when HTTP 429 responses persist after ``max_retries_429`` retries."""
 def load_openrouter_config(path: str) -> Dict[str, Any]:
    """Load a YAML or JSON OpenRouter configuration file.

    Public wrapper around ``_load_config_file``: files ending in ``.yaml`` or
    ``.yml`` are parsed as YAML, everything else as JSON.

    Raises:
        ValueError: If the top-level value of the file is not a mapping.
    """
    return _load_config_file(path)
 def _load_config_file(path: str) -> Dict[str, Any]:
    ext = os.path.splitext(path)[1].lower()
    with open(path, "r", encoding="utf-8") as f:
        if ext in (".yaml", ".yml"):
            data = yaml.safe_load(f)
        else:
            data = json.load(f)
    if not isinstance(data, dict):
        raise ValueError(f"Config at {path} must be a JSON/YAML object")
    return data
 class OpenRouter(AbstractLanguageModel):
    """
    OpenRouter-backed language model with per-request rotation of API keys and models.
    Configuration is loaded from YAML or JSON (see ``config.openrouter.example.yaml``).
    Every attempt in :meth:`chat` picks a random API key and, unless a model is
    pinned via ``model_override`` or :meth:`set_request_overrides`, a random model.
    HTTP 429 / 400 responses are retried a configurable number of times and then
    raised as :class:`OpenRouterRateLimitError` / :class:`OpenRouterBadRequestError`.
    """
    def __init__(
        self,
        config_path: str = "",
        model_name: str = "openrouter",
        cache: bool = False,
    ) -> None:
        """
        Load the configuration and derive all runtime settings from it.

        :param config_path: Path to the YAML/JSON config; ``""`` selects
            ``config.openrouter.yaml`` next to this module.
        :param model_name: Label stored as ``self.model_name``; the actual model
            ids come from the ``models`` list of the config.
        :param cache: Passed through to the base class; when true, ``query``
            results are cached per query string.
        :raises ValueError: If the config has no usable ``api_keys`` or ``models``.
        """
        # Set before the base constructor runs so they exist whatever it calls.
        self._rotation_model_name = model_name
        self._request_overrides: Dict[str, Any] = {}
        super().__init__(config_path, model_name, cache)
        self._apply_openrouter_config()
    def load_config(self, path: str) -> None:
        """
        Read the config file into ``self.config`` and remember its path.

        :param path: Config path; ``""`` falls back to ``config.openrouter.yaml``
            in this module's directory.
        """
        if path == "":
            path = os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                "config.openrouter.yaml",
            )
        self.config_path = path
        self.config = _load_config_file(path)
        self.logger.debug("Loaded OpenRouter config from %s", path)
    def _apply_openrouter_config(self) -> None:
        """
        Copy settings from ``self.config`` onto the instance, applying defaults.

        :raises ValueError: If ``api_keys`` or ``models`` is missing or contains
            only empty entries.
        """
        cfg = self.config
        self.base_url: str = cfg.get("base_url", "https://openrouter.ai/api/v1")
        # A single string is accepted in place of a list; empty entries are dropped.
        keys = cfg.get("api_keys") or []
        if isinstance(keys, str):
            keys = [keys]
        self.api_keys: List[str] = [k for k in keys if k]
        if not self.api_keys:
            raise ValueError("OpenRouter config must define non-empty 'api_keys'")
        models = cfg.get("models") or []
        if isinstance(models, str):
            models = [models]
        self.models: List[str] = [m for m in models if m]
        if not self.models:
            raise ValueError("OpenRouter config must define non-empty 'models'")
        self.temperature: float = float(cfg.get("temperature", 1.0))
        self.max_tokens: int = int(cfg.get("max_tokens", 4096))
        self.stop: Union[str, List[str], None] = cfg.get("stop")
        self.prompt_token_cost: float = float(cfg.get("prompt_token_cost", 0.0))
        self.response_token_cost: float = float(cfg.get("response_token_cost", 0.0))
        self.max_retries_429: int = int(cfg.get("max_retries_429", 8))
        self.max_retries_400: int = int(cfg.get("max_retries_400", 3))
        self.base_backoff_seconds: float = float(cfg.get("base_backoff_seconds", 1.0))
        # Attribution headers: config value first, then the environment variable.
        self.http_referer: str = cfg.get("http_referer", "") or os.getenv(
            "OPENROUTER_HTTP_REFERER", ""
        )
        self.x_title: str = cfg.get("x_title", "") or os.getenv("OPENROUTER_X_TITLE", "")
        self.model_name = self._rotation_model_name
        # ``last_model_id`` tracks the most recent successful call;
        # ``generation_model_id`` keeps the model of the first successful call.
        self.last_model_id: Optional[str] = None
        self.generation_model_id: Optional[str] = None
    def set_request_overrides(self, **kwargs: Any) -> None:
        """Optional per-request parameters (used by the HTTP API). Cleared with :meth:`clear_request_overrides`.

        Keyword arguments whose value is ``None`` are ignored. The keys read
        elsewhere in this class are ``model``, ``temperature`` and ``max_tokens``.
        """
        self._request_overrides = {k: v for k, v in kwargs.items() if v is not None}
    def clear_request_overrides(self) -> None:
        """Drop all overrides set by :meth:`set_request_overrides`."""
        self._request_overrides = {}
    def _pick_key(self) -> str:
        """Return a uniformly random API key from the configured list."""
        return random.choice(self.api_keys)
    def _pick_model(self, override: Optional[str]) -> str:
        """
        Return the model id for one attempt.

        Precedence: explicit ``override``, then the ``model`` request override,
        then a uniformly random entry of the configured ``models``.
        """
        if override:
            return override
        o = self._request_overrides.get("model")
        if o:
            return str(o)
        return random.choice(self.models)
    def _effective_temperature(self) -> float:
        """Return the request-override temperature if set, else the configured one."""
        t = self._request_overrides.get("temperature")
        return float(t) if t is not None else self.temperature
    def _effective_max_tokens(self) -> int:
        """Return the request-override ``max_tokens`` if set, else the configured one."""
        m = self._request_overrides.get("max_tokens")
        return int(m) if m is not None else self.max_tokens
    def _client_for_key(self, api_key: str) -> OpenAI:
        """Build an OpenAI client for ``api_key`` with the optional attribution headers."""
        headers: Dict[str, str] = {}
        if self.http_referer:
            headers["HTTP-Referer"] = self.http_referer
        if self.x_title:
            headers["X-Title"] = self.x_title
        return OpenAI(
            base_url=self.base_url,
            api_key=api_key,
            default_headers=headers or None,
        )
    def _sleep_backoff(self, attempt: int) -> None:
        """
        Sleep for an exponentially growing delay with jitter, capped at 60 seconds.

        :param attempt: Zero-based retry counter; the delay is
            ``base_backoff_seconds * 2**attempt`` plus up to one second of jitter.
        """
        cap = 60.0
        delay = min(
            self.base_backoff_seconds * (2**attempt) + random.random(),
            cap,
        )
        self.logger.warning("Backing off %.2fs (attempt %d)", delay, attempt + 1)
        time.sleep(delay)
    def chat(
        self,
        messages: List[Dict[str, str]],
        num_responses: int = 1,
        model_override: Optional[str] = None,
    ) -> ChatCompletion:
        """
        Call OpenRouter chat completions with rotation and retries for 429/400.

        :param messages: Chat messages in OpenAI format.
        :param num_responses: Number of choices requested (``n``).
        :param model_override: Model id to use for every attempt instead of rotating.
        :return: The first successful completion.
        :raises OpenRouterRateLimitError: After ``max_retries_429`` retries of HTTP 429.
        :raises OpenRouterBadRequestError: After ``max_retries_400`` retries of HTTP 400.
        :raises APIStatusError: For any other HTTP status error, unchanged.
        """
        attempts_429 = 0
        attempts_400 = 0
        # Counts every retry (429 and 400) and drives the backoff exponent.
        attempt = 0
        while True:
            # A fresh key/model pair is drawn for every attempt.
            api_key = self._pick_key()
            model_id = self._pick_model(model_override)
            client = self._client_for_key(api_key)
            try:
                response = client.chat.completions.create(
                    model=model_id,
                    messages=messages,
                    temperature=self._effective_temperature(),
                    max_tokens=self._effective_max_tokens(),
                    n=num_responses,
                    stop=self.stop,
                )
                if response.usage is not None:
                    self.prompt_tokens += response.usage.prompt_tokens or 0
                    self.completion_tokens += response.usage.completion_tokens or 0
                    # Cost is recomputed from the running totals (prices are per 1000 tokens).
                    pt_k = float(self.prompt_tokens) / 1000.0
                    ct_k = float(self.completion_tokens) / 1000.0
                    self.cost = (
                        self.prompt_token_cost * pt_k
                        + self.response_token_cost * ct_k
                    )
                self.last_model_id = model_id
                if self.generation_model_id is None:
                    self.generation_model_id = model_id
                self.logger.info(
                    "OpenRouter response model=%s id=%s", model_id, response.id
                )
                return response
            except APIStatusError as e:
                code = e.status_code
                if code == 429:
                    if attempts_429 >= self.max_retries_429:
                        raise OpenRouterRateLimitError(
                            f"OpenRouter rate limited after {attempts_429} retries: {e.message}"
                        ) from e
                    attempts_429 += 1
                    self._sleep_backoff(attempt)
                    attempt += 1
                    continue
                if code == 400:
                    self.logger.warning(
                        "OpenRouter HTTP 400 (will retry with rotated key/model if allowed): %s body=%s",
                        e.message,
                        e.body,
                    )
                    if attempts_400 >= self.max_retries_400:
                        raise OpenRouterBadRequestError(
                            f"OpenRouter bad request after {attempts_400} retries: {e.message}"
                        ) from e
                    attempts_400 += 1
                    attempt += 1
                    # Short random pause only; no exponential backoff for 400s.
                    time.sleep(random.uniform(0.2, 0.8))
                    continue
                raise
            except Exception:
                self.logger.exception("Unexpected error calling OpenRouter")
                raise
    def query(
        self, query: str, num_responses: int = 1
    ) -> Union[List[ChatCompletion], ChatCompletion]:
        """
        Send ``query`` as a single user message and return the completion(s).

        :param query: Prompt text.
        :param num_responses: Number of responses wanted. For more than one, the
            request is retried with a smaller ``n`` after a failure, for at most
            ``num_responses`` failed attempts.
        :return: One ``ChatCompletion`` when ``num_responses == 1``, otherwise a
            list of completions (possibly fewer than requested if some attempts failed).
        :raises Exception: The last error seen, when ``num_responses > 1`` and
            not a single completion could be obtained.
        """
        if self.cache and query in self.response_cache:
            return self.response_cache[query]
        messages = [{"role": "user", "content": query}]
        model_ov = self._request_overrides.get("model")
        model_override = str(model_ov) if model_ov else None
        if num_responses == 1:
            response = self.chat(messages, 1, model_override=model_override)
        else:
            response = []
            next_try = num_responses
            total_num_attempts = num_responses
            remaining = num_responses
            last_error: Optional[Exception] = None
            while remaining > 0 and total_num_attempts > 0:
                try:
                    assert next_try > 0
                    res = self.chat(
                        messages, next_try, model_override=model_override
                    )
                    response.append(res)
                    remaining -= next_try
                    next_try = min(remaining, next_try)
                except Exception as e:
                    last_error = e
                    # Halve the batch size (never below 1) before trying again.
                    next_try = max(1, (next_try + 1) // 2)
                    self.logger.warning(
                        "Error in OpenRouter query: %s, retrying with n=%s",
                        e,
                        next_try,
                    )
                    time.sleep(random.uniform(0.5, 2.0))
                    total_num_attempts -= 1
            if not response and last_error is not None:
                # Every attempt failed: surface the error (e.g. the typed 429/400
                # errors from ``chat``) instead of returning an empty result that
                # callers would mistake for success.
                raise last_error
        if self.cache:
            self.response_cache[query] = response
        return response
    def get_response_texts(
        self, query_response: Union[List[ChatCompletion], ChatCompletion]
    ) -> List[str]:
        """
        Flatten one or more completions into the list of their choice texts.

        :param query_response: A completion or a list of completions.
        :return: One string per choice, in order; a ``None`` content becomes ``""``.
        """
        if not isinstance(query_response, list):
            query_response = [query_response]
        texts: List[str] = []
        for response in query_response:
            for choice in response.choices:
                c = choice.message.content
                texts.append(c if c is not None else "")
        return texts
--- a/pyproject.toml
+++ b/pyproject.toml
@ -22,6 +22,7 @@ classifiers = [
 dependencies = [
  "backoff>=2.2.1,<3.0.0",
  "openai>=1.0.0,<2.0.0",
  "pyyaml>=6.0.1,<7.0.0",
  "matplotlib>=3.7.1,<4.0.0",
  "numpy>=1.24.3,<2.0.0",
  "pandas>=2.0.3,<3.0.0",
@ -33,7 +34,14 @@ dependencies = [
  "scipy>=1.10.1,<2.0.0",
 ]
 [project.optional-dependencies]
 api = [
  "fastapi>=0.109.0,<1.0.0",
  "uvicorn[standard]>=0.27.0,<1.0.0",
 ]
 [project.urls]
 Homepage = "https://github.com/spcl/graph-of-thoughts"
 [project.scripts]
 got-openrouter-api = "graph_of_thoughts.api.app:run"
		`@ -0,0 +1 @@`
							`"""HTTP API helpers (FastAPI) for running Graph of Thoughts with OpenRouter."""`