common.lib.llm
import json
import requests
from pathlib import Path
from typing import List, Optional, Union
from pydantic import SecretStr
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
from langchain_mistralai import ChatMistralAI
from langchain_deepseek import ChatDeepSeek


class LLMAdapter:
    """
    Uniform wrapper around the LangChain chat model of a configurable provider.

    The provider name selects which LangChain client is instantiated; the
    resulting client is stored in `self.llm` and used by `generate_text()`.
    """

    def __init__(
        self,
        provider: str,
        model: str,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        temperature: float = 0.1,
        max_tokens: int = 1000,
        client_kwargs: Optional[dict] = None,
    ):
        """
        Store the connection settings and instantiate the chat model.

        :param provider: Provider name, case-insensitive: 'openai', 'google',
          'anthropic', 'mistral', 'deepseek', 'ollama', 'vllm' or 'lmstudio'
        :param model: Model name (e.g. 'gpt-4o-mini', 'claude-3-opus', 'mistral-small')
        :param api_key: API key, if the provider requires one
        :param base_url: Server URL for local models or custom endpoints
        :param temperature: Temperature the client is created with
        :param max_tokens: Maximum number of output tokens
        :param client_kwargs: Additional client parameters; copied here and
          passed on to the Ollama client only
        :raises ValueError: If the provider is not supported
        """
        self.provider = provider.lower()
        self.model = model
        self.api_key = api_key
        self.base_url = base_url
        self.temperature = temperature
        self.structured_output = False
        self.parser = None
        self.max_tokens = max_tokens
        self.client_kwargs = dict(client_kwargs) if client_kwargs else {}
        # Must come last: _load_llm() reads the attributes assigned above.
        self.llm: BaseChatModel = self._load_llm()

    def _model_rejects_temperature(self) -> bool:
        """
        Tell whether the model name contains 'o3' or 'gpt-5'.

        Temperature is not sent for these models, neither when the client is
        created nor when a request is made.
        """
        return "o3" in self.model or "gpt-5" in self.model

    def _api_key_kwargs(self) -> dict:
        """
        Build the `api_key` keyword argument for clients that take a SecretStr.

        Without a key the argument is left out entirely rather than wrapping
        None in a SecretStr, so the client library can apply its own default
        (typically an environment variable) or report the missing key itself.
        """
        return {"api_key": SecretStr(self.api_key)} if self.api_key else {}

    def _load_llm(self) -> BaseChatModel:
        """
        Instantiate the LangChain chat model that belongs to `self.provider`.

        For 'ollama', 'vllm' and 'lmstudio', `self.model` is overwritten with
        the model name the created client reports.

        :returns: The chat model instance
        :raises ValueError: If the provider is not supported, or if the model
          name cannot be retrieved from a vLLM server
        """
        if self.provider == "openai":
            # Temperature is not supported for all models
            sampling = {} if self._model_rejects_temperature() else {"temperature": self.temperature}
            return ChatOpenAI(
                model=self.model,
                base_url=self.base_url or "https://api.openai.com/v1",
                max_tokens=self.max_tokens,
                **self._api_key_kwargs(),
                **sampling,
            )
        elif self.provider == "google":
            return ChatGoogleGenerativeAI(
                model=self.model,
                temperature=self.temperature,
                google_api_key=self.api_key,
                max_tokens=self.max_tokens,
            )
        elif self.provider == "anthropic":
            return ChatAnthropic(
                model_name=self.model,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
                timeout=100,
                stop=None,
                **self._api_key_kwargs(),
            )
        elif self.provider == "mistral":
            return ChatMistralAI(
                model_name=self.model,
                temperature=self.temperature,
                base_url=self.base_url,  # Optional override
                max_tokens=self.max_tokens,
                **self._api_key_kwargs(),
            )
        elif self.provider == "deepseek":
            return ChatDeepSeek(
                model=self.model,
                temperature=self.temperature,
                base_url=self.base_url,
                # Requests are capped at 8192 output tokens for this provider
                max_tokens=min(self.max_tokens, 8192),
                **self._api_key_kwargs(),
            )
        elif self.provider == "ollama":
            ollama_adapter = ChatOllama(
                model=self.model,
                temperature=self.temperature,
                base_url=self.base_url or "http://localhost:11434",
                # ChatOllama names its output token limit `num_predict`
                num_predict=self.max_tokens,
                client_kwargs=self.client_kwargs,
            )
            self.model = ollama_adapter.model
            return ollama_adapter
        elif self.provider in {"vllm", "lmstudio"}:
            # OpenAI-compatible local servers
            if self.provider == "lmstudio" and not self.api_key:
                self.api_key = "lm-studio"

            # For vLLM, query the server to get the actual model name.
            # We can't leave this empty, unfortunately.
            if self.provider == "vllm" and self.model == "vllm_model":
                model_name = self.get_vllm_model_name(self.base_url, self.api_key)
                self.model = model_name
            else:
                model_name = self.model if self.model else "lmstudio-model"

            llm = ChatOpenAI(
                model=model_name,
                temperature=self.temperature,
                base_url=self.base_url,
                max_tokens=self.max_tokens,
                **self._api_key_kwargs(),
            )
            self.model = llm.model_name
            return llm
        else:
            raise ValueError(f"Unsupported LLM provider: {self.provider}")

    def generate_text(
        self,
        messages: Union[str, List[BaseMessage]],
        system_prompt: Optional[str] = None,
        temperature: float = 0.1,
        files: Optional[List[Union[str, Path, dict]]] = None,
    ) -> BaseMessage:
        """
        Send a prompt or a prepared message list to the model.

        :param messages: Text prompt or list of LangChain messages. A list is
          sent as-is; `system_prompt` and `files` are then not used.
        :param system_prompt: Optional system prompt, placed before a text prompt
        :param temperature: Temperature for this request. Not sent for the
          'google' and 'ollama' providers, nor for models with 'o3' or
          'gpt-5' in their name.
        :param files: Optional media for a text prompt; handed to
          `create_multimodal_content()`, which accepts URL strings only
        :returns: Generated response message
        :raises ValueError: If an entry of `files` is not a string
        """
        if isinstance(messages, str):
            lc_messages = []
            if system_prompt:
                lc_messages.append(SystemMessage(content=system_prompt))

            # Create multimodal content if files are provided
            if files:
                multimodal_content = self.create_multimodal_content(messages, files)
                lc_messages.append(HumanMessage(content=multimodal_content))
            else:
                lc_messages.append(HumanMessage(content=messages))
        else:
            lc_messages = messages

        kwargs = {"temperature": temperature}
        if self.provider in ("google", "ollama") or self._model_rejects_temperature():
            kwargs = {}

        # Errors raised by the client propagate to the caller unchanged
        return self.llm.invoke(lc_messages, **kwargs)

    def create_multimodal_content(
        self,
        text: str,
        image_urls: Optional[List[str]] = None,
    ) -> List[dict]:
        """
        Create multimodal content structure for LangChain messages with media URLs.
        Only supports image URLs for now.

        :param text: Text content; left out of the result when empty
        :param image_urls: List of media URLs (http/https)
        :returns: List of content blocks, images first and the text block last
        :raises ValueError: If an entry of `image_urls` is not a string
        """
        content = []

        # Add image URLs first
        if image_urls:
            for url in image_urls:
                if not isinstance(url, str):
                    raise ValueError(f"Image URL must be a string, got {type(url)}")

                # Format based on provider
                if self.provider == "anthropic":
                    content.append(
                        {"type": "image", "source": {"type": "url", "url": url}}
                    )
                else:
                    # OpenAI-style format
                    content.append({"type": "image_url", "image_url": {"url": url}})

        # Add text content
        if text:
            content.append({"type": "text", "text": text})

        return content

    def set_structure(self, json_schema):
        """
        Make the model answer according to a JSON schema.

        Replaces `self.llm` with a schema-bound version of itself and sets
        `self.structured_output`.

        :param json_schema: JSON schema, as a dict or as a JSON string
        :raises ValueError: If the schema is empty or is a string that is not valid JSON
        :raises TypeError: If the schema cannot be serialised to JSON
        """
        if not json_schema:
            raise ValueError("json_schema is None")

        if isinstance(json_schema, str):
            json_schema = json.loads(json_schema)

        json.dumps(json_schema)  # To validate / raise an error

        # LM Studio needs some more guidance
        if self.provider == "lmstudio":
            json_schema = {"type": "json_schema", "json_schema": {"schema": json_schema}}
            self.llm = self.llm.bind(response_format=json_schema)
        else:
            self.llm = self.llm.with_structured_output(json_schema)
        self.structured_output = True

    @staticmethod
    def get_model_options(config) -> dict:
        """
        Returns model choice options for UserInput

        :param config: Configuration reader, passed on to `get_models()`
        :returns: Dict mapping model IDs to model names
        """
        models = LLMAdapter.get_models(config)
        if not models:
            return {}
        return {model_id: model_values["name"] for model_id, model_values in models.items()}

    @staticmethod
    def get_model_providers(config) -> dict:
        """
        Returns available model providers through APIs

        :param config: Configuration reader, passed on to `get_models()`
        :returns: Dict mapping provider names to their capitalised form,
          ordered alphabetically; models without a provider are skipped
        """
        models = LLMAdapter.get_models(config)
        if not models:
            return {}
        providers = {model_values.get("provider", "") for model_values in models.values()}
        # Sorted so the options come out in the same order on every run
        return {provider: provider.capitalize() for provider in sorted(p for p in providers if p)}

    @staticmethod
    def get_models(config) -> dict:
        """
        Returns a dict with LLM models supported by 4CAT, either through an API or as a local option.
        Make sure to keep up-to-date!

        :param config: Configuration reader; its "PATH_ROOT" value is the
          folder that contains `common/assets/llms.json`
        :returns dict: A dict with model IDs as keys and details as values
        """
        models_path = config.get("PATH_ROOT").joinpath("common/assets/llms.json")
        # Explicit encoding: do not depend on the platform default
        with models_path.open(encoding="utf-8") as models_file:
            return json.load(models_file)

    @staticmethod
    def get_vllm_model_name(base_url: str, api_key: Optional[str] = None) -> str:
        """
        Query vLLM server to get the name of the served model.

        The model list is requested from `<base_url>/models`, so `base_url`
        presumably already ends with the API version prefix (e.g. `/v1`).

        :param base_url: Base URL of the vLLM server
        :param api_key: Optional key, sent as a bearer token
        :returns: ID of the first model the server lists
        :raises ValueError: If no base URL is given, the request fails, or the
          server lists no models
        """
        if not base_url:
            raise ValueError("Could not retrieve model name from vLLM server: no base_url provided")

        headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
        try:
            response = requests.get(f"{base_url.rstrip('/')}/models", headers=headers, timeout=10)
            response.raise_for_status()

            # Get the first available model
            served_models = response.json().get("data")
            if not served_models:
                raise ValueError("No models found on vLLM server")
            return served_models[0]["id"]
        except Exception as e:
            # Callers only have to handle ValueError; the cause stays attached
            raise ValueError(f"Could not retrieve model name from vLLM server: {e}") from e
class LLMAdapter:
    """
    Uniform wrapper around the LangChain chat model of a configurable provider.

    The provider name selects which LangChain client is instantiated; the
    resulting client is stored in `self.llm` and used by `generate_text()`.
    """

    def __init__(
        self,
        provider: str,
        model: str,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        temperature: float = 0.1,
        max_tokens: int = 1000,
        client_kwargs: Optional[dict] = None,
    ):
        """
        Store the connection settings and instantiate the chat model.

        :param provider: Provider name, case-insensitive: 'openai', 'google',
          'anthropic', 'mistral', 'deepseek', 'ollama', 'vllm' or 'lmstudio'
        :param model: Model name (e.g. 'gpt-4o-mini', 'claude-3-opus', 'mistral-small')
        :param api_key: API key, if the provider requires one
        :param base_url: Server URL for local models or custom endpoints
        :param temperature: Temperature the client is created with
        :param max_tokens: Maximum number of output tokens
        :param client_kwargs: Additional client parameters; copied here and
          passed on to the Ollama client only
        :raises ValueError: If the provider is not supported
        """
        self.provider = provider.lower()
        self.model = model
        self.api_key = api_key
        self.base_url = base_url
        self.temperature = temperature
        self.structured_output = False
        self.parser = None
        self.max_tokens = max_tokens
        self.client_kwargs = dict(client_kwargs) if client_kwargs else {}
        # Must come last: _load_llm() reads the attributes assigned above.
        self.llm: BaseChatModel = self._load_llm()

    def _model_rejects_temperature(self) -> bool:
        """
        Tell whether the model name contains 'o3' or 'gpt-5'.

        Temperature is not sent for these models, neither when the client is
        created nor when a request is made.
        """
        return "o3" in self.model or "gpt-5" in self.model

    def _api_key_kwargs(self) -> dict:
        """
        Build the `api_key` keyword argument for clients that take a SecretStr.

        Without a key the argument is left out entirely rather than wrapping
        None in a SecretStr, so the client library can apply its own default
        (typically an environment variable) or report the missing key itself.
        """
        return {"api_key": SecretStr(self.api_key)} if self.api_key else {}

    def _load_llm(self) -> BaseChatModel:
        """
        Instantiate the LangChain chat model that belongs to `self.provider`.

        For 'ollama', 'vllm' and 'lmstudio', `self.model` is overwritten with
        the model name the created client reports.

        :returns: The chat model instance
        :raises ValueError: If the provider is not supported, or if the model
          name cannot be retrieved from a vLLM server
        """
        if self.provider == "openai":
            # Temperature is not supported for all models
            sampling = {} if self._model_rejects_temperature() else {"temperature": self.temperature}
            return ChatOpenAI(
                model=self.model,
                base_url=self.base_url or "https://api.openai.com/v1",
                max_tokens=self.max_tokens,
                **self._api_key_kwargs(),
                **sampling,
            )
        elif self.provider == "google":
            return ChatGoogleGenerativeAI(
                model=self.model,
                temperature=self.temperature,
                google_api_key=self.api_key,
                max_tokens=self.max_tokens,
            )
        elif self.provider == "anthropic":
            return ChatAnthropic(
                model_name=self.model,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
                timeout=100,
                stop=None,
                **self._api_key_kwargs(),
            )
        elif self.provider == "mistral":
            return ChatMistralAI(
                model_name=self.model,
                temperature=self.temperature,
                base_url=self.base_url,  # Optional override
                max_tokens=self.max_tokens,
                **self._api_key_kwargs(),
            )
        elif self.provider == "deepseek":
            return ChatDeepSeek(
                model=self.model,
                temperature=self.temperature,
                base_url=self.base_url,
                # Requests are capped at 8192 output tokens for this provider
                max_tokens=min(self.max_tokens, 8192),
                **self._api_key_kwargs(),
            )
        elif self.provider == "ollama":
            ollama_adapter = ChatOllama(
                model=self.model,
                temperature=self.temperature,
                base_url=self.base_url or "http://localhost:11434",
                # ChatOllama names its output token limit `num_predict`
                num_predict=self.max_tokens,
                client_kwargs=self.client_kwargs,
            )
            self.model = ollama_adapter.model
            return ollama_adapter
        elif self.provider in {"vllm", "lmstudio"}:
            # OpenAI-compatible local servers
            if self.provider == "lmstudio" and not self.api_key:
                self.api_key = "lm-studio"

            # For vLLM, query the server to get the actual model name.
            # We can't leave this empty, unfortunately.
            if self.provider == "vllm" and self.model == "vllm_model":
                model_name = self.get_vllm_model_name(self.base_url, self.api_key)
                self.model = model_name
            else:
                model_name = self.model if self.model else "lmstudio-model"

            llm = ChatOpenAI(
                model=model_name,
                temperature=self.temperature,
                base_url=self.base_url,
                max_tokens=self.max_tokens,
                **self._api_key_kwargs(),
            )
            self.model = llm.model_name
            return llm
        else:
            raise ValueError(f"Unsupported LLM provider: {self.provider}")

    def generate_text(
        self,
        messages: Union[str, List[BaseMessage]],
        system_prompt: Optional[str] = None,
        temperature: float = 0.1,
        files: Optional[List[Union[str, Path, dict]]] = None,
    ) -> BaseMessage:
        """
        Send a prompt or a prepared message list to the model.

        :param messages: Text prompt or list of LangChain messages. A list is
          sent as-is; `system_prompt` and `files` are then not used.
        :param system_prompt: Optional system prompt, placed before a text prompt
        :param temperature: Temperature for this request. Not sent for the
          'google' and 'ollama' providers, nor for models with 'o3' or
          'gpt-5' in their name.
        :param files: Optional media for a text prompt; handed to
          `create_multimodal_content()`, which accepts URL strings only
        :returns: Generated response message
        :raises ValueError: If an entry of `files` is not a string
        """
        if isinstance(messages, str):
            lc_messages = []
            if system_prompt:
                lc_messages.append(SystemMessage(content=system_prompt))

            # Create multimodal content if files are provided
            if files:
                multimodal_content = self.create_multimodal_content(messages, files)
                lc_messages.append(HumanMessage(content=multimodal_content))
            else:
                lc_messages.append(HumanMessage(content=messages))
        else:
            lc_messages = messages

        kwargs = {"temperature": temperature}
        if self.provider in ("google", "ollama") or self._model_rejects_temperature():
            kwargs = {}

        # Errors raised by the client propagate to the caller unchanged
        return self.llm.invoke(lc_messages, **kwargs)

    def create_multimodal_content(
        self,
        text: str,
        image_urls: Optional[List[str]] = None,
    ) -> List[dict]:
        """
        Create multimodal content structure for LangChain messages with media URLs.
        Only supports image URLs for now.

        :param text: Text content; left out of the result when empty
        :param image_urls: List of media URLs (http/https)
        :returns: List of content blocks, images first and the text block last
        :raises ValueError: If an entry of `image_urls` is not a string
        """
        content = []

        # Add image URLs first
        if image_urls:
            for url in image_urls:
                if not isinstance(url, str):
                    raise ValueError(f"Image URL must be a string, got {type(url)}")

                # Format based on provider
                if self.provider == "anthropic":
                    content.append(
                        {"type": "image", "source": {"type": "url", "url": url}}
                    )
                else:
                    # OpenAI-style format
                    content.append({"type": "image_url", "image_url": {"url": url}})

        # Add text content
        if text:
            content.append({"type": "text", "text": text})

        return content

    def set_structure(self, json_schema):
        """
        Make the model answer according to a JSON schema.

        Replaces `self.llm` with a schema-bound version of itself and sets
        `self.structured_output`.

        :param json_schema: JSON schema, as a dict or as a JSON string
        :raises ValueError: If the schema is empty or is a string that is not valid JSON
        :raises TypeError: If the schema cannot be serialised to JSON
        """
        if not json_schema:
            raise ValueError("json_schema is None")

        if isinstance(json_schema, str):
            json_schema = json.loads(json_schema)

        json.dumps(json_schema)  # To validate / raise an error

        # LM Studio needs some more guidance
        if self.provider == "lmstudio":
            json_schema = {"type": "json_schema", "json_schema": {"schema": json_schema}}
            self.llm = self.llm.bind(response_format=json_schema)
        else:
            self.llm = self.llm.with_structured_output(json_schema)
        self.structured_output = True

    @staticmethod
    def get_model_options(config) -> dict:
        """
        Returns model choice options for UserInput

        :param config: Configuration reader, passed on to `get_models()`
        :returns: Dict mapping model IDs to model names
        """
        models = LLMAdapter.get_models(config)
        if not models:
            return {}
        return {model_id: model_values["name"] for model_id, model_values in models.items()}

    @staticmethod
    def get_model_providers(config) -> dict:
        """
        Returns available model providers through APIs

        :param config: Configuration reader, passed on to `get_models()`
        :returns: Dict mapping provider names to their capitalised form,
          ordered alphabetically; models without a provider are skipped
        """
        models = LLMAdapter.get_models(config)
        if not models:
            return {}
        providers = {model_values.get("provider", "") for model_values in models.values()}
        # Sorted so the options come out in the same order on every run
        return {provider: provider.capitalize() for provider in sorted(p for p in providers if p)}

    @staticmethod
    def get_models(config) -> dict:
        """
        Returns a dict with LLM models supported by 4CAT, either through an API or as a local option.
        Make sure to keep up-to-date!

        :param config: Configuration reader; its "PATH_ROOT" value is the
          folder that contains `common/assets/llms.json`
        :returns dict: A dict with model IDs as keys and details as values
        """
        models_path = config.get("PATH_ROOT").joinpath("common/assets/llms.json")
        # Explicit encoding: do not depend on the platform default
        with models_path.open(encoding="utf-8") as models_file:
            return json.load(models_file)

    @staticmethod
    def get_vllm_model_name(base_url: str, api_key: Optional[str] = None) -> str:
        """
        Query vLLM server to get the name of the served model.

        The model list is requested from `<base_url>/models`, so `base_url`
        presumably already ends with the API version prefix (e.g. `/v1`).

        :param base_url: Base URL of the vLLM server
        :param api_key: Optional key, sent as a bearer token
        :returns: ID of the first model the server lists
        :raises ValueError: If no base URL is given, the request fails, or the
          server lists no models
        """
        if not base_url:
            raise ValueError("Could not retrieve model name from vLLM server: no base_url provided")

        headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
        try:
            response = requests.get(f"{base_url.rstrip('/')}/models", headers=headers, timeout=10)
            response.raise_for_status()

            # Get the first available model
            served_models = response.json().get("data")
            if not served_models:
                raise ValueError("No models found on vLLM server")
            return served_models[0]["id"]
        except Exception as e:
            # Callers only have to handle ValueError; the cause stays attached
            raise ValueError(f"Could not retrieve model name from vLLM server: {e}") from e
def __init__(
    self,
    provider: str,
    model: str,
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    temperature: float = 0.1,
    max_tokens: int = 1000,
    client_kwargs: Optional[dict] = None,
):
    """
    Remember the connection settings, then build the chat model.

    :param provider: Provider name, lower-cased before use: 'openai',
      'google', 'mistral', 'ollama', 'lmstudio', 'anthropic', 'deepseek'
      or 'vllm'
    :param model: Model name (e.g. 'gpt-4o-mini', 'claude-3-opus', 'mistral-small')
    :param api_key: API key, if the provider requires one
    :param base_url: Server URL for local models or custom endpoints
    :param temperature: Temperature hyperparameter
    :param max_tokens: How many output tokens may be used
    :param client_kwargs: Additional client parameters; a copy is stored
    """
    # Who to talk to
    self.provider = provider.lower()
    self.model = model
    self.base_url = base_url
    self.api_key = api_key

    # Generation settings
    self.max_tokens = max_tokens
    self.temperature = temperature
    self.client_kwargs = dict(client_kwargs or {})

    # State that set_structure() may change later on
    self.parser = None
    self.structured_output = False

    # Last step: the loader reads the attributes assigned above
    self.llm: BaseChatModel = self._load_llm()
provider: 'openai', 'google', 'mistral', 'ollama', 'lmstudio', 'vllm', 'anthropic' or 'deepseek'.
model: model name (e.g., 'gpt-4o-mini', 'claude-3-opus', 'mistral-small', etc.).
api_key: API key if required (OpenAI, Claude, Google, Mistral).
base_url: for local models or Mistral custom endpoints.
temperature: temperature hyperparameter.
max_tokens: how many output tokens may be used.
client_kwargs: additional client parameters.
def generate_text(
    self,
    messages: Union[str, List[BaseMessage]],
    system_prompt: Optional[str] = None,
    temperature: float = 0.1,
    files: Optional[List[Union[str, Path, dict]]] = None,
) -> BaseMessage:
    """
    Send a prompt or a prepared message list to the model.

    :param messages: Text prompt or list of LangChain messages. A list is
      sent as-is; `system_prompt` and `files` are then not used.
    :param system_prompt: Optional system prompt, placed before a text prompt
    :param temperature: Temperature for this request. Not sent for the
      'google' and 'ollama' providers, nor for models with 'o3' or 'gpt-5'
      in their name.
    :param files: Optional media for a text prompt; handed to
      `create_multimodal_content()`, which accepts URL strings only
    :returns: Generated response message
    :raises ValueError: If an entry of `files` is not a string
    """
    if isinstance(messages, str):
        lc_messages = []
        if system_prompt:
            lc_messages.append(SystemMessage(content=system_prompt))

        # Create multimodal content if files are provided
        if files:
            multimodal_content = self.create_multimodal_content(messages, files)
            lc_messages.append(HumanMessage(content=multimodal_content))
        else:
            lc_messages.append(HumanMessage(content=messages))
    else:
        lc_messages = messages

    kwargs = {"temperature": temperature}
    if self.provider in ("google", "ollama") or "o3" in self.model or "gpt-5" in self.model:
        kwargs = {}

    # Errors raised by the client propagate to the caller unchanged
    return self.llm.invoke(lc_messages, **kwargs)
Supports string input or LangChain message list, with optional multimodal files.
Parameters
- messages: Text prompt or list of LangChain messages
- system_prompt: Optional system prompt
- temperature: Temperature for generation
- files: Optional list of file paths or content dicts for multimodal input

Returns: Generated response message
def create_multimodal_content(
    self,
    text: str,
    image_urls: Optional[List[str]] = None,
) -> List[dict]:
    """
    Build the content blocks of a multimodal LangChain message.

    Image URLs are the only supported media for now. Image blocks come
    first, in the order given; the text block, if any, comes last.

    :param text: Text content; no text block is added when it is empty
    :param image_urls: List of media URLs (http/https)
    :returns: List of content blocks
    :raises ValueError: If an entry of `image_urls` is not a string
    """
    blocks = []

    for image_url in image_urls or []:
        if not isinstance(image_url, str):
            raise ValueError(f"Image URL must be a string, got {type(image_url)}")

        if self.provider != "anthropic":
            # OpenAI-style format, used for every other provider
            block = {"type": "image_url", "image_url": {"url": image_url}}
        else:
            block = {"type": "image", "source": {"type": "url", "url": image_url}}
        blocks.append(block)

    if text:
        blocks.append({"type": "text", "text": text})

    return blocks
Create multimodal content structure for LangChain messages with media URLs. Only supports image URLs for now.
Parameters
- text: Text content
- image_urls: List of media URLs (http/https)

Returns: List of content blocks
def set_structure(self, json_schema):
    """
    Make the model answer according to a JSON schema.

    Replaces `self.llm` with a schema-bound version of itself and sets
    `self.structured_output`.

    :param json_schema: JSON schema, as a dict or as a JSON string
    :raises ValueError: If the schema is empty or is a string that is not valid JSON
    :raises TypeError: If the schema cannot be serialised to JSON
    """
    if not json_schema:
        raise ValueError("json_schema is None")

    schema = json.loads(json_schema) if isinstance(json_schema, str) else json_schema

    # Serialising is only done for its side effect: it raises on an invalid schema
    json.dumps(schema)

    if self.provider != "lmstudio":
        bound_llm = self.llm.with_structured_output(schema)
    else:
        # LM Studio needs some more guidance: wrap the schema in a response format
        response_format = {"type": "json_schema", "json_schema": {"schema": schema}}
        bound_llm = self.llm.bind(response_format=response_format)

    self.llm = bound_llm
    self.structured_output = True
@staticmethod
def get_model_options(config) -> dict:
    """
    Build the model choice options for UserInput.

    :param config: Configuration reader, passed on to `get_models()`
    :returns: Dict mapping each model ID to the model's "name" value;
      empty when no models are available
    """
    available = LLMAdapter.get_models(config)

    choices = {}
    if available:
        for key, details in available.items():
            choices[key] = details["name"]
    return choices
Returns model choice options for UserInput
@staticmethod
def get_model_providers(config) -> dict:
    """
    Returns available model providers through APIs

    :param config: Configuration reader, passed on to `get_models()`
    :returns: Dict mapping provider names to their capitalised form, ordered
      alphabetically; models without a provider are skipped
    """
    models = LLMAdapter.get_models(config)
    if not models:
        return {}
    providers = {model_values.get("provider", "") for model_values in models.values()}
    # Sorted so the options come out in the same order on every run;
    # iterating the set directly would make the order vary between runs
    return {provider: provider.capitalize() for provider in sorted(p for p in providers if p)}
Returns available model providers through APIs
@staticmethod
def get_models(config) -> dict:
    """
    Returns a dict with LLM models supported by 4CAT, either through an API or as a local option.
    Make sure to keep up-to-date!

    :param config: Configuration reader; its "PATH_ROOT" value is the folder
      that contains `common/assets/llms.json`
    :returns dict: A dict with model IDs as keys and details as values
    """
    models_path = config.get("PATH_ROOT").joinpath("common/assets/llms.json")
    # Explicit encoding: do not depend on the platform default
    with models_path.open(encoding="utf-8") as models_file:
        return json.load(models_file)
Returns a dict with LLM models supported by 4CAT, either through an API or as a local option. Make sure to keep up-to-date!
Returns: A dict with model IDs as keys and details as values
@staticmethod
def get_vllm_model_name(base_url: str, api_key: Optional[str] = None) -> str:
    """
    Query vLLM server to get the name of the served model.

    The model list is requested from `<base_url>/models`, so `base_url`
    presumably already ends with the API version prefix (e.g. `/v1`).

    :param base_url: Base URL of the vLLM server
    :param api_key: Optional key, sent as a bearer token
    :returns: ID of the first model the server lists
    :raises ValueError: If no base URL is given, the request fails, or the
      server lists no models
    """
    if not base_url:
        raise ValueError("Could not retrieve model name from vLLM server: no base_url provided")

    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        response = requests.get(f"{base_url.rstrip('/')}/models", headers=headers, timeout=10)
        response.raise_for_status()

        # Get the first available model
        served_models = response.json().get("data")
        if not served_models:
            raise ValueError("No models found on vLLM server")
        return served_models[0]["id"]
    except Exception as e:
        # Callers only have to handle ValueError; the cause stays attached
        raise ValueError(f"Could not retrieve model name from vLLM server: {e}") from e
Query vLLM server to get the name of the served model.