LiteLLM
You can use Not Diamond to power dynamic model routing in your LiteLLM projects by changing only a few lines of code:
Installation
pip install notdiamond
Updating your existing code
-from litellm import completion
+from notdiamond.toolkit.litellm import completion
import os
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
+# Every provider listed in llm_providers needs its own API key, because the selected model is the one that gets called.
+os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-api-key"
+os.environ["NOTDIAMOND_API_KEY"] = "your-notdiamond-api-key"
response = completion(
- model="gpt-3.5-turbo",
+ model="notdiamond/notdiamond",
+ llm_providers=[
+ {"provider": "anthropic", "model": "claude-3-5-sonnet-latest"},
+ {"provider": "openai", "model": "gpt-4o"},
+ {"provider": "openai", "model": "gpt-4o-mini"}
+ ],
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
print("Model: ", response.model)
print("Response: ", response.choices[0].message.content)
from litellm import completion
import os
# Placeholder credential; replace with a real OpenAI key before running.
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"

# Single-turn conversation sent to one fixed model.
chat = [{"content": "Hello, how are you?", "role": "user"}]
response = completion(model="gpt-3.5-turbo", messages=chat)

# Show which model answered, then the text of its first choice.
print("Model: ", response.model)
reply = response.choices[0].message.content
print("Response: ", reply)
from notdiamond.toolkit.litellm import completion
import os
# Placeholder credentials; replace with real keys before running.
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
os.environ["NOTDIAMOND_API_KEY"] = "your-notdiamond-api-key"

# Candidate models passed to the router via llm_providers.
candidates = [
    {"provider": "openai", "model": "gpt-3.5-turbo"},
    {"provider": "openai", "model": "gpt-4o"},
]
chat = [{"content": "Hello, how are you?", "role": "user"}]

# "notdiamond/notdiamond" replaces the fixed model name used with plain LiteLLM.
response = completion(
    model="notdiamond/notdiamond",
    llm_providers=candidates,
    messages=chat,
)

# Show which model answered, then the text of its first choice.
print("Model: ", response.model)
reply = response.choices[0].message.content
print("Response: ", reply)
Accuracy is maximized by default, but you can also define explicit cost and latency tradeoffs to use smaller models when doing so doesn't impact quality.
Updated about 2 months ago