You can use Not Diamond to power dynamic model routing in your LiteLLM projects by changing only a few lines of code:

Installation

pip install notdiamond

Updating your existing code

-from litellm import completion
+from notdiamond.toolkit.litellm import completion
import os

os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
+os.environ["NOTDIAMOND_API_KEY"] = "your-notdiamond-api-key"

response = completion(
-  model="gpt-3.5-turbo",
+  model="notdiamond/notdiamond",
+  llm_providers=[
+      {"provider": "anthropic", "model": "claude-3-5-sonnet-latest"},
+      {"provider": "openai", "model": "gpt-4o"},
+      {"provider": "openai", "model": "gpt-4o-mini"}
+  ],
  messages=[{"content": "Hello, how are you?", "role": "user"}]
)

print("Model: ", response.model)
print("Response: ", response.choices[0].message.content)
For example, if your existing LiteLLM code looks like this:

from litellm import completion
import os

os.environ["OPENAI_API_KEY"] = "your-openai-api-key"

response = completion(
  model="gpt-3.5-turbo",
  messages=[{"content": "Hello, how are you?", "role": "user"}]
)

print("Model: ", response.model)
print("Response: ", response.choices[0].message.content)
then after updating it to route through Not Diamond, it will look like this:

from notdiamond.toolkit.litellm import completion
import os

os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
os.environ["NOTDIAMOND_API_KEY"] = "your-notdiamond-api-key"

response = completion(
  model="notdiamond/notdiamond",
  llm_providers=[
      {"provider": "openai", "model": "gpt-3.5-turbo"},
      {"provider": "openai", "model": "gpt-4o"}
  ],
  messages=[{"content": "Hello, how are you?", "role": "user"}]
)

print("Model: ", response.model)
print("Response: ", response.choices[0].message.content)

Accuracy is maximized by default, but you can also define explicit cost and latency tradeoffs to use smaller models when doing so doesn't impact quality.
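As a minimal sketch of what a cost tradeoff might look like, assuming the LiteLLM toolkit's completion accepts the same tradeoff parameter as the Not Diamond SDK (where "cost" and "latency" are the supported values):

from notdiamond.toolkit.litellm import completion
import os

os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
os.environ["NOTDIAMOND_API_KEY"] = "your-notdiamond-api-key"

response = completion(
  model="notdiamond/notdiamond",
  llm_providers=[
      {"provider": "openai", "model": "gpt-4o"},
      {"provider": "openai", "model": "gpt-4o-mini"}
  ],
  # Assumption: "tradeoff" is passed through to the Not Diamond router;
  # "cost" tells it to prefer cheaper models when quality is unaffected
  tradeoff="cost",
  messages=[{"content": "Hello, how are you?", "role": "user"}]
)

print("Model: ", response.model)

With a cost tradeoff, routing should favor gpt-4o-mini on prompts where the router predicts no loss in response quality.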