first commit
This commit is contained in:
1
tests/integrations/python/.python-version
Normal file
1
tests/integrations/python/.python-version
Normal file
@@ -0,0 +1 @@
|
||||
3.12
|
||||
1605
tests/integrations/python/README.md
Normal file
1605
tests/integrations/python/README.md
Normal file
File diff suppressed because it is too large
Load Diff
348
tests/integrations/python/config.json
Normal file
348
tests/integrations/python/config.json
Normal file
@@ -0,0 +1,348 @@
|
||||
{
|
||||
"$schema": "https://www.getbifrost.ai/schema",
|
||||
"mcp": {
|
||||
"client_configs": [
|
||||
{
|
||||
"name": "sse_mcp",
|
||||
"connection_type": "sse",
|
||||
"connection_string": "env.MCP_SSE_URL",
|
||||
"auth_type": "headers",
|
||||
"headers": {
|
||||
"Authorization": "env.MCP_SSE_AUTHORIZATION",
|
||||
"ENV_EXA_API_KEY": "env.MCP_SSE_API_KEY"
|
||||
},
|
||||
"tools_to_execute": ["*"],
|
||||
"tools_to_auto_execute": ["*"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"providers": {
|
||||
"openai": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "OpenAI API Key",
|
||||
"value": "env.OPENAI_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"],
|
||||
"use_for_batch_api": true
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"elevenlabs": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "ElevenLabs API Key",
|
||||
"value": "env.ELEVENLABS_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"],
|
||||
"use_for_batch_api": false
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"xai": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Xai API Key",
|
||||
"value": "env.XAI_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"],
|
||||
"use_for_batch_api": false
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"huggingface": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Hugging Face API Key",
|
||||
"value": "env.HUGGING_FACE_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"],
|
||||
"use_for_batch_api": false
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"anthropic": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Anthropic API Key",
|
||||
"value": "env.ANTHROPIC_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"],
|
||||
"use_for_batch_api": true
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"gemini": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Gemini API Key",
|
||||
"value": "env.GEMINI_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"],
|
||||
"use_for_batch_api": true
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"vertex": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Vertex API Key",
|
||||
"vertex_key_config": {
|
||||
"project_id": "env.VERTEX_PROJECT_ID",
|
||||
"region": "env.GOOGLE_LOCATION",
|
||||
"auth_credentials": "env.VERTEX_CREDENTIALS"
|
||||
},
|
||||
"weight": 1,
|
||||
"models": ["*"]
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"mistral": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Mistral API Key",
|
||||
"value": "env.MISTRAL_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"]
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"cohere": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Cohere API Key",
|
||||
"value": "env.COHERE_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"]
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"parasail": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Parasail API Key",
|
||||
"value": "env.PARASAIL_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"]
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"groq": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Groq API Key",
|
||||
"value": "env.GROQ_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"]
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"perplexity": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Perplexity API Key",
|
||||
"value": "env.PERPLEXITY_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"]
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"cerebras": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Cerebras API Key",
|
||||
"value": "env.CEREBRAS_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"]
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"openrouter": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "OpenRouter API Key",
|
||||
"value": "env.OPENROUTER_API_KEY",
|
||||
"weight": 1,
|
||||
"models": ["*"]
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"azure": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Azure API Key",
|
||||
"value": "env.AZURE_API_KEY",
|
||||
"azure_key_config": {
|
||||
"endpoint": "env.AZURE_ENDPOINT",
|
||||
"api_version": "env.AZURE_API_VERSION",
|
||||
"deployments": {
|
||||
"gpt-4o": "gpt-4o",
|
||||
"gpt-4o-mini": "gpt-4o-mini",
|
||||
"gpt-4o-mini-tts": "gpt-4o-mini-tts",
|
||||
"o1": "o1",
|
||||
"gpt-4o-batch": "gpt-4o-batch",
|
||||
"whisper": "whisper",
|
||||
"text-embedding-3-small": "text-embedding-3-small",
|
||||
"gpt-image-1": "gpt-image-1"
|
||||
}
|
||||
},
|
||||
"weight": 1,
|
||||
"models": ["*"]
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"bedrock": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Bedrock API Key",
|
||||
"bedrock_key_config": {
|
||||
"access_key": "env.AWS_ACCESS_KEY_ID",
|
||||
"secret_key": "env.AWS_SECRET_ACCESS_KEY",
|
||||
"region": "env.AWS_REGION",
|
||||
"arn": "env.AWS_ARN"
|
||||
},
|
||||
"weight": 1,
|
||||
"models": ["*"],
|
||||
"use_for_batch_api": true
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"replicate": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Replicate API Key",
|
||||
"value": "env.REPLICATE_API_KEY",
|
||||
"weight": 1
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"runway": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Runway API Key",
|
||||
"value": "env.RUNWAY_API_KEY",
|
||||
"weight": 1
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
},
|
||||
"nebius": {
|
||||
"keys": [
|
||||
{
|
||||
"name": "Nebius API Key",
|
||||
"value": "env.NEBIUS_API_KEY",
|
||||
"weight": 1
|
||||
}
|
||||
],
|
||||
"network_config": {
|
||||
"default_request_timeout_in_seconds": 300
|
||||
}
|
||||
}
|
||||
},
|
||||
"config_store": {
|
||||
"enabled": true,
|
||||
"type": "sqlite",
|
||||
"config": {
|
||||
"path": "./config.db"
|
||||
}
|
||||
},
|
||||
"logs_store": {
|
||||
"enabled": true,
|
||||
"type": "sqlite",
|
||||
"config": {
|
||||
"path": "./logs.db"
|
||||
}
|
||||
},
|
||||
"governance": {
|
||||
"virtual_keys": [
|
||||
{
|
||||
"name": "Test Key",
|
||||
"id": "vk-test",
|
||||
"value": "sk-bf-test-key",
|
||||
"is_active": true,
|
||||
"provider_configs": [
|
||||
{ "provider": "openai", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "elevenlabs", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "xai", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "huggingface", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "anthropic", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "gemini", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "vertex", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "mistral", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "cohere", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "parasail", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "groq", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "perplexity", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "cerebras", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "openrouter", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "azure", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
|
||||
{ "provider": "bedrock", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 }
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"client": {
|
||||
"drop_excess_requests": false,
|
||||
"initial_pool_size": 300,
|
||||
"allowed_origins": [
|
||||
"*"
|
||||
],
|
||||
"enable_logging": true,
|
||||
"enforce_auth_on_inference": false,
|
||||
"allow_direct_keys": false,
|
||||
"max_request_body_size_mb": 100
|
||||
}
|
||||
}
|
||||
942
tests/integrations/python/config.yml
Normal file
942
tests/integrations/python/config.yml
Normal file
@@ -0,0 +1,942 @@
|
||||
# Bifrost Integration Tests Configuration
|
||||
# This file centralizes all configuration for AI integration clients and test settings
|
||||
|
||||
# Bifrost Gateway Configuration
|
||||
# All integrations route through Bifrost as a proxy/gateway
|
||||
bifrost:
|
||||
base_url: "${BIFROST_BASE_URL:-http://localhost:8080}"
|
||||
|
||||
# Integration-specific endpoints (suffixes appended to base_url)
|
||||
endpoints:
|
||||
openai: "openai"
|
||||
anthropic: "anthropic"
|
||||
cohere: "cohere"
|
||||
google: "genai"
|
||||
"gemini_passthrough": "genai_passthrough"
|
||||
"anthropic_passthrough": "anthropic_passthrough"
|
||||
litellm: "litellm"
|
||||
langchain: "langchain"
|
||||
pydanticai: "pydanticai"
|
||||
bedrock: "bedrock"
|
||||
azure: "openai" # Azure uses OpenAI routes with /openai/deployments/{id} paths
|
||||
|
||||
# Full URLs constructed as: {base_url.rstrip('/')}/{endpoints[integration]}
|
||||
# Examples:
|
||||
# - OpenAI: http://localhost:8080/openai
|
||||
# - Anthropic: http://localhost:8080/anthropic
|
||||
# - Google: http://localhost:8080/genai
|
||||
# - LiteLLM: http://localhost:8080/litellm
|
||||
# - LangChain: http://localhost:8080/langchain
|
||||
|
||||
# API Configuration
|
||||
api:
|
||||
timeout: 30 # seconds
|
||||
max_retries: 3
|
||||
retry_delay: 1 # seconds
|
||||
|
||||
# Provider model configurations
|
||||
# Integrations (openai, anthropic, google, litellm, langchain) map to these providers
|
||||
providers:
|
||||
openai:
|
||||
chat: "gpt-4o"
|
||||
vision: "gpt-4o"
|
||||
file: "gpt-4o"
|
||||
tools: "gpt-4o-mini"
|
||||
speech: "tts-1"
|
||||
transcription: "whisper-1"
|
||||
embeddings: "text-embedding-3-small"
|
||||
image_generation: "gpt-image-1"
|
||||
image_edit: "gpt-image-1"
|
||||
streaming: "gpt-4o-mini"
|
||||
thinking: "gpt-5.1"
|
||||
batch_file_upload: "gpt-4o-mini"
|
||||
batch_list: "gpt-4o"
|
||||
batch_retrieve: "gpt-4o"
|
||||
batch_cancel: "gpt-4o"
|
||||
batch_inline: "gpt-4o"
|
||||
file_upload: "gpt-4o-mini"
|
||||
file_list: "gpt-4o-mini"
|
||||
file_retrieve: "gpt-4o-mini"
|
||||
file_delete: "gpt-4o-mini"
|
||||
file_content: "gpt-4o-mini"
|
||||
count_tokens: "gpt-4o-mini"
|
||||
video: "sora-2"
|
||||
alternatives:
|
||||
- "gpt-4"
|
||||
- "gpt-4-turbo-preview"
|
||||
- "gpt-4o"
|
||||
- "gpt-3.5-turbo"
|
||||
|
||||
azure:
|
||||
chat: "gpt-4o"
|
||||
vision: "gpt-4o"
|
||||
tools: "gpt-4o-mini"
|
||||
streaming: "gpt-4o-mini"
|
||||
speech: "gpt-4o-mini-tts"
|
||||
transcription: "whisper"
|
||||
embeddings: "text-embedding-3-small"
|
||||
image_generation: "gpt-image-1"
|
||||
thinking: "o1"
|
||||
batch_file_upload: "gpt-4o-batch"
|
||||
batch_list: "gpt-4o-batch"
|
||||
batch_retrieve: "gpt-4o-batch"
|
||||
batch_cancel: "gpt-4o-batch"
|
||||
file_upload: "gpt-4o"
|
||||
file_list: "gpt-4o"
|
||||
file_retrieve: "gpt-4o"
|
||||
file_delete: "gpt-4o"
|
||||
file_content: "gpt-4o"
|
||||
|
||||
xai:
|
||||
chat: "grok-4-0709"
|
||||
vision: "grok-2-vision-1212"
|
||||
tools: "grok-4-0709"
|
||||
streaming: "grok-4-0709"
|
||||
thinking: "grok-3-mini"
|
||||
|
||||
anthropic:
|
||||
chat: "claude-sonnet-4-5-20250929"
|
||||
vision: "claude-sonnet-4-5-20250929"
|
||||
file: "claude-sonnet-4-5-20250929"
|
||||
tools: "claude-sonnet-4-5-20250929"
|
||||
streaming: "claude-sonnet-4-5-20250929"
|
||||
thinking: "claude-opus-4-5"
|
||||
batch_file_upload: "claude-sonnet-4-20250514"
|
||||
batch_inline: "claude-sonnet-4-20250514"
|
||||
batch_list: "claude-sonnet-4-20250514"
|
||||
batch_retrieve: "claude-sonnet-4-20250514"
|
||||
batch_cancel: "claude-sonnet-4-20250514"
|
||||
file_upload: "claude-sonnet-4-20250514"
|
||||
file_list: "claude-sonnet-4-20250514"
|
||||
file_retrieve: "claude-sonnet-4-20250514"
|
||||
file_delete: "claude-sonnet-4-20250514"
|
||||
file_content: "claude-sonnet-4-20250514"
|
||||
count_tokens: "claude-sonnet-4-5-20250929"
|
||||
alternatives:
|
||||
- "claude-3-sonnet-20240229"
|
||||
- "claude-3-opus-20240229"
|
||||
- "claude-3-5-sonnet-20241022"
|
||||
- "claude-3-haiku-20240307"
|
||||
|
||||
gemini:
|
||||
chat: "gemini-3-flash-preview"
|
||||
vision: "gemini-3-flash-preview"
|
||||
tools: "gemini-3-flash-preview"
|
||||
file: "gemini-2.5-flash"
|
||||
thinking: "gemini-3-pro-preview"
|
||||
speech: "gemini-2.5-flash-preview-tts"
|
||||
transcription: "gemini-2.5-flash"
|
||||
embeddings: "gemini-embedding-001"
|
||||
image_generation: "gemini-2.5-flash-image"
|
||||
image_edit: "gemini-3-pro-image-preview"
|
||||
imagen: "imagen-4.0-generate-001"
|
||||
video: "veo-3.1-generate-preview"
|
||||
streaming: "gemini-3-flash-preview"
|
||||
batch_create: "gemini-2.5-flash"
|
||||
batch_inline: "gemini-2.5-flash"
|
||||
batch_file_upload: "gemini-2.5-flash"
|
||||
batch_list: "gemini-2.5-flash"
|
||||
batch_retrieve: "gemini-2.5-flash"
|
||||
batch_cancel: "gemini-2.5-flash"
|
||||
batch_s3: "gemini-2.5-flash"
|
||||
file_upload: "gemini-2.0-flash"
|
||||
file_list: "gemini-2.0-flash"
|
||||
file_content: "gemini-2.0-flash"
|
||||
file_download: "gemini-2.0-flash"
|
||||
file_retrieve: "gemini-2.0-flash"
|
||||
file_delete: "gemini-2.0-flash"
|
||||
count_tokens: "gemini-2.5-flash"
|
||||
alternatives:
|
||||
- "gemini-1.5-pro"
|
||||
- "gemini-1.5-flash"
|
||||
- "gemini-1.0-pro"
|
||||
- "gemini-2.0-flash-001"
|
||||
|
||||
vertex:
|
||||
chat: "gemini-2.5-flash"
|
||||
vision: "claude-sonnet-4-5"
|
||||
tools: "gemini-2.5-flash"
|
||||
file: "claude-sonnet-4-5"
|
||||
thinking: "gemini-2.5-pro"
|
||||
embeddings: "gemini-embedding-001"
|
||||
image_generation: "imagen-4.0-generate-001"
|
||||
image_edit: "imagen-3.0-capability-001"
|
||||
imagen: "imagen-4.0-generate-001"
|
||||
streaming: "gemini-2.5-flash"
|
||||
count_tokens: "claude-sonnet-4-5"
|
||||
video: "veo-3.1-generate-preview"
|
||||
bedrock:
|
||||
chat: "global.anthropic.claude-sonnet-4-20250514-v1:0"
|
||||
vision: "global.anthropic.claude-sonnet-4-20250514-v1:0"
|
||||
file: "global.anthropic.claude-sonnet-4-20250514-v1:0"
|
||||
tools: "global.anthropic.claude-sonnet-4-20250514-v1:0"
|
||||
streaming: "global.anthropic.claude-sonnet-4-20250514-v1:0"
|
||||
thinking: "us.anthropic.claude-opus-4-5-20251101-v1:0"
|
||||
text_completion: "mistral.mistral-7b-instruct-v0:2"
|
||||
embeddings: "global.cohere.embed-v4:0"
|
||||
image_generation: "amazon.titan-image-generator-v2:0"
|
||||
image_variation: "amazon.titan-image-generator-v2:0"
|
||||
batch_inline: "anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
image_edit: "amazon.nova-canvas-v1:0"
|
||||
batch_list: "anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
batch_retrieve: "anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
batch_cancel: "anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
batch_file_upload: "anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
batch_s3: "anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
file_upload: "anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
file_list: "anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
file_delete: "anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
file_content: "anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
count_tokens: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
|
||||
alternatives:
|
||||
- "anthropic.claude-3-opus-20240229-v1:0"
|
||||
|
||||
cohere:
|
||||
chat: "command-a-03-2025"
|
||||
vision: "command-a-vision-07-2025"
|
||||
tools: "command-a-03-2025"
|
||||
embeddings: "embed-v4.0"
|
||||
streaming: "command-a-03-2025"
|
||||
count_tokens: "command-a-03-2025"
|
||||
alternatives:
|
||||
- "command-r-plus"
|
||||
|
||||
huggingface:
|
||||
image_generation: "fal-ai/fal-ai/flux/dev"
|
||||
image_edit: "fal-ai/fal-ai/flux-2/edit"
|
||||
|
||||
nebius:
|
||||
image_generation: "black-forest-labs/flux-schnell"
|
||||
|
||||
replicate:
|
||||
video: "openai/sora-2-pro"
|
||||
|
||||
runway:
|
||||
video: "gen4.5"
|
||||
|
||||
|
||||
|
||||
# Provider availability configuration
|
||||
# Maps provider names to their API key environment variables
|
||||
provider_api_keys:
|
||||
openai: "OPENAI_API_KEY"
|
||||
anthropic: "ANTHROPIC_API_KEY"
|
||||
gemini: "GEMINI_API_KEY"
|
||||
vertex: "VERTEX_API_KEY"
|
||||
bedrock: "AWS_ACCESS_KEY_ID"
|
||||
cohere: "COHERE_API_KEY"
|
||||
xai: "XAI_API_KEY"
|
||||
huggingface: "HUGGING_FACE_API_KEY"
|
||||
nebius: "NEBIUS_API_KEY"
|
||||
azure: "AZURE_API_KEY"
|
||||
replicate: "REPLICATE_API_KEY"
|
||||
runway: "RUNWAY_API_KEY"
|
||||
|
||||
# Provider test scenarios - which tests each provider supports
|
||||
provider_scenarios:
|
||||
openai:
|
||||
simple_chat: true
|
||||
multi_turn_conversation: true
|
||||
streaming: true
|
||||
tool_calls: true
|
||||
multiple_tool_calls: true
|
||||
end2end_tool_calling: true
|
||||
automatic_function_calling: true
|
||||
"web_search": true
|
||||
image_url: true
|
||||
image_base64: true
|
||||
file_input: true
|
||||
multiple_images: true
|
||||
speech_synthesis: true
|
||||
speech_synthesis_streaming: true
|
||||
transcription: true
|
||||
transcription_streaming: true
|
||||
embeddings: true
|
||||
image_generation: true
|
||||
image_edit: true
|
||||
thinking: true
|
||||
prompt_caching: false
|
||||
citations: false
|
||||
list_models: true
|
||||
responses: true
|
||||
responses_image: true
|
||||
text_completion: false
|
||||
langchain_structured_output: true
|
||||
pydantic_structured_output: true # PydanticAI structured output works reliably with OpenAI
|
||||
pydanticai_streaming: true # PydanticAI streaming works with OpenAI
|
||||
batch_file_upload: true
|
||||
batch_create: true
|
||||
batch_list: true
|
||||
batch_retrieve: true
|
||||
batch_cancel: true
|
||||
batch_inline: true # OpenAI supports inline requests for batch
|
||||
batch_s3: false # OpenAI does not use S3 for batch
|
||||
file_upload: true
|
||||
file_list: true
|
||||
file_retrieve: true
|
||||
file_delete: true
|
||||
file_content: true
|
||||
count_tokens: true
|
||||
video_generation: false # disabled for now because of long running operations
|
||||
|
||||
azure:
|
||||
simple_chat: true
|
||||
multi_turn_conversation: true
|
||||
streaming: true
|
||||
tool_calls: true
|
||||
multiple_tool_calls: true
|
||||
end2end_tool_calling: true
|
||||
automatic_function_calling: true
|
||||
web_search: false
|
||||
image_url: true
|
||||
image_base64: true
|
||||
file_input: false
|
||||
multiple_images: true
|
||||
speech_synthesis: true
|
||||
speech_synthesis_streaming: true
|
||||
transcription: true
|
||||
transcription_streaming: true
|
||||
embeddings: true
|
||||
image_generation: true
|
||||
image_edit: false
|
||||
thinking: true
|
||||
prompt_caching: false
|
||||
citations: false
|
||||
list_models: true
|
||||
responses: true
|
||||
responses_image: true
|
||||
text_completion: false
|
||||
langchain_structured_output: false
|
||||
pydantic_structured_output: false
|
||||
pydanticai_streaming: false
|
||||
batch_file_upload: true
|
||||
batch_create: false
|
||||
batch_list: true
|
||||
batch_retrieve: true
|
||||
batch_cancel: true
|
||||
batch_inline: false
|
||||
batch_s3: false
|
||||
file_upload: true
|
||||
file_list: true
|
||||
file_retrieve: true
|
||||
file_delete: true
|
||||
file_content: true
|
||||
count_tokens: false
|
||||
|
||||
xai:
|
||||
simple_chat: true
|
||||
multi_turn_conversation: true
|
||||
streaming: true
|
||||
tool_calls: true
|
||||
multiple_tool_calls: true
|
||||
end2end_tool_calling: true
|
||||
automatic_function_calling: true
|
||||
image_url: true
|
||||
image_base64: false
|
||||
file_input: false
|
||||
multiple_images: false
|
||||
thinking: true
|
||||
list_models: true
|
||||
responses: true
|
||||
responses_image: true
|
||||
text_completion: false
|
||||
langchain_structured_output: true
|
||||
pydantic_structured_output: true
|
||||
pydanticai_streaming: true
|
||||
|
||||
anthropic:
|
||||
simple_chat: true
|
||||
multi_turn_conversation: true
|
||||
streaming: true
|
||||
tool_calls: true
|
||||
multiple_tool_calls: true
|
||||
end2end_tool_calling: true
|
||||
automatic_function_calling: true
|
||||
web_search: true
|
||||
image_url: true
|
||||
image_base64: true
|
||||
file_input: true
|
||||
file_input_text: true
|
||||
multiple_images: true
|
||||
speech_synthesis: false
|
||||
speech_synthesis_streaming: false
|
||||
transcription: false
|
||||
transcription_streaming: false
|
||||
embeddings: false
|
||||
thinking: true
|
||||
prompt_caching: true
|
||||
citations: true
|
||||
list_models: true
|
||||
responses: true
|
||||
responses_image: true
|
||||
text_completion: false
|
||||
langchain_structured_output: false
|
||||
pydantic_structured_output: true # PydanticAI structured output works with Anthropic
|
||||
pydanticai_streaming: true # PydanticAI streaming works with Anthropic
|
||||
batch_file_upload: true # NOTE(review): enabled, although Anthropic's batch API is described as using inline requests rather than files - confirm this is intended
|
||||
batch_create: true
|
||||
batch_list: true
|
||||
batch_retrieve: true
|
||||
batch_cancel: true
|
||||
batch_inline: true # Anthropic uses inline requests for batch
|
||||
batch_s3: false # Anthropic does not use S3 for batch
|
||||
file_upload: true
|
||||
file_list: true
|
||||
file_retrieve: true
|
||||
file_delete: true
|
||||
file_content: true
|
||||
count_tokens: true
|
||||
|
||||
gemini:
|
||||
simple_chat: true
|
||||
multi_turn_conversation: true
|
||||
streaming: true
|
||||
tool_calls: true
|
||||
multiple_tool_calls: true
|
||||
end2end_tool_calling: true
|
||||
automatic_function_calling: true
|
||||
image_url: false # Gemini requires base64 or file upload
|
||||
image_base64: true
|
||||
file_input: true
|
||||
multiple_images: false
|
||||
speech_synthesis: true
|
||||
speech_synthesis_streaming: true
|
||||
transcription: true
|
||||
transcription_streaming: true
|
||||
embeddings: true
|
||||
image_generation: true # Gemini image generation via responseModalities
|
||||
image_edit: true # Gemini image editing
|
||||
imagen: true # Imagen via :predict endpoint
|
||||
imagen_edit: true # Imagen editing via image_edit model
|
||||
thinking: true
|
||||
video_generation: false # disabled for now because of long running operations
|
||||
prompt_caching: false
|
||||
citations: false
|
||||
list_models: true
|
||||
responses: true
|
||||
responses_image: true
|
||||
text_completion: false
|
||||
langchain_structured_output: true
|
||||
pydantic_structured_output: false # PydanticAI structured output unreliable via Bifrost for Gemini
|
||||
pydanticai_streaming: false # PydanticAI GoogleModel streaming has asyncio issues
|
||||
batch_file_upload: true # Gemini supports file upload via Files API
|
||||
batch_create: true
|
||||
batch_list: true
|
||||
batch_retrieve: true
|
||||
batch_cancel: true
|
||||
batch_inline: true # Gemini uses inline requests for batch (synchronous)
|
||||
batch_s3: false # Gemini does not use S3 for batch
|
||||
file_upload: true
|
||||
file_list: true
|
||||
file_retrieve: true
|
||||
file_delete: true
|
||||
file_content: false # Gemini doesn't support direct file download
|
||||
count_tokens: true
|
||||
context_caching: true # Gemini context caching (Caches API) via Bifrost passthrough
|
||||
|
||||
vertex:
|
||||
simple_chat: true
|
||||
multi_turn_conversation: true
|
||||
streaming: true
|
||||
tool_calls: true
|
||||
multiple_tool_calls: true
|
||||
end2end_tool_calling: true
|
||||
automatic_function_calling: true
|
||||
image_url: false # Gemini requires base64 or file upload
|
||||
image_base64: true
|
||||
file_input: true
|
||||
multiple_images: false
|
||||
speech_synthesis: false
|
||||
speech_synthesis_streaming: false
|
||||
transcription: false
|
||||
transcription_streaming: false
|
||||
embeddings: true
|
||||
image_generation: true
|
||||
image_edit: true
|
||||
imagen: true # Imagen via :predict endpoint
|
||||
imagen_edit: true # Imagen editing via image_edit model
|
||||
thinking: true
|
||||
prompt_caching: false
|
||||
list_models: true
|
||||
video_generation: false # disabled for now because of long running operations
|
||||
responses: true
|
||||
responses_image: true
|
||||
text_completion: false
|
||||
langchain_structured_output: true
|
||||
pydantic_structured_output: false # PydanticAI structured output unreliable via Bifrost for Gemini
|
||||
pydanticai_streaming: false # PydanticAI GoogleModel streaming has asyncio issues
|
||||
batch_file_upload: false # Batch file upload is disabled for Vertex in these tests
|
||||
batch_create: false
|
||||
batch_list: false
|
||||
batch_retrieve: false
|
||||
batch_cancel: false
|
||||
batch_inline: false # Inline batch requests are disabled for Vertex in these tests
|
||||
batch_s3: false # Gemini does not use S3 for batch
|
||||
file_upload: false
|
||||
file_list: false
|
||||
file_retrieve: false
|
||||
file_delete: false
|
||||
file_content: false # Gemini doesn't support direct file download
|
||||
count_tokens: false
|
||||
|
||||
bedrock:
|
||||
simple_chat: true
|
||||
multi_turn_conversation: true
|
||||
streaming: true
|
||||
tool_calls: true
|
||||
multiple_tool_calls: true
|
||||
end2end_tool_calling: true
|
||||
automatic_function_calling: true
|
||||
image_url: false
|
||||
image_base64: true
|
||||
file_input: true
|
||||
file_input_text: true
|
||||
multiple_images: false
|
||||
speech_synthesis: false
|
||||
speech_synthesis_streaming: false
|
||||
transcription: false
|
||||
transcription_streaming: false
|
||||
embeddings: true
|
||||
thinking: true
|
||||
prompt_caching: true
|
||||
citations: false
|
||||
list_models: true
|
||||
responses: true
|
||||
responses_image: true
|
||||
text_completion: false
|
||||
langchain_structured_output: true
|
||||
pydantic_structured_output: false # Bedrock not supported in PydanticAI tests
|
||||
pydanticai_streaming: false # Bedrock not supported in PydanticAI tests
|
||||
batch_file_upload: true # Bedrock uses S3 wrapper for file uploads
|
||||
batch_create: true
|
||||
batch_list: true
|
||||
batch_retrieve: true
|
||||
batch_cancel: true
|
||||
batch_inline: false # Bedrock batch uses S3, not inline requests via API
|
||||
batch_s3: true # Bedrock uses S3 for batch input/output
|
||||
file_upload: true # Bedrock uses S3 wrapper for file storage
|
||||
file_list: true # Bedrock lists files in S3 bucket
|
||||
file_retrieve: true # Bedrock retrieves S3 object metadata
|
||||
file_delete: true # Bedrock deletes S3 objects
|
||||
file_content: true # Bedrock downloads S3 object content
|
||||
image_generation: true # Bedrock supports image generation via invoke (Titan, SA, cross-provider)
|
||||
image_edit: true # Bedrock supports image editing via invoke (Titan, SA)
|
||||
image_variation: true # Bedrock supports image variation via invoke (Titan IMAGE_VARIATION)
|
||||
count_tokens: true # Bedrock supports token counting via CountTokens API
|
||||
|
||||
cohere:
|
||||
simple_chat: true
|
||||
multi_turn_conversation: true
|
||||
streaming: true
|
||||
tool_calls: true
|
||||
multiple_tool_calls: true
|
||||
end2end_tool_calling: true
|
||||
automatic_function_calling: false
|
||||
image_url: true
|
||||
image_base64: true
|
||||
multiple_images: true
|
||||
speech_synthesis: false
|
||||
speech_synthesis_streaming: false
|
||||
transcription: false
|
||||
transcription_streaming: false
|
||||
embeddings: true
|
||||
thinking: false
|
||||
prompt_caching: false
|
||||
citations: false
|
||||
list_models: false
|
||||
responses: true
|
||||
responses_image: true
|
||||
text_completion: false
|
||||
langchain_structured_output: true
|
||||
pydantic_structured_output: false # PydanticAI CohereModel doesn't reliably support structured output
|
||||
pydanticai_streaming: false # PydanticAI CohereModel doesn't implement streaming
|
||||
batch_file_upload: false
|
||||
batch_create: false
|
||||
batch_list: false
|
||||
batch_retrieve: false
|
||||
batch_cancel: false
|
||||
batch_inline: false # Cohere does not support batch API
|
||||
batch_s3: false # Cohere does not support batch API
|
||||
file_upload: false # Cohere does not support Files API
|
||||
file_list: false
|
||||
file_retrieve: false
|
||||
file_delete: false
|
||||
file_content: false
|
||||
count_tokens: true
|
||||
|
||||
huggingface:
|
||||
image_generation: true
|
||||
image_edit: true
|
||||
|
||||
nebius:
|
||||
image_generation: true
|
||||
|
||||
replicate:
|
||||
video_generation: false # disabled for now because of long running operations
|
||||
|
||||
runway:
|
||||
video_generation: false # disabled for now because of long running operations
|
||||
|
||||
# Scenario to capability mapping
|
||||
# Maps test scenario names to their corresponding capability types
|
||||
scenario_capabilities:
|
||||
simple_chat: "chat"
|
||||
multi_turn_conversation: "chat"
|
||||
responses: "chat"
|
||||
responses_image: "vision"
|
||||
text_completion: "chat"
|
||||
streaming: "streaming"
|
||||
tool_calls: "tools"
|
||||
multiple_tool_calls: "tools"
|
||||
end2end_tool_calling: "tools"
|
||||
automatic_function_calling: "tools"
|
||||
web_search: "chat"
|
||||
image_url: "vision"
|
||||
image_base64: "vision"
|
||||
file_input: "file"
|
||||
file_input_text: "file"
|
||||
multiple_images: "vision"
|
||||
speech_synthesis: "speech"
|
||||
speech_synthesis_streaming: "speech"
|
||||
transcription: "transcription"
|
||||
transcription_streaming: "transcription"
|
||||
embeddings: "embeddings"
|
||||
image_generation: "image_generation" # Uses image_generation model
|
||||
image_edit: "image_edit" # Uses image_edit model
|
||||
imagen: "imagen" # Uses imagen model (Gemini/Vertex)
|
||||
imagen_edit: "image_edit" # Uses image_edit model for Imagen editing
|
||||
thinking: "thinking"
|
||||
prompt_caching: "chat"
|
||||
citations: "chat"
|
||||
list_models: "chat"
|
||||
langchain_structured_output: "chat" # LangChain structured output uses chat capability
|
||||
count_tokens: "count_tokens" # Token counting capability
|
||||
pydantic_structured_output: "chat" # Structured output uses chat capability
|
||||
pydanticai_streaming: "streaming" # PydanticAI streaming uses streaming capability
|
||||
batch_file_upload: "batch_file_upload" # Uses batch_file_upload model directly
|
||||
batch_create: "batch_create"
|
||||
batch_list: "batch_list"
|
||||
batch_retrieve: "batch_retrieve"
|
||||
batch_cancel: "batch_cancel"
|
||||
batch_inline: "batch_inline" # Uses batch_inline model directly
|
||||
batch_s3: "batch_s3" # Uses batch_s3 model directly
|
||||
file_upload: "file_upload" # Uses file_upload model directly
|
||||
file_list: "file_list" # Uses file_list model directly
|
||||
file_retrieve: "file_retrieve" # Uses file_retrieve model directly
|
||||
file_delete: "file_delete" # Uses file_delete model directly
|
||||
file_content: "file_content" # Uses file_content model directly
|
||||
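count_tokens: "chat" # NOTE(review): duplicate key - count_tokens is already mapped to "count_tokens" earlier in this mapping; loaders that keep the last value (e.g. PyYAML) will use this entry. Confirm which one is intended.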
|
||||
video_generation: "video"
|
||||
context_caching: "chat" # Gemini Caches API (passthrough)
|
||||
|
||||
# Model capabilities matrix
|
||||
model_capabilities:
|
||||
# OpenAI Models
|
||||
"gpt-3.5-turbo":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: false
|
||||
streaming: true
|
||||
max_tokens: 4096
|
||||
context_window: 4096
|
||||
|
||||
"gpt-4":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: false
|
||||
streaming: true
|
||||
max_tokens: 8192
|
||||
context_window: 8192
|
||||
|
||||
"gpt-4o":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: true
|
||||
streaming: true
|
||||
max_tokens: 4096
|
||||
context_window: 128000
|
||||
|
||||
"gpt-4o-mini":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: true
|
||||
streaming: true
|
||||
speech: false
|
||||
transcription: false
|
||||
max_tokens: 4096
|
||||
context_window: 128000
|
||||
|
||||
# OpenAI Speech Models
|
||||
"tts-1":
|
||||
chat: false
|
||||
tools: false
|
||||
vision: false
|
||||
streaming: false
|
||||
speech: true
|
||||
transcription: false
|
||||
max_tokens: null
|
||||
context_window: null
|
||||
|
||||
"tts-1-hd":
|
||||
chat: false
|
||||
tools: false
|
||||
vision: false
|
||||
streaming: false
|
||||
speech: true
|
||||
transcription: false
|
||||
max_tokens: null
|
||||
context_window: null
|
||||
|
||||
# OpenAI Transcription Models
|
||||
"whisper-1":
|
||||
chat: false
|
||||
tools: false
|
||||
vision: false
|
||||
streaming: false
|
||||
speech: false
|
||||
transcription: true
|
||||
embeddings: false
|
||||
max_tokens: null
|
||||
context_window: null
|
||||
|
||||
# OpenAI Embedding Models
|
||||
"text-embedding-3-small":
|
||||
chat: false
|
||||
tools: false
|
||||
vision: false
|
||||
streaming: false
|
||||
speech: false
|
||||
transcription: false
|
||||
embeddings: true
|
||||
max_tokens: null
|
||||
context_window: 8191
|
||||
dimensions: 1536
|
||||
|
||||
"text-embedding-3-large":
|
||||
chat: false
|
||||
tools: false
|
||||
vision: false
|
||||
streaming: false
|
||||
speech: false
|
||||
transcription: false
|
||||
embeddings: true
|
||||
max_tokens: null
|
||||
context_window: 8191
|
||||
dimensions: 3072
|
||||
|
||||
"text-embedding-ada-002":
|
||||
chat: false
|
||||
tools: false
|
||||
vision: false
|
||||
streaming: false
|
||||
speech: false
|
||||
transcription: false
|
||||
embeddings: true
|
||||
max_tokens: null
|
||||
context_window: 8191
|
||||
dimensions: 1536
|
||||
|
||||
# Anthropic Models
|
||||
"claude-3-haiku-20240307":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: true
|
||||
streaming: true
|
||||
max_tokens: 4096
|
||||
context_window: 200000
|
||||
|
||||
"claude-3-sonnet-20240229":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: true
|
||||
streaming: true
|
||||
max_tokens: 4096
|
||||
context_window: 200000
|
||||
|
||||
"claude-3-opus-20240229":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: true
|
||||
streaming: true
|
||||
max_tokens: 4096
|
||||
context_window: 200000
|
||||
|
||||
# Google Models
|
||||
"gemini-pro":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: false
|
||||
streaming: true
|
||||
max_tokens: 8192
|
||||
context_window: 32768
|
||||
|
||||
"gemini-2.0-flash-001":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: true
|
||||
streaming: true
|
||||
max_tokens: 8192
|
||||
context_window: 32768
|
||||
|
||||
"gemini-1.5-pro":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: true
|
||||
streaming: true
|
||||
max_tokens: 8192
|
||||
context_window: 1000000
|
||||
|
||||
# Gemini Transcription Models
|
||||
"gemini-2.5-flash":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: true
|
||||
streaming: true
|
||||
speech: false
|
||||
transcription: true
|
||||
embeddings: false
|
||||
max_tokens: 8192
|
||||
context_window: 1000000
|
||||
audio_max_duration: 34200 # 9.5 hours in seconds
|
||||
|
||||
"gemini-2.5-pro":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: true
|
||||
streaming: true
|
||||
speech: false
|
||||
transcription: true
|
||||
embeddings: false
|
||||
max_tokens: 8192
|
||||
context_window: 2000000
|
||||
audio_max_duration: 34200 # 9.5 hours in seconds
|
||||
|
||||
# Gemini TTS Models
|
||||
"gemini-2.5-flash-preview-tts":
|
||||
chat: false
|
||||
tools: false
|
||||
vision: false
|
||||
streaming: false
|
||||
speech: true
|
||||
transcription: false
|
||||
embeddings: false
|
||||
max_tokens: 32000 # 32k token context window for TTS
|
||||
context_window: 32000
|
||||
audio_format: "pcm"
|
||||
sample_rate: 24000
|
||||
channels: 1
|
||||
|
||||
"gemini-2.5-pro-preview-tts":
|
||||
chat: false
|
||||
tools: false
|
||||
vision: false
|
||||
streaming: false
|
||||
speech: true
|
||||
transcription: false
|
||||
embeddings: false
|
||||
max_tokens: 32000 # 32k token context window for TTS
|
||||
context_window: 32000
|
||||
audio_format: "pcm"
|
||||
sample_rate: 24000
|
||||
channels: 1
|
||||
|
||||
# Mistral Models
|
||||
"mistral-7b-instruct":
|
||||
chat: true
|
||||
tools: false
|
||||
vision: false
|
||||
streaming: true
|
||||
max_tokens: 4096
|
||||
context_window: 32768
|
||||
|
||||
"mistral-8x7b-instruct":
|
||||
chat: true
|
||||
tools: true
|
||||
vision: false
|
||||
streaming: true
|
||||
max_tokens: 4096
|
||||
context_window: 32768
|
||||
|
||||
# Test configuration
|
||||
test_settings:
|
||||
# Maximum tokens for test responses
|
||||
max_tokens:
|
||||
chat: 100
|
||||
vision: 200
|
||||
tools: 100
|
||||
complex: 300
|
||||
speech: null # Speech doesn't use token limits
|
||||
transcription: null # Transcription doesn't use token limits
|
||||
embeddings: null # Embeddings don't use token limits (text is the input)
|
||||
|
||||
# Timeout settings for tests
|
||||
timeouts:
|
||||
simple: 30 # seconds
|
||||
complex: 60 # seconds
|
||||
|
||||
# Retry settings for flaky tests
|
||||
retries:
|
||||
max_attempts: 3
|
||||
delay: 2 # seconds
|
||||
|
||||
# Integration-specific settings
|
||||
integration_settings:
|
||||
openai:
|
||||
organization: "${OPENAI_ORG_ID:-}"
|
||||
project: "${OPENAI_PROJECT_ID:-}"
|
||||
|
||||
anthropic:
|
||||
version: "2023-06-01"
|
||||
|
||||
google:
|
||||
project_id: "${GOOGLE_PROJECT_ID:-}"
|
||||
location: "${GOOGLE_LOCATION:-us-central1}"
|
||||
|
||||
litellm:
|
||||
drop_params: true
|
||||
debug: false
|
||||
|
||||
langchain:
|
||||
debug: false
|
||||
streaming: true
|
||||
|
||||
bedrock:
|
||||
region: "${AWS_REGION:-us-west-2}"
|
||||
s3_bucket: "${AWS_S3_BUCKET:-}"
|
||||
batch_role_arn: "${AWS_ARN:-}"
|
||||
output_s3_prefix: "${AWS_OUTPUT_S3_PREFIX:-bifrost-batch-output/}"
|
||||
|
||||
azure:
|
||||
api_version: "${AZURE_API_VERSION:-2024-10-21}"
|
||||
|
||||
# Environment-specific overrides
|
||||
environments:
|
||||
development:
|
||||
api:
|
||||
timeout: 60
|
||||
max_retries: 5
|
||||
test_settings:
|
||||
timeouts:
|
||||
simple: 60
|
||||
complex: 120
|
||||
|
||||
production:
|
||||
api:
|
||||
timeout: 15
|
||||
max_retries: 2
|
||||
test_settings:
|
||||
timeouts:
|
||||
simple: 20
|
||||
complex: 40
|
||||
|
||||
# Virtual key testing configuration
|
||||
# When enabled, cross-provider tests will run twice: with and without the x-bf-vk header
|
||||
virtual_key:
|
||||
enabled: true
|
||||
value: "sk-bf-test-key"
|
||||
|
||||
# Logging configuration
|
||||
logging:
|
||||
level: "INFO"
|
||||
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
file: "tests.log"
|
||||
12
tests/integrations/python/dummy-gcp-credentials.json
Normal file
12
tests/integrations/python/dummy-gcp-credentials.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"type": "service_account",
|
||||
"project_id": "dummy-bifrost-project",
|
||||
"private_key_id": "dummy-key-id-12345",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCY+aj4fvYTj4l9\nYcgnEg7f9Y2zcck8bvYrhIY/m0NJpfUV2rOAbvgHJXUgobmUcgf6E9b76AWVN/Wm\nk6dxE+PWj1/DwkaYk4uDHpWFOn6HkF7ypLeGMamSnU+OfKFoUrRW8NfoMgh+uGVt\nwMh82qBztaTJKjN2BlxBepgR0iZKG81ySkyhaUL1Jh99E3AcNULkkp+VHTD51lw6\n4H0B197tY18GUZ+iPK3Laj9HBVOAjxqsCs4cMsWZ16R+dfZr8ZcDC0zodhcVNSsX\nA7uKZ5tKChJQEzHhk4o8ywnrsyd4E9FHKHsbs+Ye7K5qrTOxpKpDiy6DOPlknZ0/\nzLjk+SARAgMBAAECggEAAepUIktYZnmvblI//Sj8rHdJRoJGOqxNcnaW+4b+euUW\nQ1CspV1+U51amCBvza6kZ+0gaKEhi3lAAhfYQFx5YGtHTbHtKwjTL6oDrKKTncx9\nz/oJYeV6vVTOGGCjZQx2f30DwJZE0XG/1Qpl6L9SSBv14HlwY/6US75snRsWvCAc\nYlWJCozpn5ycSZDbqQBSVF3ueeHoH4ahL5Iw2NELAk87HLBGbtfvfwWpnQqlJtIi\ndQWqYYBHrqk5ThQNcJl1o3oBY7MMVE6/jWZbr+aIXtgfQlG3j+Z6PD6/7g3z65Yy\nxiOrypqpEm8UhmmxoF7UjSci+32NLj7SfdgXM2QLgwKBgQDP3iOCWBmzK72tWeF9\nkNKTXn/6niJHgaPhVTVYsAEoQMcWBtGSUF7QRWZt4qu8APRTwhThFpLVLSZOk50I\ndf9xFhWqubsif/ox1Fbd3SGswwoWhHvGHQ/JH/75akpMKTXkkLDomNNL49kNwW1E\nmb1EJPOeyuOxhw8gP7v8qJ8cAwKBgQC8ZaLgYUmMsc+IDpREN22fXNmoBE0OgrIK\nBLa9rQRAbzdmKcNxLpGAEsyiuPOrgD/9U2G9hM6kztCN53Ho86rphFiHHgN2NJfH\n/Jz/jTtM3UPKv0QCHuLTZknLFeYE3A0jNYFpRi/hjy2n0E4Gtp/0Y0ZULseMSvM5\naN7CWGS5WwKBgQDPCWb+vTcjwO5UCdDQ2v0RsS7w9K4Z4KLUnaTbp7oPWK2yX6o+\n+/PjpywFSJ5aS+0Ou6FGK9ClqSmdW+MteTGqdh+wgvtDuon9NYwrwMN4qm6SzPPm\n+C0v2sF/tIE56FX4SLEbipPx44fd7okhqarcg51uzJAK0wWazkAzv9Nx9wKBgEUk\n9EtvyWO22tkvqKEEytoDZOrycSmTNC7THhKtTnMrnmSDjXSbx9D+lVZflSbrkhCy\nqpu5A3KfaRG70SXTUHYWGbu1e0XF9bLzdtegCRSj3L6rxhUVKuC1mP3NUreT38p9\nV7rAhNA/EV2W6RwzqK80RFqfNKO72lrGr4MamBUjAoGBAJzy/47STnaW23aPutJF\nU23Kp5QDSkZzCniDBNIbuxlgZ5x2m4wK0FPRwWBcuvisG3G9VXohEfxJ0/IG8t6/\nOH1tVXYeR9pWtGIWEZuzFHL38ji4/BL3i94gW26GntJrr1ut94KHN1ynqkYRP/gK\ngRU91/0vXG+SOTubYUh5G5w3\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "dummy-bifrost@dummy-bifrost-project.iam.gserviceaccount.com",
|
||||
"client_id": "123456789012345678901",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dummy-bifrost%40dummy-bifrost-project.iam.gserviceaccount.com"
|
||||
}
|
||||
126
tests/integrations/python/pyproject.toml
Normal file
126
tests/integrations/python/pyproject.toml
Normal file
@@ -0,0 +1,126 @@
|
||||
[project]
|
||||
name = "bifrost-integration-tests"
|
||||
version = "0.1.0"
|
||||
description = "Production-ready end-to-end test suite for testing AI integrations through Bifrost proxy"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
||||
dependencies = [
|
||||
# Core testing framework
|
||||
"pytest>=7.0.0",
|
||||
"pytest-asyncio>=0.21.0",
|
||||
# Environment and configuration
|
||||
"python-dotenv>=1.0.0",
|
||||
"PyYAML>=6.0",
|
||||
# Image processing
|
||||
"Pillow>=9.0.0",
|
||||
# HTTP requests for debugging
|
||||
"requests>=2.28.0",
|
||||
# Type hints
|
||||
"typing-extensions>=4.0.0",
|
||||
# Test reporting
|
||||
"pytest-html>=3.1.0",
|
||||
"pytest-cov>=4.0.0",
|
||||
# AI/ML SDK dependencies
|
||||
"openai>=1.30.0",
|
||||
"anthropic>=0.25.0",
|
||||
"litellm==1.80.5",
|
||||
"langchain-openai==0.1.0",
|
||||
"langchain-core==0.3.81",
|
||||
"langchain-anthropic==0.1.0",
|
||||
"langchain-google-genai==4.1.1",
|
||||
"langchain-mistralai==0.1.0",
|
||||
"langgraph>=0.1.0",
|
||||
"mistralai>=0.4.0",
|
||||
"google-genai>=1.50.0",
|
||||
"pydantic-ai>=0.1.0",
|
||||
"boto3>=1.34.0",
|
||||
# Testing utilities
|
||||
"websocket-client>=1.6.0",
|
||||
"httpx>=0.25.0",
|
||||
"pytest-timeout>=2.1.0",
|
||||
"pytest-mock>=3.11.0",
|
||||
"pytest-rerunfailures>=11.0",
|
||||
"langchain-google-vertexai>=3.1.0",
|
||||
"langchain-tests>=1.0.2",
|
||||
"langchain>=1.1.0",
|
||||
"langchain-community>=0.4.1",
|
||||
"langchain-aws>=1.1.0",
|
||||
"pytest-xdist>=3.8.0",
|
||||
"pyasn1>=0.6.2",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"black>=23.0.0", # Code formatting
|
||||
"flake8>=6.0.0", # Linting
|
||||
"mypy>=1.5.0", # Type checking
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
# Test discovery
|
||||
testpaths = ["."]
|
||||
python_files = "test_*.py"
|
||||
python_classes = "Test*"
|
||||
python_functions = "test_*"
|
||||
|
||||
# Output formatting
|
||||
addopts = [
|
||||
"-v",
|
||||
"-s", # Show print statements (no output capture)
|
||||
"--tb=short",
|
||||
"--strict-markers",
|
||||
"--disable-warnings",
|
||||
"--color=yes",
|
||||
]
|
||||
|
||||
# Logging configuration
|
||||
log_cli = true
|
||||
log_cli_level = "ERROR"
|
||||
log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s"
|
||||
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
# Timeout settings (5 minutes per test)
|
||||
timeout = 300
|
||||
|
||||
# Markers for test categorization
|
||||
markers = [
|
||||
"integration: marks tests as integration tests",
|
||||
"slow: marks tests as slow running",
|
||||
"e2e: marks tests as end-to-end tests",
|
||||
"tool_calling: marks tests as tool calling tests",
|
||||
"flaky: marks tests as flaky with automatic retries (reruns=3, reruns_delay=2)",
|
||||
]
|
||||
|
||||
# Minimum version
|
||||
minversion = "7.0"
|
||||
|
||||
[tool.black]
|
||||
line-length = 100
|
||||
target-version = ['py38', 'py39', 'py310', 'py311']
|
||||
include = '\.pyi?$'
|
||||
|
||||
[tool.mypy]
|
||||
python_version = "3.11"
|
||||
warn_return_any = true
|
||||
warn_unused_configs = true
|
||||
disallow_untyped_defs = false
|
||||
ignore_missing_imports = true
|
||||
|
||||
[tool.coverage.run]
|
||||
source = ["tests"]
|
||||
omit = ["*/tests/*", "*/venv/*", "*/.venv/*"]
|
||||
|
||||
[tool.coverage.report]
|
||||
exclude_lines = [
|
||||
"pragma: no cover",
|
||||
"def __repr__",
|
||||
"raise AssertionError",
|
||||
"raise NotImplementedError",
|
||||
"if __name__ == .__main__.:",
|
||||
"if TYPE_CHECKING:",
|
||||
]
|
||||
|
||||
|
||||
[tool.uv]
|
||||
exclude-newer = "2026-04-08"
|
||||
343
tests/integrations/python/run_all_tests.py
Executable file
343
tests/integrations/python/run_all_tests.py
Executable file
@@ -0,0 +1,343 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Bifrost Integration End-to-End Test Runner
|
||||
|
||||
This script runs all integration end-to-end tests for Bifrost.
|
||||
It can run tests individually or all together, providing comprehensive
|
||||
reporting and flexible execution options.
|
||||
|
||||
Usage:
|
||||
python run_all_tests.py # Run all tests
|
||||
python run_all_tests.py --integration openai # Run specific integration
|
||||
python run_all_tests.py --list # List available integrations
|
||||
python run_all_tests.py --parallel # Run tests in parallel
|
||||
python run_all_tests.py --verbose # Verbose output
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
import concurrent.futures
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class BifrostTestRunner:
    """Run the Bifrost integration test files with pytest and report results.

    Each entry in ``self.integrations`` names a pytest file (relative to the
    directory containing this script), a human-readable description, and the
    environment variables that must be set before that file is run.  Outcomes
    are collected in ``self.results``, keyed by integration name.
    """

    # Wall-clock limit, in seconds, for one integration's pytest subprocess.
    TEST_TIMEOUT_SECONDS = 300

    def __init__(self):
        """Set up the integration registry and an empty result store."""
        self.test_dir = Path(__file__).parent
        self.integrations = {
            "openai": {
                "file": "tests/integrations/test_openai.py",
                "description": "OpenAI Python SDK integration tests",
                "env_vars": ["OPENAI_API_KEY"],
            },
            "anthropic": {
                "file": "tests/integrations/test_anthropic.py",
                "description": "Anthropic Python SDK integration tests",
                "env_vars": ["ANTHROPIC_API_KEY"],
            },
            "litellm": {
                "file": "tests/integrations/test_litellm.py",
                "description": "LiteLLM integration tests",
                "env_vars": ["OPENAI_API_KEY"],  # LiteLLM can use OpenAI key
            },
            "langchain": {
                "file": "tests/integrations/test_langchain.py",
                "description": "LangChain integration tests",
                "env_vars": [
                    "OPENAI_API_KEY",
                    "ANTHROPIC_API_KEY",
                ],  # LangChain uses multiple providers
            },
            "google": {
                "file": "tests/integrations/test_google.py",
                "description": "Google GenAI integration tests",
                "env_vars": ["GOOGLE_API_KEY"],
            },
            "bedrock": {
                "file": "tests/integrations/test_bedrock.py",
                "description": "Bedrock integration tests",
                "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
            },
        }

        self.results = {}

    def check_environment(self, integration: str) -> bool:
        """Check if required environment variables are set for an integration.

        Args:
            integration: Key into ``self.integrations``.

        Returns:
            True when every required variable is set to a non-empty value.
            Otherwise a "Skipping" notice naming the missing variables is
            printed and False is returned.

        Raises:
            KeyError: If ``integration`` is not a registered integration.
        """
        required = self.integrations[integration]["env_vars"]
        # An empty value counts as missing, same as an unset variable.
        missing_vars = [var for var in required if not os.getenv(var)]

        if missing_vars:
            print(
                f"⚠ Skipping {integration}: Missing environment variables: {', '.join(missing_vars)}"
            )
            return False

        return True

    def run_integration_test(self, integration: str, verbose: bool = False) -> Dict:
        """Run tests for a specific integration.

        Args:
            integration: Name of the integration to run.
            verbose: When True, pytest runs with ``-v -s`` and its output is
                streamed to the terminal instead of being captured.

        Returns:
            A result dict that always contains ``"success"``.  Failures also
            contain ``"error"``; skipped runs contain ``"skipped": True``;
            completed runs contain ``"return_code"``, ``"stdout"``,
            ``"stderr"`` and ``"elapsed_time"``.
        """
        if integration not in self.integrations:
            return {"success": False, "error": f"Unknown integration: {integration}"}

        config = self.integrations[integration]
        test_file = self.test_dir / config["file"]

        if not test_file.exists():
            return {"success": False, "error": f"Test file not found: {test_file}"}

        # Check environment variables
        if not self.check_environment(integration):
            return {
                "success": False,
                "error": "Missing required environment variables",
                "skipped": True,
            }

        print(f"\n{'='*60}")
        print(f"Running {integration.upper()} Integration Tests")
        print(f"{'='*60}")
        print(f"Description: {config['description']}")
        print(f"Test file: {config['file']}")

        start_time = time.time()

        # Use the interpreter running this script so pytest comes from the
        # same environment.
        cmd = [sys.executable, "-m", "pytest", str(test_file)]
        # Verbose mode disables pytest's capture; otherwise keep output quiet.
        cmd.extend(["-v", "-s"] if verbose else ["-q"])

        try:
            result = subprocess.run(
                cmd,
                cwd=self.test_dir,
                text=True,
                # In verbose mode output goes straight to the terminal, so
                # there is nothing to capture.
                capture_output=not verbose,
                timeout=self.TEST_TIMEOUT_SECONDS,
            )
        except subprocess.TimeoutExpired:
            return {
                "success": False,
                "error": f"Test timed out ({self.TEST_TIMEOUT_SECONDS // 60} minutes)",
                "elapsed_time": self.TEST_TIMEOUT_SECONDS,
            }
        except Exception as e:
            # Boundary handler: any launch failure becomes a failed result so
            # the remaining integrations still run.
            return {
                "success": False,
                "error": str(e),
                "elapsed_time": time.time() - start_time,
            }

        outcome = {
            "success": result.returncode == 0,
            "return_code": result.returncode,
            "stdout": result.stdout if not verbose else "",
            "stderr": result.stderr if not verbose else "",
            "elapsed_time": time.time() - start_time,
        }
        if not outcome["success"]:
            # Without this, callers fall back to "Unknown error" for ordinary
            # test failures.
            outcome["error"] = f"pytest exited with exit code {result.returncode}"
        return outcome

    def run_all_tests(self, parallel: bool = False, verbose: bool = False) -> None:
        """Run all integration tests and print a summary.

        Args:
            parallel: Run integrations concurrently instead of one by one.
            verbose: Forwarded to ``run_integration_test``.
        """
        print("Bifrost Integration End-to-End Test Suite")
        print("=" * 50)
        print(f"Running tests for {len(self.integrations)} integrations")
        print(f"Parallel execution: {'Enabled' if parallel else 'Disabled'}")
        print(f"Verbose output: {'Enabled' if verbose else 'Disabled'}")

        # The URL is only reported here; no connectivity check is performed.
        bifrost_url = os.getenv("BIFROST_BASE_URL", "http://localhost:8080")
        print(f"Bifrost URL: {bifrost_url}")

        start_time = time.time()

        if parallel:
            self._run_parallel(verbose)
        else:
            self._run_sequential(verbose)

        total_time = time.time() - start_time
        self._print_summary(total_time)

    def _run_sequential(self, verbose: bool) -> None:
        """Run tests sequentially, storing each outcome in ``self.results``."""
        for integration in self.integrations:
            self.results[integration] = self.run_integration_test(integration, verbose)

    def _run_parallel(self, verbose: bool) -> None:
        """Run tests in parallel on a small thread pool."""
        print("\nRunning tests in parallel...")

        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            # Submit all tests
            future_to_integration = {
                executor.submit(
                    self.run_integration_test, integration, verbose
                ): integration
                for integration in self.integrations
            }

            # Collect results
            for future in concurrent.futures.as_completed(future_to_integration):
                integration = future_to_integration[future]
                try:
                    self.results[integration] = future.result()
                except Exception as e:
                    self.results[integration] = {"success": False, "error": str(e)}

    @staticmethod
    def _describe_failure(result: Dict) -> str:
        """Return the most specific failure description a result provides."""
        error = result.get("error")
        if error:
            return error
        return_code = result.get("return_code")
        if return_code is not None:
            return f"pytest exited with exit code {return_code}"
        return "Unknown error"

    def _print_summary(self, total_time: float) -> None:
        """Print test summary.

        Args:
            total_time: Total wall-clock duration of the run, in seconds.

        Raises:
            SystemExit: With status 1 when at least one integration failed.
        """
        print(f"\n{'='*60}")
        print("TEST SUMMARY")
        print(f"{'='*60}")

        passed = 0
        failed = 0
        skipped = 0

        for integration, result in self.results.items():
            status = (
                "SKIPPED"
                if result.get("skipped")
                else ("PASSED" if result["success"] else "FAILED")
            )
            elapsed = result.get("elapsed_time", 0)

            if result.get("skipped"):
                skipped += 1
                print(
                    f"⚠ {integration:12} {status:8} - {result.get('error', 'Unknown error')}"
                )
            elif result["success"]:
                passed += 1
                print(f"✓ {integration:12} {status:8} - {elapsed:.2f}s")
            else:
                failed += 1
                error_msg = self._describe_failure(result)
                print(f"✗ {integration:12} {status:8} - {error_msg}")

                # Print stderr if available
                if "stderr" in result and result["stderr"]:
                    print(f" Error output: {result['stderr'][:200]}...")

        print(f"\n{'='*60}")
        print(
            f"Total: {len(self.integrations)} | Passed: {passed} | Failed: {failed} | Skipped: {skipped}"
        )
        print(f"Total time: {total_time:.2f} seconds")
        print(f"{'='*60}")

        # Exit with appropriate code
        if failed > 0:
            sys.exit(1)
        else:
            print("All tests completed successfully!")

    def list_integrations(self) -> None:
        """List available integrations.

        Each integration is marked by whether its required environment
        variables are set; ``check_environment`` also prints its own notice
        for any integration with missing variables.
        """
        print("Available Integrations:")
        print("=" * 30)

        for integration, config in self.integrations.items():
            env_status = "✓" if self.check_environment(integration) else "✗"
            print(f"{env_status} {integration:12} - {config['description']}")
            print(f" Required env vars: {', '.join(config['env_vars'])}")
            print()
|
||||
|
||||
|
||||
def main():
    """Parse the command line and dispatch to the requested runner action.

    ``--list`` prints the known integrations, ``--integration NAME`` runs a
    single integration (exiting with status 1 on an unknown name or a failed
    run), and with neither option every integration is run.
    """
    cli = argparse.ArgumentParser(
        description="Run Bifrost integration end-to-end tests",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python run_all_tests.py # Run all tests
python run_all_tests.py --integration openai # Run OpenAI tests only
python run_all_tests.py --parallel --verbose # Run all tests in parallel with verbose output
python run_all_tests.py --list # List available integrations
""",
    )
    cli.add_argument(
        "--integration", "-i", help="Run tests for specific integration only"
    )
    cli.add_argument(
        "--list",
        "-l",
        action="store_true",
        help="List available integrations and their status",
    )
    cli.add_argument(
        "--parallel",
        "-p",
        action="store_true",
        help="Run tests in parallel (faster but less readable output)",
    )
    cli.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Enable verbose output (shows test output in real-time)",
    )
    options = cli.parse_args()

    runner = BifrostTestRunner()

    # Listing takes precedence over every other option.
    if options.list:
        runner.list_integrations()
        return

    # No specific integration requested: run the whole suite.
    if not options.integration:
        runner.run_all_tests(options.parallel, options.verbose)
        return

    if options.integration not in runner.integrations:
        print(f"Error: Unknown integration '{options.integration}'")
        print(f"Available integrations: {', '.join(runner.integrations.keys())}")
        sys.exit(1)

    outcome = runner.run_integration_test(options.integration, options.verbose)
    if outcome["success"]:
        print(f"\n✓ {options.integration} tests passed!")
        return

    reason = outcome.get("error", "Unknown error")
    print(f"\n✗ {options.integration} tests failed: {reason}")

    # Show captured stdout/stderr when the run produced any.
    captured_out = outcome.get("stdout")
    if captured_out:
        print("\n--- Test Output ---")
        print(captured_out)
    captured_err = outcome.get("stderr")
    if captured_err:
        print("\n--- Error Output ---")
        print(captured_err)

    sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
272
tests/integrations/python/run_integration_tests.py
Executable file
272
tests/integrations/python/run_integration_tests.py
Executable file
@@ -0,0 +1,272 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Integration-specific test runner for Bifrost integration tests.
|
||||
|
||||
This script runs tests for each integration independently using their native SDKs.
|
||||
No more complex gateway conversions - just direct testing!
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
def check_api_keys():
    """Split the known integrations by whether their key variable is set.

    Returns:
        A ``(available, missing)`` tuple of integration-name lists.  An
        integration is available when its environment variable holds a
        non-empty value; both lists keep the declaration order below.
    """
    env_var_by_integration = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "google": "GOOGLE_API_KEY",
        "litellm": "LITELLM_API_KEY",
        "bedrock": "AWS_ACCESS_KEY_ID",
    }

    available = []
    missing = []
    for name, env_var in env_var_by_integration.items():
        # Unset and empty values are both treated as "no key".
        if os.getenv(env_var):
            available.append(name)
        else:
            missing.append(name)

    return available, missing
|
||||
|
||||
|
||||
def run_integration_tests(
    integrations: List[str], test_pattern: Optional[str] = None, verbose: bool = False
):
    """Run the pytest file of each requested integration and collect results.

    Args:
        integrations: Integration names; each maps to
            ``tests/integrations/test_<name>.py`` next to this script.
        test_pattern: Optional pytest ``-k`` expression to select tests.
        verbose: Pass ``-v`` to pytest and echo its output for passing runs;
            otherwise pytest runs with ``-q``.

    Returns:
        A dict keyed by integration name.  Each value is either
        ``{"error": message}`` (missing test file or launch failure) or
        ``{"returncode", "stdout", "stderr"}`` for a completed pytest run,
        where ``stderr`` is always empty because it is merged into stdout.
    """
    import sys  # local import keeps this block self-contained

    results = {}

    # Resolve test files relative to this script, not the caller's cwd.
    script_dir = Path(__file__).parent

    for integration in integrations:
        print(f"\n{'='*60}")
        print(f"🧪 TESTING {integration.upper()} INTEGRATION")
        print(f"{'='*60}")

        test_file = script_dir / "tests" / "integrations" / f"test_{integration}.py"

        # Check if test file exists
        if not test_file.exists():
            print(f"❌ Test file not found: {test_file}")
            results[integration] = {"error": f"Test file not found: {test_file}"}
            continue

        # Run pytest under the interpreter executing this script; a bare
        # "python" may resolve to a different environment or not exist.
        cmd = [sys.executable, "-m", "pytest", str(test_file)]

        if test_pattern:
            cmd.extend(["-k", test_pattern])

        cmd.append("-v" if verbose else "-q")

        # No "-m <integration>" marker filter: the test file already selects
        # the integration.

        # Run the tests
        try:
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                check=True,  # non-zero exit raises CalledProcessError below
            )
            results[integration] = {
                "returncode": result.returncode,
                "stdout": result.stdout,
                "stderr": "",  # stderr is now captured in stdout
            }

            # Print results
            print(f"✅ {integration.upper()} tests PASSED")

            if verbose:
                print(result.stdout)

        except subprocess.CalledProcessError as e:
            print(f"❌ {integration.upper()} tests FAILED")
            results[integration] = {
                "returncode": e.returncode,
                "stdout": e.stdout,
                "stderr": "",  # stderr is captured in stdout
            }

            # Always print output on failure to show what went wrong
            if e.stdout:
                print(e.stdout)

        except Exception as e:
            # Boundary handler: record the launch failure and move on to the
            # next integration.
            print(f"❌ Error running {integration} tests: {e}")
            results[integration] = {"error": str(e)}

    return results
|
||||
|
||||
|
||||
def print_summary(
    results: dict, available_integrations: List[str], missing_integrations: List[str]
):
    """Print the closing report: key availability, per-integration results, totals.

    Args:
        results: Mapping of integration name to its result dict; an entry
            either has an ``"error"`` message or a ``"returncode"``.
        available_integrations: Integrations reported as having an API key.
        missing_integrations: Integrations reported as lacking an API key.
    """
    banner = "=" * 80
    print(f"\n{banner}")
    print("🎯 FINAL SUMMARY")
    print(banner)

    # API key status
    print("\n🔑 API Key Status:")
    for name in available_integrations:
        print(f" ✅ {name.upper()}: Available")
    for name in missing_integrations:
        print(f" ❌ {name.upper()}: Missing API key")

    # Per-integration results
    print("\n📊 Test Results:")
    passed = []
    failed = []
    for name, outcome in results.items():
        label = name.upper()
        if "error" in outcome:
            # The run could not be completed at all.
            print(f" 💥 {label}: Error - {outcome['error']}")
            failed.append(name)
            continue
        if outcome["returncode"] == 0:
            print(f" ✅ {label}: All tests passed")
            passed.append(name)
            continue
        print(f" ❌ {label}: Some tests failed")
        failed.append(name)

    # Overall status
    tested_count = len(results)
    passed_count = len(passed)

    print("\n🏆 Overall Results:")
    print(f" Integrations tested: {tested_count}")
    print(f" Integrations passed: {passed_count}")
    if tested_count > 0:
        print(f" Success rate: {(passed_count/tested_count)*100:.1f}%")
    else:
        # Avoid dividing by zero when nothing was run.
        print(" Success rate: N/A")

    if failed:
        print(f"\n⚠️ Failed integrations: {', '.join(failed)}")
        print(" Check the detailed output above for specific test failures.")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the integration test runner.

    Parses the CLI flags and asks ``check_api_keys()`` which integrations have
    keys.  Depending on the flags it then prints the key status
    (``--check-keys``), prints the model summary (``--show-models``), or runs
    the selected integration tests.  After a test run the process exits with
    the number of integrations whose result has a non-zero ``returncode`` or
    an ``error`` entry.
    """
    known_integrations = ["openai", "anthropic", "google", "litellm"]

    cli = argparse.ArgumentParser(
        description="Run integration-specific integration tests"
    )
    cli.add_argument(
        "--integrations",
        nargs="+",
        choices=[*known_integrations, "all"],
        default=["all"],
        help="Integrations to test (default: all available)",
    )
    cli.add_argument(
        "--test", help="Run specific test pattern (e.g., 'test_01_simple_chat')"
    )
    cli.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    cli.add_argument(
        "--check-keys", action="store_true", help="Only check API key availability"
    )
    cli.add_argument(
        "--show-models",
        action="store_true",
        help="Show model configuration for all integrations",
    )
    options = cli.parse_args()

    # Key availability drives every mode below, so resolve it once up front.
    with_keys, without_keys = check_api_keys()

    if options.check_keys:
        print("🔑 API Key Status:")
        for name in with_keys:
            print(f" ✅ {name.upper()}: Available")
        for name in without_keys:
            print(f" ❌ {name.upper()}: Missing")
        return

    if options.show_models:
        runner_dir = Path(__file__).parent
        models_file = runner_dir / "tests" / "utils" / "models.py"
        if not models_file.exists():
            print(f"❌ Models file not found: {models_file}")
            sys.exit(1)

        # The runner's own directory must be importable for ``tests.utils``.
        import_root = str(runner_dir)
        if import_root not in sys.path:
            sys.path.insert(0, import_root)

        try:
            from tests.utils.models import print_model_summary

            print_model_summary()
        except ImportError as exc:
            print(f"❌ Could not import print_model_summary: {exc}")
            print(f"Tried to import from: {models_file}")
            sys.exit(1)
        return

    # Work out what was asked for and which of those can actually run.
    if "all" in options.integrations:
        runnable = with_keys
        requested = list(known_integrations)
    else:
        requested = options.integrations
        runnable = [name for name in requested if name in with_keys]

    if not runnable:
        print("❌ No integrations available for testing. Please set API keys.")
        print("\nRequired environment variables for requested integrations:")
        for name in requested:
            if name == "all":  # the keyword is not a real integration
                continue
            print(f" - {name.upper()}_API_KEY")
        sys.exit(1)

    # Requested integrations that cannot run because their key is missing.
    skipped_for_keys = [name for name in requested if name in without_keys]

    print("🚀 Starting integration tests...")
    print(f"📋 Testing integrations: {', '.join(runnable)}")
    if skipped_for_keys:
        print(
            f"⏭️ Skipping integrations (no API key): {', '.join(skipped_for_keys)}"
        )

    results = run_integration_tests(runnable, options.test, options.verbose)
    print_summary(results, with_keys, skipped_for_keys)

    # The exit status is the count of integrations that did not finish cleanly.
    failures = sum(
        outcome.get("returncode", 1) != 0 or "error" in outcome
        for outcome in results.values()
    )
    sys.exit(failures)


if __name__ == "__main__":
    main()
|
||||
8
tests/integrations/python/tests/__init__.py
Normal file
8
tests/integrations/python/tests/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""
|
||||
Bifrost Integration Tests
|
||||
|
||||
Production-ready test suite for testing various AI integrations through Bifrost proxy.
|
||||
Supports multiple integrations with uniform test interface.
|
||||
"""
|
||||
|
||||
__version__ = "1.0.0"
|
||||
188
tests/integrations/python/tests/conftest.py
Normal file
188
tests/integrations/python/tests/conftest.py
Normal file
@@ -0,0 +1,188 @@
|
||||
"""
|
||||
Pytest configuration for integration-specific tests.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import os
|
||||
import logging
|
||||
|
||||
|
||||
def pytest_configure(config):
    """Set up logging and register the custom markers used by this suite.

    Args:
        config: The pytest config object; each marker is registered through
            its ``addinivalue_line`` method.
    """
    logging.basicConfig(
        datefmt='%Y-%m-%d %H:%M:%S',
        format='%(asctime)s [%(levelname)8s] %(name)s: %(message)s',
        level=logging.ERROR,
    )

    # One "name: description" entry per marker, registered in this order.
    marker_descriptions = (
        "openai: mark test as requiring OpenAI API key",
        "anthropic: mark test as requiring Anthropic API key",
        "google: mark test as requiring Google API key",
        "litellm: mark test as requiring LiteLLM setup",
        "azure: Azure OpenAI integration tests",
        "flaky: mark test as flaky with automatic retries (reruns=3, reruns_delay=2)",
    )
    for description in marker_descriptions:
        config.addinivalue_line("markers", description)
|
||||
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
    """Tag every collected test with a retry marker and an integration marker.

    Each item gets the ``flaky`` marker (reruns=3, reruns_delay=2).  It also
    gets the marker of the first integration, in the order below, whose
    ``test_<name>`` string appears in the item's node id; items matching none
    get no integration marker.

    Args:
        config: Unused; part of the hook signature.
        items: The collected test items, modified in place.
    """
    rerun_on_failure = pytest.mark.flaky(reruns=3, reruns_delay=2)
    integration_names = ("openai", "anthropic", "google", "litellm", "azure")

    for item in items:
        item.add_marker(rerun_on_failure)

        # First match wins, mirroring an if/elif chain over the names.
        for name in integration_names:
            if f"test_{name}" in item.nodeid:
                item.add_marker(getattr(pytest.mark, name))
                break
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def api_keys():
    """Map each integration name to its API key from the environment.

    The key for integration ``name`` is read from ``<NAME>_API_KEY``; the
    value is ``None`` when that variable is not set.
    """
    integrations = ("openai", "anthropic", "google", "litellm", "azure")
    return {name: os.getenv(f"{name.upper()}_API_KEY") for name in integrations}
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def available_integrations(api_keys):
    """List the integrations whose API key is set to a non-empty value.

    Args:
        api_keys: Mapping of integration name to its key (or ``None``).

    Returns:
        Integration names in the fixed order openai, anthropic, google,
        litellm, azure, keeping only those with a truthy key.
    """
    candidates = ("openai", "anthropic", "google", "litellm", "azure")
    return [name for name in candidates if api_keys[name]]
|
||||
|
||||
|
||||
@pytest.fixture
def test_summary():
    """Provide a fresh, empty result collector with one list per outcome."""
    return {outcome: [] for outcome in ("passed", "failed", "skipped")}
|
||||
|
||||
|
||||
def pytest_runtest_makereport(item, call):
    """Record each test's outcome on the session for the end-of-run summary.

    Outcomes are appended to ``item.session.test_results`` (when that
    attribute exists) under ``"passed"``, ``"failed"`` or ``"skipped"``.
    Pass/fail results are taken from the "call" phase only, so a test is not
    counted once per phase.  Skips are recognised in both the "setup" and the
    "call" phase: a test skipped during setup never reaches its "call" phase,
    and ``pytest.skip()`` inside a test body raises during "call" and must not
    be reported as a failure.

    Args:
        item: The test item being reported on; ``nodeid``, ``name`` and
            ``session`` are read.
        call: The phase call info; ``when`` and ``excinfo`` are read.
    """
    excinfo = call.excinfo
    # pytest.skip() works by raising, so tell a skip apart from a real failure.
    was_skipped = excinfo is not None and isinstance(
        excinfo.value, pytest.skip.Exception
    )

    if call.when == "setup":
        if not was_skipped:
            return
    elif call.when != "call":
        return

    # First integration whose test_<name> string occurs in the node id.
    integration = None
    for name in ("openai", "anthropic", "google", "litellm", "azure"):
        if f"test_{name}" in item.nodeid:
            integration = name
            break

    result_info = {
        "integration": integration,
        "test": item.name,
        "nodeid": item.nodeid,
    }

    if not hasattr(item.session, "test_results"):
        return

    # NOTE(review): with rerun plugins this hook presumably fires once per
    # attempt, so a retried test may be recorded more than once — confirm.
    results = item.session.test_results
    if excinfo is None:
        results["passed"].append(result_info)
    elif was_skipped:
        results["skipped"].append(result_info)
    else:
        result_info["error"] = str(excinfo.value)
        results["failed"].append(result_info)
|
||||
|
||||
|
||||
def pytest_sessionstart(session):
    """Attach an empty per-outcome result store to the session.

    Args:
        session: The pytest session; ``test_results`` is set on it with one
            empty list each for "passed", "failed" and "skipped".
    """
    session.test_results = {
        outcome: [] for outcome in ("passed", "failed", "skipped")
    }
|
||||
|
||||
|
||||
def pytest_sessionfinish(session, exitstatus):
    """Print the per-integration summary and the failed-test details.

    Results are read from ``session.test_results`` and grouped by their
    ``"integration"`` value.  Results whose integration is missing or ``None``
    are grouped under ``"unknown"`` so they still appear in the summary.

    Args:
        session: The pytest session holding ``test_results``.
        exitstatus: Unused; part of the hook signature.
    """
    results = session.test_results

    print("\n" + "=" * 80)
    print("INTEGRATION TEST SUMMARY")
    print("=" * 80)

    # Count outcomes per integration in a single pass.  Groups appear in
    # first-seen order across passed, then failed, then skipped results.
    integration_results = {}
    for outcome in ("passed", "failed", "skipped"):
        for result in results[outcome]:
            # The key can be present with a None value, in which case a
            # ``.get`` default would not apply; ``or`` covers both cases.
            integration = result.get("integration") or "unknown"
            counts = integration_results.setdefault(
                integration, {"passed": 0, "failed": 0, "skipped": 0}
            )
            counts[outcome] += 1

    for integration, counts in integration_results.items():
        decided = counts["passed"] + counts["failed"]
        total = decided + counts["skipped"]

        print(f"\n{integration.upper()} Integration:")
        print(f" ✅ Passed: {counts['passed']}")
        print(f" ❌ Failed: {counts['failed']}")
        print(f" ⏭️ Skipped: {counts['skipped']}")
        print(f" 📊 Total: {total}")

        # The rate is only shown when something passed, so ``decided`` is
        # never zero here.
        if counts["passed"] > 0:
            success_rate = (counts["passed"] / decided) * 100
            print(f" 🎯 Success Rate: {success_rate:.1f}%")

    if results["failed"]:
        print(f"\n❌ FAILED TESTS ({len(results['failed'])}):")
        for result in results["failed"]:
            integration = result.get("integration") or "unknown"
            print(f" • {integration}: {result['test']}")
            if "error" in result:
                print(f" Error: {result['error']}")

    print("\n" + "=" * 80)
|
||||
3782
tests/integrations/python/tests/test_anthropic.py
Normal file
3782
tests/integrations/python/tests/test_anthropic.py
Normal file
File diff suppressed because it is too large
Load Diff
2433
tests/integrations/python/tests/test_azure.py
Normal file
2433
tests/integrations/python/tests/test_azure.py
Normal file
File diff suppressed because it is too large
Load Diff
2895
tests/integrations/python/tests/test_bedrock.py
Normal file
2895
tests/integrations/python/tests/test_bedrock.py
Normal file
File diff suppressed because it is too large
Load Diff
3497
tests/integrations/python/tests/test_google.py
Normal file
3497
tests/integrations/python/tests/test_google.py
Normal file
File diff suppressed because it is too large
Load Diff
1496
tests/integrations/python/tests/test_langchain.py
Normal file
1496
tests/integrations/python/tests/test_langchain.py
Normal file
File diff suppressed because it is too large
Load Diff
911
tests/integrations/python/tests/test_litellm.py
Normal file
911
tests/integrations/python/tests/test_litellm.py
Normal file
@@ -0,0 +1,911 @@
|
||||
"""
|
||||
LiteLLM Integration Tests
|
||||
|
||||
🤖 MODELS USED:
|
||||
- Chat: gpt-3.5-turbo (OpenAI via LiteLLM)
|
||||
- Vision: gpt-4o (OpenAI via LiteLLM)
|
||||
- Tools: gpt-3.5-turbo (OpenAI via LiteLLM)
|
||||
- Speech: tts-1 (OpenAI via LiteLLM)
|
||||
- Transcription: whisper-1 (OpenAI via LiteLLM)
|
||||
- Embeddings: text-embedding-3-small (OpenAI via LiteLLM)
|
||||
- Alternatives: claude-3-haiku-20240307, gemini-pro, mistral-7b-instruct, gpt-4, command-r-plus
|
||||
|
||||
Tests all 19 core scenarios using LiteLLM SDK directly:
|
||||
1. Simple chat
|
||||
2. Multi turn conversation
|
||||
3. Tool calls
|
||||
4. Multiple tool calls
|
||||
5. End2End tool calling
|
||||
6. Automatic function calling
|
||||
7. Image (url)
|
||||
8. Image (base64)
|
||||
9. Multiple images
|
||||
10. Complete end2end test with conversation history, tool calls, tool results and images
|
||||
11. Integration specific tests
|
||||
12. Error handling
|
||||
13. Streaming
|
||||
14. Google Gemini integration
|
||||
15. Mistral integration
|
||||
16. OpenAI embeddings via LiteLLM
|
||||
17. OpenAI speech synthesis via LiteLLM
|
||||
18. OpenAI transcription via LiteLLM
|
||||
19. Multi-provider comparison
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import json
|
||||
import litellm
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from .utils.common import (
|
||||
Config,
|
||||
SIMPLE_CHAT_MESSAGES,
|
||||
MULTI_TURN_MESSAGES,
|
||||
SINGLE_TOOL_CALL_MESSAGES,
|
||||
MULTIPLE_TOOL_CALL_MESSAGES,
|
||||
IMAGE_URL_MESSAGES,
|
||||
IMAGE_BASE64_MESSAGES,
|
||||
MULTIPLE_IMAGES_MESSAGES,
|
||||
COMPLEX_E2E_MESSAGES,
|
||||
INVALID_ROLE_MESSAGES,
|
||||
STREAMING_CHAT_MESSAGES,
|
||||
STREAMING_TOOL_CALL_MESSAGES,
|
||||
WEATHER_TOOL,
|
||||
CALCULATOR_TOOL,
|
||||
mock_tool_response,
|
||||
assert_valid_chat_response,
|
||||
assert_has_tool_calls,
|
||||
assert_valid_image_response,
|
||||
assert_valid_error_response,
|
||||
assert_error_propagation,
|
||||
assert_valid_streaming_response,
|
||||
collect_streaming_content,
|
||||
extract_tool_calls,
|
||||
get_api_key,
|
||||
skip_if_no_api_key,
|
||||
COMPARISON_KEYWORDS,
|
||||
WEATHER_KEYWORDS,
|
||||
LOCATION_KEYWORDS,
|
||||
# Audio and embeddings test data
|
||||
EMBEDDINGS_SINGLE_TEXT,
|
||||
EMBEDDINGS_MULTIPLE_TEXTS,
|
||||
EMBEDDINGS_SIMILAR_TEXTS,
|
||||
SPEECH_TEST_INPUT,
|
||||
generate_test_audio,
|
||||
assert_valid_speech_response,
|
||||
assert_valid_transcription_response,
|
||||
assert_valid_embedding_response,
|
||||
assert_valid_embeddings_batch_response,
|
||||
calculate_cosine_similarity,
|
||||
collect_streaming_transcription_content,
|
||||
get_provider_voice,
|
||||
get_provider_voices,
|
||||
# Token counting test data
|
||||
INPUT_TOKENS_SIMPLE_TEXT,
|
||||
INPUT_TOKENS_LONG_TEXT,
|
||||
INPUT_TOKENS_WITH_SYSTEM,
|
||||
)
|
||||
from .utils.config_loader import get_model
|
||||
from .utils.parametrize import (
|
||||
get_cross_provider_params_for_scenario,
|
||||
format_provider_model,
|
||||
)
|
||||
|
||||
# Providers left out of the cross-provider LiteLLM scenarios:
# - Bedrock and Cohere don't work well through the LiteLLM proxy.
# - Gemini is routed by LiteLLM through Vertex AI-specific endpoints, which
#   Bifrost's LiteLLM integration doesn't support.
LITELLM_EXCLUDED_PROVIDERS = [
    "bedrock",
    "cohere",
    "gemini",
]
|
||||
|
||||
|
||||
@pytest.fixture
def test_config():
    """Provide a newly constructed ``Config`` to each test that requests it."""
    config = Config()
    return config
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def setup_litellm(monkeypatch):
    """Point LiteLLM at Bifrost and install dummy credentials for every test.

    Environment variables and LiteLLM module settings are applied through
    ``monkeypatch``, so they are undone when the test finishes instead of
    overwriting real credentials for the rest of the session.

    Args:
        monkeypatch: pytest's built-in fixture, used for every temporary
            change made here.
    """
    import datetime
    from pathlib import Path

    from .utils.config_loader import get_integration_url, get_config

    dummy_creds_path = Path(__file__).parent.parent / "dummy-gcp-credentials.json"

    # Dummy credentials only: Bifrost handles the actual authentication.
    dummy_environment = {
        "OPENAI_API_KEY": "dummy-openai-key-bifrost-handles-auth",
        "ANTHROPIC_API_KEY": "dummy-anthropic-key-bifrost-handles-auth",
        "MISTRAL_API_KEY": "dummy-mistral-key-bifrost-handles-auth",
        # For Google, set all possible API key environment variables.
        "GOOGLE_API_KEY": "dummy-google-api-key-bifrost-handles-auth",
        "GEMINI_API_KEY": "dummy-gemini-api-key-bifrost-handles-auth",
        "VERTEX_PROJECT": "dummy-vertex-project",
        "VERTEX_LOCATION": "us-central1",
        # Dummy application credentials keep Vertex AI from trying to
        # authenticate; all actual requests go through Bifrost.
        "GOOGLE_APPLICATION_CREDENTIALS": str(dummy_creds_path),
    }
    for name, value in dummy_environment.items():
        monkeypatch.setenv(name, value)

    def mock_refresh(self, request):
        """Stand-in for ``Credentials.refresh`` that sets a dummy token."""
        self.token = "dummy-access-token-bifrost-handles-auth"
        # Naive UTC timestamp, the same kind of value the deprecated
        # ``datetime.utcnow()`` produced.
        now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        self.expiry = now_utc + datetime.timedelta(hours=1)

    # Replace credential refresh so no real Google API call is attempted.
    try:
        from google.oauth2 import service_account
    except ImportError:
        pass  # google-auth not installed; nothing to patch
    else:
        monkeypatch.setattr(service_account.Credentials, "refresh", mock_refresh)

    # Bifrost URL and settings for LiteLLM.
    base_url = get_integration_url("litellm")
    config = get_config()
    integration_settings = config.get_integration_settings("litellm")
    api_config = config.get_api_config()

    # ``raising=False`` lets the setting be applied even if the attribute is
    # not already defined on the module.
    if base_url:
        monkeypatch.setattr(litellm, "api_base", base_url, raising=False)

    monkeypatch.setattr(
        litellm, "request_timeout", api_config.get("timeout", 30), raising=False
    )

    # Integration-specific settings are applied only when they are truthy.
    if integration_settings.get("drop_params"):
        monkeypatch.setattr(
            litellm, "drop_params", integration_settings["drop_params"], raising=False
        )
    if integration_settings.get("debug"):
        monkeypatch.setattr(
            litellm, "set_verbose", integration_settings["debug"], raising=False
        )
|
||||
|
||||
|
||||
def convert_to_litellm_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Wrap each common-format tool in LiteLLM's OpenAI-compatible envelope.

    Every input tool becomes ``{"type": "function", "function": tool}``.  The
    tool dictionaries themselves are reused, not copied.
    """
    wrapped: List[Dict[str, Any]] = []
    for tool in tools:
        wrapped.append({"type": "function", "function": tool})
    return wrapped
|
||||
|
||||
|
||||
class TestLiteLLMIntegration:
|
||||
"""Test suite for LiteLLM integration covering all 11 core scenarios"""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"simple_chat", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
def test_01_simple_chat(self, test_config, provider, model):
    """Test Case 1: Simple chat interaction.

    Sends ``SIMPLE_CHAT_MESSAGES`` to the parametrized model and checks that a
    valid response with non-empty content comes back.  Skips when the
    parametrization holds the "no providers" placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    response = litellm.completion(
        model=model,
        messages=SIMPLE_CHAT_MESSAGES,
        max_tokens=100,
    )

    assert_valid_chat_response(response)
    reply = response.choices[0].message.content
    assert reply is not None
    assert len(reply) > 0
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"multi_turn_conversation", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
def test_02_multi_turn_conversation(self, test_config, provider, model):
    """Test Case 2: Multi-turn conversation.

    Sends ``MULTI_TURN_MESSAGES`` to the parametrized model and checks that
    the reply mentions a population-related word.  Skips when the
    parametrization holds the "no providers" placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    response = litellm.completion(
        model=model,
        messages=MULTI_TURN_MESSAGES,
        max_tokens=150,
    )

    assert_valid_chat_response(response)
    # A missing content is treated as empty text so the keyword assertion
    # fails with a readable message instead of an AttributeError.
    content = (response.choices[0].message.content or "").lower()
    # Should mention population or numbers since we asked about Paris population
    population_words = ["population", "million", "people", "inhabitants"]
    assert any(
        word in content for word in population_words
    ), f"Response should mention population. Got content: {content}"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"tool_calls", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
def test_03_single_tool_call(self, test_config, provider, model):
    """Test Case 3: Single tool call.

    Offers the weather tool with ``SINGLE_TOOL_CALL_MESSAGES`` and expects
    exactly one ``get_weather`` call carrying a ``location`` argument.  Skips
    when the parametrization holds the "no providers" placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    tools = convert_to_litellm_tools([WEATHER_TOOL])

    response = litellm.completion(
        model=model,
        messages=SINGLE_TOOL_CALL_MESSAGES,
        tools=tools,
        max_tokens=100,
    )

    assert_has_tool_calls(response, expected_count=1)
    first_call = extract_tool_calls(response)[0]
    assert first_call["name"] == "get_weather"
    assert "location" in first_call["arguments"]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"multiple_tool_calls", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
def test_04_multiple_tool_calls(self, test_config, provider, model):
    """Test Case 4: Multiple tool calls in one response.

    Offers the weather and calculator tools with
    ``MULTIPLE_TOOL_CALL_MESSAGES`` and expects two calls, one to each tool.
    Skips when the parametrization holds the "no providers" placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    tools = convert_to_litellm_tools([WEATHER_TOOL, CALCULATOR_TOOL])

    response = litellm.completion(
        model=model,
        messages=MULTIPLE_TOOL_CALL_MESSAGES,
        tools=tools,
        max_tokens=200,
    )

    assert_has_tool_calls(response, expected_count=2)
    tool_names = [call["name"] for call in extract_tool_calls(response)]
    assert "get_weather" in tool_names
    assert "calculate" in tool_names
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"end2end_tool_calling", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
def test_05_end2end_tool_calling(self, test_config, provider, model):
    """Test Case 5: Complete tool calling flow with responses.

    Asks for the Boston weather, expects one tool call, feeds a mocked tool
    result back into the conversation, and checks that the follow-up reply
    mentions a weather or location keyword.  Skips when the parametrization
    holds the "no providers" placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    messages = [{"role": "user", "content": "What's the weather in Boston?"}]
    tools = convert_to_litellm_tools([WEATHER_TOOL])

    response = litellm.completion(
        model=model,
        messages=messages,
        tools=tools,
        max_tokens=100,
    )

    assert_has_tool_calls(response, expected_count=1)

    # Add assistant's tool call to conversation
    assistant_message = response.choices[0].message
    messages.append(assistant_message)

    # Add tool response
    tool_calls = extract_litellm_tool_calls(response)
    tool_response = mock_tool_response(tool_calls[0]["name"], tool_calls[0]["arguments"])

    messages.append(
        {
            "role": "tool",
            "tool_call_id": assistant_message.tool_calls[0].id,
            "content": tool_response,
        }
    )

    # Get final response
    # NOTE(review): this follow-up uses the configured litellm chat model
    # rather than the parametrized ``model`` — confirm that is intentional.
    final_response = litellm.completion(
        model=get_model("litellm", "chat"), messages=messages, max_tokens=150
    )

    assert_valid_chat_response(final_response)
    content = final_response.choices[0].message.content.lower()
    weather_location_keywords = WEATHER_KEYWORDS + LOCATION_KEYWORDS
    assert any(word in content for word in weather_location_keywords)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"automatic_function_calling", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
def test_06_automatic_function_calling(self, test_config, provider, model):
    """Test Case 6: Automatic function calling.

    Offers the calculator tool with ``tool_choice="auto"`` and expects the
    model to answer a multiplication request with one ``calculate`` call.
    Skips when the parametrization holds the "no providers" placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    tools = convert_to_litellm_tools([CALCULATOR_TOOL])

    response = litellm.completion(
        model=model,
        messages=[{"role": "user", "content": "Calculate 25 * 4 for me"}],
        tools=tools,
        tool_choice="auto",
        max_tokens=100,
    )

    # Should automatically choose to use the calculator
    assert_has_tool_calls(response, expected_count=1)
    tool_calls = extract_litellm_tool_calls(response)
    assert tool_calls[0]["name"] == "calculate"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"image_url", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
def test_07_image_url(self, test_config, provider, model):
    """Test Case 7: Image analysis from URL.

    Sends ``IMAGE_URL_MESSAGES`` to the parametrized model and validates the
    image response.  Skips when the parametrization holds the "no providers"
    placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    response = litellm.completion(
        model=model,
        messages=IMAGE_URL_MESSAGES,
        max_tokens=200,
    )

    assert_valid_image_response(response)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"image_base64", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
def test_08_image_base64(self, test_config, provider, model):
    """Test Case 8: Image analysis from base64.

    Sends ``IMAGE_BASE64_MESSAGES`` to the parametrized model and validates
    the image response.  Skips when the parametrization holds the "no
    providers" placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    response = litellm.completion(
        model=model,
        messages=IMAGE_BASE64_MESSAGES,
        max_tokens=200,
    )

    assert_valid_image_response(response)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"multiple_images", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
def test_09_multiple_images(self, test_config, provider, model):
    """Test Case 9: Multiple image analysis.

    Sends ``MULTIPLE_IMAGES_MESSAGES`` to the parametrized model and checks
    that the reply contains at least one comparison keyword.  Skips when the
    parametrization holds the "no providers" placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    response = litellm.completion(
        model=model,
        messages=MULTIPLE_IMAGES_MESSAGES,
        max_tokens=300,
    )

    assert_valid_image_response(response)
    content = response.choices[0].message.content.lower()
    # Should mention comparison or differences
    assert any(
        word in content for word in COMPARISON_KEYWORDS
    ), f"Response should contain comparison keywords. Got content: {content}"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"complex_e2end", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
@pytest.mark.skipif(True, reason="Known flaky test")
|
||||
def test_10_complex_end2end(self, test_config, provider, model):
    """Test Case 10: Complex end-to-end with conversation, images, and tools.

    Sends ``COMPLEX_E2E_MESSAGES`` with the weather tool available.  The first
    reply must carry content or tool calls; when it carries tool calls, mocked
    tool results are fed back and the follow-up reply is validated.  Skips
    when the parametrization holds the "no providers" placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    messages = COMPLEX_E2E_MESSAGES.copy()
    tools = convert_to_litellm_tools([WEATHER_TOOL])

    # First, analyze the image
    response1 = litellm.completion(
        model=model,
        messages=messages,
        tools=tools,
        max_tokens=300,
    )
    first_message = response1.choices[0].message

    # Should either describe image or call weather tool (or both)
    assert first_message.content is not None or first_message.tool_calls is not None

    # Add response to conversation
    messages.append(first_message)

    # If there were tool calls, handle them
    if first_message.tool_calls:
        for tool_call in first_message.tool_calls:
            tool_name = tool_call.function.name
            tool_args = json.loads(tool_call.function.arguments)
            tool_response = mock_tool_response(tool_name, tool_args)

            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_response,
                }
            )

        # Get final response after tool calls
        # NOTE(review): the reviewed copy of this file had lost its
        # indentation; the follow-up request is assumed to belong to the
        # tool-call branch — confirm.
        final_response = litellm.completion(model=model, messages=messages, max_tokens=200)

        assert_valid_chat_response(final_response)
|
||||
|
||||
@pytest.mark.skip(reason="known flaky test")
|
||||
def test_11_integration_specific_features(self, test_config):
    """Test Case 11: LiteLLM-specific features.

    Covers three things: a basic chat through several providers, a forced
    ``calculate`` tool choice, and a request with sampling parameters.  A
    provider whose request raises skips the test; an invalid response fails
    it, because the response assertion runs outside the ``try`` block.
    """

    # Test 1: Multiple integrations through LiteLLM
    # Note: Gemini is excluded as LiteLLM routes it through Vertex AI-specific endpoints
    integrations_to_test = [
        "gpt-3.5-turbo",  # OpenAI
        "claude-3-haiku-20240307",  # Anthropic
        "mistral/mistral-7b-instruct",  # Mistral
    ]

    for model in integrations_to_test:
        try:
            response = litellm.completion(
                model=model,
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                max_tokens=50,
            )
        except Exception as e:
            # Some integrations might not be available, skip gracefully
            pytest.skip(f"Integration {model} not available: {e}")

        # Validated outside the try so a bad response is a failure, not a skip.
        assert_valid_chat_response(response)

    # Test 2: Function calling with specific tool choice
    tools = convert_to_litellm_tools([CALCULATOR_TOOL, WEATHER_TOOL])

    response2 = litellm.completion(
        model=get_model("litellm", "chat"),
        messages=[{"role": "user", "content": "What's 15 + 27?"}],
        tools=tools,
        tool_choice={"type": "function", "function": {"name": "calculate"}},
        max_tokens=100,
    )

    assert_has_tool_calls(response2, expected_count=1)
    tool_calls = extract_litellm_tool_calls(response2)
    assert tool_calls[0]["name"] == "calculate"

    # Test 3: Temperature and other parameters
    response3 = litellm.completion(
        model=get_model("litellm", "chat"),
        messages=[{"role": "user", "content": "Tell me a creative story in one sentence."}],
        temperature=0.9,
        top_p=0.9,
        max_tokens=100,
    )

    assert_valid_chat_response(response3)
|
||||
|
||||
def test_12_error_handling_invalid_roles(self, test_config):
    """Test Case 12: Error handling for invalid roles.

    Sends ``INVALID_ROLE_MESSAGES`` to the configured litellm chat model,
    expects an exception, and hands that exception to the shared
    error-response and error-propagation checks.
    """
    with pytest.raises(Exception) as raised:
        litellm.completion(
            max_tokens=100,
            messages=INVALID_ROLE_MESSAGES,
            model=get_model("litellm", "chat"),
        )

    # The raised exception itself is what the shared helpers inspect.
    assert_valid_error_response(raised.value, "tester")
    assert_error_propagation(raised.value, "litellm")
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model",
|
||||
get_cross_provider_params_for_scenario(
|
||||
"streaming", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
|
||||
),
|
||||
)
|
||||
def test_13_streaming(self, test_config, provider, model):
    """Test Case 13: Streaming chat completion.

    Streams a plain chat and then a chat with the weather tool available.  The
    plain stream must yield chunks, more than 10 characters of content and no
    tool calls; the tool stream must yield chunks and a detected tool call.
    Skips when the parametrization holds the "no providers" placeholder.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    # Test basic streaming
    stream = litellm.completion(
        model=model,
        messages=STREAMING_CHAT_MESSAGES,
        max_tokens=200,
        stream=True,
    )

    content, chunk_count, tool_calls_detected = collect_streaming_content(
        stream, "openai", timeout=120  # LiteLLM uses OpenAI format
    )

    # Validate streaming results
    assert chunk_count > 0, "Should receive at least one chunk"
    assert len(content) > 10, "Should receive substantial content"
    assert not tool_calls_detected, "Basic streaming shouldn't have tool calls"

    # Test streaming with tool calls
    stream_with_tools = litellm.completion(
        model=model,
        messages=STREAMING_TOOL_CALL_MESSAGES,
        max_tokens=150,
        tools=convert_to_litellm_tools([WEATHER_TOOL]),
        stream=True,
    )

    # The streamed text is not checked for the tool run, only the counters.
    _, chunk_count_tools, tool_calls_detected_tools = collect_streaming_content(
        stream_with_tools, "openai", timeout=120  # LiteLLM uses OpenAI format
    )

    # Validate tool streaming results
    assert chunk_count_tools > 0, "Should receive at least one chunk with tools"
    assert tool_calls_detected_tools, "Should detect tool calls in streaming response"
|
||||
|
||||
@pytest.mark.skip(reason="known flaky test")
def test_14_gemini_integration(self, test_config):
    """Test Case 14: Google Gemini integration through LiteLLM

    Sends one plain chat request and one request that offers the calculator
    tool, both to the same hard-coded Gemini model.
    """
    gemini_model = "gemini-2.0-flash-001"
    ml_terms = ["machine", "learning", "data", "algorithm"]

    # NOTE(review): every Exception raised in this body, including a failed
    # ``assert``, is reported as a skip by the handler below.
    try:
        # Plain chat request
        chat_response = litellm.completion(
            model=gemini_model,
            messages=[
                {
                    "role": "user",
                    "content": "What is machine learning? Answer in one sentence.",
                }
            ],
            max_tokens=100,
        )
        assert_valid_chat_response(chat_response)

        content = chat_response.choices[0].message.content.lower()
        mentions_ml = any(word in content for word in ml_terms)
        assert mentions_ml, f"Response should mention ML concepts. Got: {content}"

        # Same model, this time with the calculator tool on offer
        tool_response = litellm.completion(
            model=gemini_model,
            messages=[{"role": "user", "content": "Calculate 42 * 17"}],
            tools=convert_to_litellm_tools([CALCULATOR_TOOL]),
            max_tokens=100,
        )

        # Either a tool call or a textual answer is acceptable
        if not tool_response.choices[0].message.tool_calls:
            answer = tool_response.choices[0].message.content
            assert "714" in answer or "42" in answer, "Should provide calculation result"
        else:
            assert_has_tool_calls(tool_response, expected_count=1)

    except Exception as e:
        pytest.skip(f"Gemini integration not available: {e}")
|
||||
|
||||
@pytest.mark.skip(reason="known flaky test")
def test_15_mistral_integration(self, test_config):
    """Test Case 15: Mistral integration through LiteLLM

    Runs a short explanatory prompt, then a second prompt with an explicit
    ``temperature``, against a hard-coded Mistral model.
    """
    mistral_model = "mistral/mistral-7b-instruct"
    recursion_terms = ["recursion", "function", "itself", "call"]

    # NOTE(review): a failed ``assert`` inside this body is also caught by the
    # broad handler below and reported as a skip.
    try:
        # First request: default sampling settings
        explanation = litellm.completion(
            model=mistral_model,
            messages=[
                {
                    "role": "user",
                    "content": "Explain recursion in programming briefly.",
                }
            ],
            max_tokens=150,
        )
        assert_valid_chat_response(explanation)

        content = explanation.choices[0].message.content.lower()
        explains_recursion = any(word in content for word in recursion_terms)
        assert explains_recursion, f"Response should explain recursion. Got: {content}"

        # Second request: explicit temperature
        haiku = litellm.completion(
            model=mistral_model,
            messages=[{"role": "user", "content": "Write a haiku about code."}],
            temperature=0.8,
            max_tokens=100,
        )
        assert_valid_chat_response(haiku)

    except Exception as e:
        pytest.skip(f"Mistral integration not available: {e}")
|
||||
|
||||
@pytest.mark.skip(reason="known flaky test")
def test_16_openai_embeddings_via_litellm(self, test_config):
    """Test Case 16: OpenAI embeddings through LiteLLM

    Covers a single-text embedding, a batch embedding, and a cosine
    similarity check between the first two vectors of a "similar texts"
    batch.
    """

    def embed(texts):
        # The model is looked up on every call, falling back to a default
        # when the lookup returns a falsy value.
        return litellm.embedding(
            model=get_model("litellm", "embeddings") or "text-embedding-3-small",
            input=texts,
        )

    # NOTE(review): failed assertions in this body are reported as skips by
    # the broad handler below.
    try:
        # Single text
        single = embed(EMBEDDINGS_SINGLE_TEXT)
        assert_valid_embedding_response(single, expected_dimensions=1536)

        # Batch of texts
        batch = embed(EMBEDDINGS_MULTIPLE_TEXTS)
        assert_valid_embeddings_batch_response(
            batch, len(EMBEDDINGS_MULTIPLE_TEXTS), expected_dimensions=1536
        )

        # Similarity analysis; the response may be a dict or an object
        similar = embed(EMBEDDINGS_SIMILAR_TEXTS)
        rows = similar["data"] if isinstance(similar, dict) else similar.data
        embeddings = [
            row["embedding"] if isinstance(row, dict) else row.embedding
            for row in rows
        ]

        similarity = calculate_cosine_similarity(embeddings[0], embeddings[1])
        assert (
            similarity > 0.7
        ), f"Similar texts should have high similarity, got {similarity:.4f}"

    except Exception as e:
        pytest.skip(f"OpenAI embeddings through LiteLLM not available: {e}")
|
||||
|
||||
def test_17_openai_speech_via_litellm(self, test_config):
    """Test Case 17: OpenAI speech synthesis through LiteLLM

    Synthesizes speech with two different voices and checks that both
    results are valid and that they differ from each other.
    """

    def audio_of(speech_response):
        # Use ``.content`` when the response exposes it; otherwise the
        # response itself (bytes or anything else) is used as the audio.
        if hasattr(speech_response, "content"):
            return speech_response.content
        return speech_response

    # NOTE(review): failed assertions in this body are reported as skips by
    # the broad handler below.
    try:
        # Primary voice
        first = litellm.speech(
            model=get_model("litellm", "speech") or "tts-1",
            voice=get_provider_voice("openai", "primary"),
            input=SPEECH_TEST_INPUT,
        )
        audio_content = audio_of(first)
        assert_valid_speech_response(audio_content)

        # Secondary voice, explicit output format
        second = litellm.speech(
            model=get_model("litellm", "speech") or "tts-1",
            voice=get_provider_voice("openai", "secondary"),
            input="Short test message for voice comparison.",
            response_format="mp3",
        )
        audio_content2 = audio_of(second)
        assert_valid_speech_response(audio_content2, expected_audio_size_min=500)

        # The two syntheses must not be identical
        assert (
            audio_content != audio_content2
        ), "Different voices should produce different audio"

    except Exception as e:
        pytest.skip(f"OpenAI speech through LiteLLM not available: {e}")
|
||||
|
||||
def test_18_openai_transcription_via_litellm(self, test_config):
    """Test Case 18: OpenAI transcription through LiteLLM

    Transcribes generated test audio twice: once with defaults and once
    with ``language`` and ``temperature`` set.
    """
    # NOTE(review): failed assertions in this body are reported as skips by
    # the broad handler below.
    try:
        test_audio = generate_test_audio()

        def transcribe(**extra):
            # Same upload tuple for every call; only the extra options differ.
            return litellm.transcription(
                model=get_model("litellm", "transcription") or "whisper-1",
                file=("test_audio.wav", test_audio, "audio/wav"),
                **extra,
            )

        # Defaults only
        plain = transcribe()
        assert_valid_transcription_response(plain)

        # With additional parameters
        tuned = transcribe(language="en", temperature=0.0)
        assert_valid_transcription_response(tuned)

    except Exception as e:
        pytest.skip(f"OpenAI transcription through LiteLLM not available: {e}")
|
||||
|
||||
def test_19_multi_provider_comparison(self, test_config):
    """Test Case 19: Compare responses across different providers through LiteLLM

    Asks the same question of several hard-coded models. A model whose
    request or validation raises is logged and left out; at least one
    model must answer, and every collected answer must mention Tokyo or
    Japan.
    """
    test_prompt = "What is the capital of Japan? Answer in one word."
    candidate_models = (
        "gpt-3.5-turbo",  # OpenAI
        "claude-3-haiku-20240307",  # Anthropic
        "gemini-2.0-flash-001",  # Google
    )
    expected_words = ["tokyo", "japan"]

    answers = {}
    for model in candidate_models:
        try:
            reply = litellm.completion(
                model=model,
                messages=[{"role": "user", "content": test_prompt}],
                max_tokens=50,
            )
            assert_valid_chat_response(reply)
            answers[model] = reply.choices[0].message.content.lower()
        except Exception as e:
            # Best effort: an unavailable model does not fail the test.
            print(f"Model {model} not available: {e}")

    # Verify that we got at least one response
    assert len(answers) > 0, "Should get at least one successful response"

    # Every collected answer must mention the expected words
    for model, content in answers.items():
        found = any(word in content for word in expected_words)
        assert found, f"Model {model} should mention Tokyo. Got: {content}"
|
||||
|
||||
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_20_token_counter_simple_text(self, test_config, provider, model):
    """Test Case 20: Count tokens from simple text using LiteLLM token_counter

    Passes the simple-text fixture via the ``text`` parameter and expects
    an integer count between 3 and 20 inclusive.
    """
    no_scenario = provider == "_no_providers_" or model == "_no_model_"
    if no_scenario:
        pytest.skip("No providers configured for this scenario")

    # NOTE(review): failed assertions in this body are reported as skips by
    # the broad handler below.
    try:
        n_tokens = litellm.token_counter(model=model, text=INPUT_TOKENS_SIMPLE_TEXT)

        assert isinstance(n_tokens, int), "Token count should be an integer"
        assert n_tokens > 0, "Token count should be positive"
        # Sanity window for a short sentence
        assert 3 <= n_tokens <= 20, f"Simple text should have 3-20 tokens, got {n_tokens}"

    except Exception as e:
        pytest.skip(f"Token counting not available for {provider}/{model}: {e}")
|
||||
|
||||
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_21_token_counter_with_messages(self, test_config, provider, model):
    """Test Case 21: Count tokens from messages with system message using LiteLLM token_counter

    Passes the system-message fixture via the ``messages`` parameter and
    expects an integer count greater than 2.
    """
    no_scenario = provider == "_no_providers_" or model == "_no_model_"
    if no_scenario:
        pytest.skip("No providers configured for this scenario")

    # NOTE(review): failed assertions in this body are reported as skips by
    # the broad handler below.
    try:
        n_tokens = litellm.token_counter(model=model, messages=INPUT_TOKENS_WITH_SYSTEM)

        assert isinstance(n_tokens, int), "Token count should be an integer"
        assert n_tokens > 0, "Token count should be positive"
        # Only a fixed lower bound is checked here; the count is not compared
        # against the simple-text count.
        assert n_tokens > 2, f"With system message should have >2 tokens, got {n_tokens}"

    except Exception as e:
        pytest.skip(f"Token counting not available for {provider}/{model}: {e}")
|
||||
|
||||
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_22_token_counter_long_text(self, test_config, provider, model):
    """Test Case 22: Count tokens from long text using LiteLLM token_counter

    Passes the long-text fixture via the ``text`` parameter and expects an
    integer count above 100.
    """
    no_scenario = provider == "_no_providers_" or model == "_no_model_"
    if no_scenario:
        pytest.skip("No providers configured for this scenario")

    # NOTE(review): failed assertions in this body are reported as skips by
    # the broad handler below.
    try:
        n_tokens = litellm.token_counter(model=model, text=INPUT_TOKENS_LONG_TEXT)

        assert isinstance(n_tokens, int), "Token count should be an integer"
        assert n_tokens > 100, f"Long text should have >100 tokens, got {n_tokens}"

    except Exception as e:
        pytest.skip(f"Token counting not available for {provider}/{model}: {e}")
|
||||
|
||||
|
||||
|
||||
# Additional helper functions specific to LiteLLM
|
||||
def extract_litellm_tool_calls(response: Any) -> List[Dict[str, Any]]:
    """Extract tool calls from the first choice of a LiteLLM (OpenAI-style) response.

    Args:
        response: Object expected to expose ``choices[0].message.tool_calls``.
            Anything missing along that path yields an empty result.

    Returns:
        One ``{"name": ..., "arguments": ...}`` dict per usable tool call.
        String arguments are decoded as JSON; other values are passed
        through unchanged. Calls whose arguments are missing or fail to
        decode are skipped with a printed warning.
    """
    choices = getattr(response, "choices", None)
    if not choices:
        return []

    # A missing or falsy message / tool_calls means nothing to extract.
    message = getattr(choices[0], "message", None)
    raw_calls = getattr(message, "tool_calls", None)
    if not raw_calls:
        return []

    extracted = []
    for call in raw_calls:
        function = getattr(call, "function", None)
        if not hasattr(function, "name"):
            # Not a function-style tool call; ignore it.
            continue
        try:
            raw_arguments = function.arguments
            if isinstance(raw_arguments, str):
                parsed_arguments = json.loads(raw_arguments)
            else:
                parsed_arguments = raw_arguments
            extracted.append({"name": function.name, "arguments": parsed_arguments})
        except (json.JSONDecodeError, AttributeError) as e:
            print(f"Warning: Failed to parse LiteLLM tool call arguments: {e}")

    return extracted
|
||||
4548
tests/integrations/python/tests/test_openai.py
Normal file
4548
tests/integrations/python/tests/test_openai.py
Normal file
File diff suppressed because it is too large
Load Diff
781
tests/integrations/python/tests/test_pydanticai.py
Normal file
781
tests/integrations/python/tests/test_pydanticai.py
Normal file
@@ -0,0 +1,781 @@
|
||||
"""
|
||||
Pydantic AI Integration Tests - Cross-Provider Support
|
||||
|
||||
🌉 CROSS-PROVIDER TESTING:
|
||||
This test suite uses Pydantic AI to test against multiple AI providers through Bifrost.
|
||||
Tests automatically run against all available providers with proper capability filtering.
|
||||
|
||||
🤖 PYDANTIC AI COMPONENTS TESTED:
|
||||
- Agent: Core agent class for running LLM interactions
|
||||
- Models: OpenAI (OpenAIChatModel), Anthropic (AnthropicModel), Google (GoogleModel), Cohere (CohereModel)
|
||||
- Providers: OpenAIProvider, AnthropicProvider, GoogleProvider, CohereProvider
|
||||
- Tools: Function tools with @agent.tool decorator
|
||||
- Structured Output: Pydantic BaseModel result types
|
||||
- Streaming: Real-time response streaming
|
||||
- Async Operations: agent.run() async patterns
|
||||
|
||||
⚠️ PROVIDER LIMITATIONS:
|
||||
- Bedrock: Not supported in PydanticAI tests - tested separately in test_bedrock.py
|
||||
|
||||
Tests Pydantic AI standard interface compliance and Bifrost integration:
|
||||
1. Basic Agent chat - Cross-provider
|
||||
2. Agent with system prompt (instructions) - Cross-provider
|
||||
3. Multi-turn conversation with message history - Cross-provider
|
||||
4. Tool calling with @agent.tool decorator - Cross-provider
|
||||
5. End-to-end tool calling with multi-turn flow - Cross-provider
|
||||
6. Structured output with Pydantic models - Cross-provider
|
||||
7. Streaming responses - Cross-provider
|
||||
8. Async operations
|
||||
9. Error handling
|
||||
10. Tool with context - Cross-provider
|
||||
11. Multiple tools - Cross-provider
|
||||
12. Result validation
|
||||
13. Usage tracking
|
||||
14. Message history inspection
|
||||
15. Dynamic instructions
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import asyncio
|
||||
import os
|
||||
from typing import List, Dict, Any, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic_ai import Agent, RunContext, Tool
|
||||
|
||||
# Pydantic AI model imports
|
||||
from pydantic_ai.models.openai import OpenAIChatModel
|
||||
from pydantic_ai.providers.openai import OpenAIProvider
|
||||
|
||||
# Optional provider imports
|
||||
try:
|
||||
from pydantic_ai.models.anthropic import AnthropicModel
|
||||
from pydantic_ai.providers.anthropic import AnthropicProvider
|
||||
ANTHROPIC_AVAILABLE = True
|
||||
except ImportError:
|
||||
ANTHROPIC_AVAILABLE = False
|
||||
AnthropicModel = None
|
||||
AnthropicProvider = None
|
||||
|
||||
try:
|
||||
from pydantic_ai.models.google import GoogleModel
|
||||
from pydantic_ai.providers.google import GoogleProvider
|
||||
GOOGLE_AVAILABLE = True
|
||||
except ImportError:
|
||||
GOOGLE_AVAILABLE = False
|
||||
GoogleModel = None
|
||||
GoogleProvider = None
|
||||
|
||||
try:
|
||||
from cohere import AsyncClientV2 as CohereAsyncClient
|
||||
from pydantic_ai.models.cohere import CohereModel
|
||||
from pydantic_ai.providers.cohere import CohereProvider
|
||||
COHERE_AVAILABLE = True
|
||||
except ImportError:
|
||||
COHERE_AVAILABLE = False
|
||||
CohereAsyncClient = None
|
||||
CohereModel = None
|
||||
CohereProvider = None
|
||||
|
||||
from .utils.common import (
|
||||
Config,
|
||||
SIMPLE_CHAT_MESSAGES,
|
||||
MULTI_TURN_MESSAGES,
|
||||
WEATHER_TOOL,
|
||||
CALCULATOR_TOOL,
|
||||
EMBEDDINGS_SINGLE_TEXT,
|
||||
EMBEDDINGS_MULTIPLE_TEXTS,
|
||||
mock_tool_response,
|
||||
assert_valid_chat_response,
|
||||
get_api_key,
|
||||
skip_if_no_api_key,
|
||||
WEATHER_KEYWORDS,
|
||||
LOCATION_KEYWORDS,
|
||||
)
|
||||
from .utils.config_loader import get_model, get_integration_url, get_config
|
||||
from .utils.parametrize import (
|
||||
get_cross_provider_params_for_scenario,
|
||||
format_provider_model,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def test_config():
    """Provide a freshly constructed ``Config`` instance to each test."""
    config = Config()
    return config
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def setup_pydanticai():
    """Setup Pydantic AI with Bifrost configuration and dummy credentials

    Installs placeholder provider API keys in ``os.environ`` for the
    duration of each test, then puts back whatever was there before.
    pytest does not undo direct ``os.environ`` writes, so without the
    restore step the dummy values would replace any real keys for the rest
    of the session.
    """
    # Set dummy credentials since Bifrost handles actual authentication
    dummy_credentials = {
        "OPENAI_API_KEY": "dummy-openai-key-bifrost-handles-auth",
        "ANTHROPIC_API_KEY": "dummy-anthropic-key-bifrost-handles-auth",
        "GOOGLE_API_KEY": "dummy-google-api-key-bifrost-handles-auth",
        "GEMINI_API_KEY": "dummy-gemini-api-key-bifrost-handles-auth",
        "CO_API_KEY": "dummy-cohere-key-bifrost-handles-auth",
    }

    # Remember the prior state; ``None`` marks a variable that was unset.
    previous = {name: os.environ.get(name) for name in dummy_credentials}
    os.environ.update(dummy_credentials)

    try:
        yield
    finally:
        # Restore the environment exactly as it was before the test ran.
        for name, value in previous.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
|
||||
|
||||
|
||||
def get_openai_model(model_name: str | None = None) -> OpenAIChatModel:
    """Build an ``OpenAIChatModel`` that talks to the Bifrost endpoint.

    Args:
        model_name: Model identifier. When ``None``, the name configured for
            the ``pydanticai`` integration's ``chat`` entry is used.

    Returns:
        A model whose provider targets ``<integration url>/v1`` with a
        placeholder API key.
    """
    bifrost_url = get_integration_url("pydanticai")
    resolved_name = get_model("pydanticai", "chat") if model_name is None else model_name

    return OpenAIChatModel(
        resolved_name,
        provider=OpenAIProvider(
            base_url=f"{bifrost_url}/v1",
            api_key="dummy-openai-key-bifrost-handles-auth",
        ),
    )
|
||||
|
||||
|
||||
def get_anthropic_model(model_name: str = "claude-3-haiku-20240307") -> Optional[Any]:
    """Build an ``AnthropicModel`` that talks to the Bifrost endpoint.

    Args:
        model_name: Anthropic model identifier.

    Returns:
        The configured model, or ``None`` when the optional Anthropic
        imports were not available at module import time.
    """
    if ANTHROPIC_AVAILABLE:
        # The integration URL is passed through unchanged. Per the original
        # author's note, the Anthropic SDK appends /v1 itself, unlike the
        # OpenAI SDK which expects /v1 in the base URL.
        bifrost_provider = AnthropicProvider(
            base_url=get_integration_url("pydanticai"),
            api_key="dummy-anthropic-key-bifrost-handles-auth",
        )
        return AnthropicModel(model_name, provider=bifrost_provider)

    return None
|
||||
|
||||
|
||||
def get_google_model(model_name: str = "gemini-2.0-flash") -> Optional[Any]:
    """Build a ``GoogleModel`` that talks to the Bifrost endpoint.

    Args:
        model_name: Google model identifier.

    Returns:
        The configured model, or ``None`` when the optional Google imports
        were not available at module import time.
    """
    if GOOGLE_AVAILABLE:
        # Point the provider at Bifrost using a placeholder key.
        bifrost_provider = GoogleProvider(
            api_key="dummy-google-api-key-bifrost-handles-auth",
            base_url=get_integration_url("pydanticai"),
        )
        return GoogleModel(model_name, provider=bifrost_provider)

    return None
|
||||
|
||||
|
||||
def get_cohere_model(model_name: str = "command-r7b-12-2024") -> Optional[Any]:
    """Build a ``CohereModel`` that talks to the Bifrost endpoint.

    Args:
        model_name: Cohere model identifier.

    Returns:
        The configured model, or ``None`` when the optional Cohere imports
        were not available at module import time.
    """
    if COHERE_AVAILABLE:
        # A custom async client pointed at Bifrost is built first and then
        # handed to the provider.
        bifrost_client = CohereAsyncClient(
            api_key="dummy-cohere-key-bifrost-handles-auth",
            base_url=get_integration_url("pydanticai"),
        )
        return CohereModel(
            model_name,
            provider=CohereProvider(cohere_client=bifrost_client),
        )

    return None
|
||||
|
||||
|
||||
def get_pydanticai_model_for_provider(provider: str, model: str) -> Any:
    """
    Create the Pydantic AI model object for a provider/model pair.

    The provider name is matched case-insensitively and dispatched to the
    matching ``get_*_model`` builder in this module.

    Args:
        provider: Provider name: 'openai', 'anthropic', 'gemini' (or its
            alias 'google'), or 'cohere'
        model: Model name passed through to the builder

    Returns:
        Configured Pydantic AI model object for the provider

    Raises:
        ValueError: If the provider is 'bedrock' or unknown, or if the
            optional imports for the requested provider are not available
    """
    key = provider.lower()

    if key == "openai":
        return get_openai_model(model)

    if key == "anthropic":
        if ANTHROPIC_AVAILABLE:
            return get_anthropic_model(model)
        raise ValueError(f"Anthropic SDK not available for provider '{provider}'")

    if key in ("gemini", "google"):
        if GOOGLE_AVAILABLE:
            return get_google_model(model)
        raise ValueError(f"Google GenAI SDK not available for provider '{provider}'")

    if key == "cohere":
        if COHERE_AVAILABLE:
            return get_cohere_model(model)
        raise ValueError(f"Cohere SDK not available for provider '{provider}'")

    if key == "bedrock":
        # Bedrock is rejected explicitly here; the original author's note says
        # it is covered by test_bedrock.py and that routing it through the
        # OpenAI SDK causes response validation errors.
        raise ValueError(
            "Provider 'bedrock' is not supported in PydanticAI tests - "
            "use test_bedrock.py for Bedrock testing"
        )

    raise ValueError(f"Unsupported provider: {provider}. Supported: openai, anthropic, gemini, cohere")
|
||||
|
||||
|
||||
# Structured output models for testing
|
||||
class CityInfo(BaseModel):
    """Information about a city"""

    # Used as the structured ``output_type`` in the structured-output test.
    # The class docstring and field descriptions are left untouched because
    # they are part of the model's schema.
    city: str = Field(description="Name of the city")
    country: str = Field(description="Country where the city is located")
|
||||
|
||||
|
||||
class WeatherResponse(BaseModel):
    """Weather information response"""

    # All three fields are plain strings; temperature is not parsed into a
    # number by this model.
    location: str = Field(description="Location for the weather")
    temperature: str = Field(description="Current temperature")
    conditions: str = Field(description="Weather conditions description")
|
||||
|
||||
|
||||
class CalculationResult(BaseModel):
    """Result of a calculation"""

    # ``expression`` holds the expression text; ``result`` is its numeric
    # value as a float.
    expression: str = Field(description="The mathematical expression")
    result: float = Field(description="The calculated result")
|
||||
|
||||
|
||||
class TestPydanticAIIntegration:
|
||||
"""Comprehensive Pydantic AI integration tests through Bifrost"""
|
||||
|
||||
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("simple_chat"))
def test_01_basic_agent_chat(self, test_config, provider, model):
    """Test Case 1: Basic Agent chat functionality - runs across all available providers

    Builds an agent for the parametrized provider/model, sends a greeting,
    and checks that a non-empty output comes back.
    """
    no_scenario = provider == "_no_providers_" or model == "_no_model_"
    if no_scenario:
        pytest.skip("No providers configured for this scenario")

    # Any ValueError raised in this body is reported as a skip.
    try:
        agent = Agent(
            get_pydanticai_model_for_provider(provider, model),
            instructions="Be concise, reply with one sentence.",
        )
        run = agent.run_sync("Hello! How are you today?")

        assert run is not None
        assert run.output is not None
        assert len(str(run.output)) > 0

    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
|
||||
|
||||
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("simple_chat"))
def test_02_agent_with_system_prompt(self, test_config, provider, model):
    """Test Case 2: Agent with custom system prompt (instructions) - runs across all available providers

    Gives the agent geography-expert instructions and checks that the
    answer to a capital-city question mentions Paris.
    """
    no_scenario = provider == "_no_providers_" or model == "_no_model_"
    if no_scenario:
        pytest.skip("No providers configured for this scenario")

    # Any ValueError raised in this body is reported as a skip.
    try:
        geography_instructions = (
            "You are a helpful geography expert. "
            "Always mention the continent when discussing cities."
        )
        agent = Agent(
            get_pydanticai_model_for_provider(provider, model),
            instructions=geography_instructions,
        )
        run = agent.run_sync("What is the capital of France?")

        assert run is not None
        assert run.output is not None
        answer = str(run.output).lower()
        assert "paris" in answer

    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
|
||||
|
||||
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("multi_turn_conversation"))
def test_03_multi_turn_conversation(self, test_config, provider, model):
    """Test Case 3: Multi-turn conversation with message history - runs across all available providers

    States a name in the first turn, passes that turn's messages as
    history to the second turn, and checks the name is recalled.
    """
    no_scenario = provider == "_no_providers_" or model == "_no_model_"
    if no_scenario:
        pytest.skip("No providers configured for this scenario")

    # Any ValueError raised in this body is reported as a skip.
    try:
        agent = Agent(
            get_pydanticai_model_for_provider(provider, model),
            instructions="You are a helpful assistant. Remember context from previous messages.",
        )

        # Turn 1 introduces the fact to remember
        introduction = agent.run_sync("My name is Alice.")

        # Turn 2 only sees turn 1 through the explicit message history
        follow_up = agent.run_sync(
            "What is my name?",
            message_history=introduction.all_messages(),
        )

        assert follow_up is not None
        assert follow_up.output is not None
        answer = str(follow_up.output).lower()
        assert "alice" in answer

    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
|
||||
|
||||
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("tool_calls"))
def test_04_tool_calling(self, test_config, provider, model):
    """Test Case 4: Tool calling with @agent.tool decorator - runs across all available providers

    Registers two plain functions as tools, asks a weather question, and
    checks that the final output mentions a weather or location keyword.
    """
    no_scenario = provider == "_no_providers_" or model == "_no_model_"
    if no_scenario:
        pytest.skip("No providers configured for this scenario")

    # Any ValueError raised in this body is reported as a skip.
    try:
        pydantic_model = get_pydanticai_model_for_provider(provider, model)

        # Tool names, parameters and docstrings are kept exactly as they
        # were, since they describe the tools to the agent.
        def get_weather(location: str) -> str:
            """Get the current weather for a location."""
            return f"The weather in {location} is 72°F and sunny."

        def calculate(expression: str) -> str:
            """Perform a mathematical calculation."""
            try:
                # NOTE(review): eval() runs arbitrary Python on text supplied
                # by the model; it is not a safe evaluator. Acceptable only
                # in a trusted test environment.
                normalized = expression.replace("x", "*").replace("×", "*")
                result = eval(normalized)
                return f"The result of {expression} is {result}"
            except Exception:
                return f"Could not calculate {expression}"

        agent = Agent(
            pydantic_model,
            tools=[get_weather, calculate],
            instructions="You are a helpful assistant that can check weather and do calculations.",
        )
        run = agent.run_sync("What's the weather like in Boston?")

        assert run is not None
        assert run.output is not None
        content = str(run.output).lower()

        # Either weather wording or a location name satisfies the check
        mentions_expected = any(
            keyword in content for keyword in WEATHER_KEYWORDS + LOCATION_KEYWORDS
        )
        assert mentions_expected, f"Response should mention weather or location. Got: {content}"

    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
|
||||
|
||||
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("end2end_tool_calling"))
def test_05_end2end_tool_calling(self, test_config, provider, model):
    """Test Case 5: Complete end-to-end tool calling flow with multi-turn conversation - runs across all available providers

    Registers a weather tool, asks a weather question in one
    ``run_sync`` call, and checks the final output for a weather or
    location keyword.
    """
    no_scenario = provider == "_no_providers_" or model == "_no_model_"
    if no_scenario:
        pytest.skip("No providers configured for this scenario")

    # Any ValueError raised in this body is reported as a skip.
    try:
        pydantic_model = get_pydanticai_model_for_provider(provider, model)

        # Tool name, parameter and docstring are kept exactly as they were,
        # since they describe the tool to the agent.
        def get_weather(location: str) -> str:
            """Get the current weather for a location."""
            return f"The weather in {location} is 72°F and sunny."

        agent = Agent(
            pydantic_model,
            tools=[get_weather],
            instructions="You are a helpful assistant that can check weather.",
        )

        # Per the original author's note, Pydantic AI executes tools
        # automatically, so the output of this single run is expected to be
        # the final answer.
        run = agent.run_sync("What's the weather in Boston in fahrenheit?")

        assert run is not None
        assert run.output is not None

        content = str(run.output).lower()
        mentions_expected = any(
            keyword in content for keyword in WEATHER_KEYWORDS + LOCATION_KEYWORDS
        )
        assert mentions_expected, f"Response should mention weather or location. Got: {content}"

    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
|
||||
|
||||
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("pydantic_structured_output"))
def test_06_structured_output(self, test_config, provider, model):
    """Test Case 6: Structured output with Pydantic models - runs on providers with reliable PydanticAI structured output support

    Requests a ``CityInfo`` result and checks the parsed city and country
    fields.
    """
    no_scenario = provider == "_no_providers_" or model == "_no_model_"
    if no_scenario:
        pytest.skip("No providers configured for this scenario")

    # Any ValueError raised in this body is reported as a skip.
    try:
        agent = Agent(
            get_pydanticai_model_for_provider(provider, model),
            output_type=CityInfo,
            instructions="Extract city information from the user's question.",
        )
        run = agent.run_sync("Tell me about Paris, the capital of France.")

        assert run is not None
        assert run.output is not None
        assert isinstance(run.output, CityInfo)

        # City must match exactly (case-insensitive); country only needs to
        # contain the expected name.
        assert run.output.city.lower() == "paris"
        assert "france" in run.output.country.lower()

    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
|
||||
|
||||
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("pydanticai_streaming"))
def test_07_streaming_responses(self, test_config, provider, model):
    """Test Case 7: Streaming response functionality - runs on providers with PydanticAI streaming support

    Streams a short story from the agent, collects the text deltas, and
    checks that at least one chunk arrived and that the joined text looks
    like a robot story. Any ValueError raised in the body is reported as a
    skip.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    try:
        pydantic_model = get_pydanticai_model_for_provider(provider, model)
        agent = Agent(
            pydantic_model,
            instructions="You are a storyteller. Tell short, engaging stories.",
        )

        async def run_streaming():
            chunks = []
            async with agent.run_stream("Tell me a very short story about a robot.") as response:
                # delta=True yields only the newly received text. The default
                # yields the full text so far, so joining those chunks would
                # repeat the story many times over.
                async for chunk in response.stream_text(delta=True):
                    chunks.append(chunk)
            return "".join(chunks), len(chunks)

        # asyncio.run creates a fresh event loop and tears it down afterwards,
        # so no closed loop is left installed as the current loop for later
        # tests (test_08 uses the same entry point).
        full_content, chunk_count = asyncio.run(run_streaming())

        assert chunk_count > 0, "Should receive streaming chunks"
        assert len(full_content) > 0, "Should have content from streaming"
        assert any(
            word in full_content.lower() for word in ["robot", "story", "once"]
        ), f"Response should be a story about robots. Got: {full_content[:200]}"

    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
|
||||
|
||||
def test_08_async_operations(self, test_config):
    """Test Case 8: Async operation support.

    Awaits ``agent.run`` inside a coroutine driven by ``asyncio.run`` and
    checks that a non-empty output comes back. Errors other than assertion
    failures are reported as a skip.
    """

    async def async_test():
        try:
            model = get_openai_model()
            agent = Agent(
                model,
                instructions="Be concise.",
            )

            result = await agent.run("Hello from async!")

            assert result is not None
            assert result.output is not None
            assert len(str(result.output)) > 0

        except AssertionError:
            # A failed expectation is a real test failure, not "unavailable".
            raise
        except Exception as e:
            # pytest.skip raises, so nothing after it in this branch would run.
            pytest.skip(f"Async operations through Pydantic AI not available: {e}")

    asyncio.run(async_test())
|
||||
|
||||
def test_09_error_handling(self, test_config):
    """Test Case 9: Error handling for invalid requests.

    Builds an agent around a deliberately invalid model name and expects
    ``run_sync`` to raise with a message that mentions the problem.
    """
    try:
        # Test with invalid model name
        base_url = get_integration_url("pydanticai")
        provider = OpenAIProvider(
            base_url=f"{base_url}/v1",
            api_key="dummy-key",
        )
        model = OpenAIChatModel("invalid-model-name-should-fail", provider=provider)
        agent = Agent(model)

        with pytest.raises(Exception) as exc_info:
            agent.run_sync("This should fail gracefully.")

        # Should get a meaningful error
        error_message = str(exc_info.value).lower()
        assert any(
            word in error_message
            for word in ["model", "error", "invalid", "not found", "does not exist"]
        )

    except AssertionError:
        # An unhelpful error message must fail the test, not be reported as a skip.
        raise
    except Exception as e:
        pytest.skip(f"Error handling test through Pydantic AI not available: {e}")
|
||||
|
||||
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("tool_calls"))
def test_10_tool_with_context(self, test_config, provider, model):
    """Test Case 10: Tool that reads injected dependencies through RunContext.

    Runs across all available providers for the ``tool_calls`` scenario. The
    tool formats the user name and id taken from the run's dependencies, and
    the reply is expected to mention them.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    @dataclass
    class UserDeps:
        user_name: str
        user_id: int

    def get_user_info(ctx: RunContext[UserDeps]) -> str:
        """Get information about the current user."""
        return f"User: {ctx.deps.user_name} (ID: {ctx.deps.user_id})"

    try:
        agent = Agent(
            get_pydanticai_model_for_provider(provider, model),
            deps_type=UserDeps,
            tools=[Tool(get_user_info, takes_ctx=True)],
            instructions="You can look up user information when asked.",
        )
        outcome = agent.run_sync(
            "What is my user information?",
            deps=UserDeps(user_name="Alice", user_id=123),
        )

        assert outcome is not None
        assert outcome.output is not None
        # Should mention Alice or user info
        reply = str(outcome.output).lower()
        assert any(token in reply for token in ("alice", "user"))
    except ValueError as exc:
        pytest.skip(f"Provider {provider} not available: {exc}")
|
||||
|
||||
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("multiple_tool_calls"))
def test_11_multiple_tools(self, test_config, provider, model):
    """Test Case 11: Several tools registered on a single agent.

    Runs across all available providers for the ``multiple_tool_calls``
    scenario. Three stub tools are registered; the test only checks that a
    weather question produces a non-empty result.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    def get_weather(location: str) -> str:
        """Get weather for a location."""
        return f"Weather in {location}: 72°F, sunny"

    def get_time(timezone: str) -> str:
        """Get current time in a timezone."""
        return f"Current time in {timezone}: 2:30 PM"

    def translate(text: str, target_language: str) -> str:
        """Translate text to another language."""
        return f"'{text}' in {target_language}: [translated]"

    try:
        assistant = Agent(
            get_pydanticai_model_for_provider(provider, model),
            tools=[get_weather, get_time, translate],
            instructions="You can check weather, time, and translate text.",
        )
        outcome = assistant.run_sync("What's the weather in New York?")

        assert outcome is not None
        assert outcome.output is not None
    except ValueError as exc:
        pytest.skip(f"Provider {provider} not available: {exc}")
|
||||
|
||||
def test_12_agent_with_result_validators(self, test_config):
    """Test Case 12: Agent with result type validation.

    The output type constrains ``value`` to 0..100; the test checks that the
    agent returns an instance of that type with a value inside the range.
    """
    try:
        model = get_openai_model()

        class NumberResponse(BaseModel):
            """A response containing a number"""

            value: int = Field(ge=0, le=100, description="A number between 0 and 100")
            explanation: str = Field(description="Explanation of the number")

        agent = Agent(
            model,
            output_type=NumberResponse,
            instructions="When asked for a number, provide a value between 0 and 100.",
        )

        result = agent.run_sync("Give me a random number for a dice roll (1-6).")

        assert result is not None
        assert result.output is not None
        assert isinstance(result.output, NumberResponse)
        assert 0 <= result.output.value <= 100

    except AssertionError:
        # Failed expectations are real failures; only other errors become skips.
        raise
    except Exception as e:
        pytest.skip(f"Result validation through Pydantic AI not available: {e}")
|
||||
|
||||
def test_13_usage_tracking(self, test_config):
    """Test Case 13: Usage tracking and token counting.

    Checks that ``result.usage()`` is populated and, when the usage object
    exposes token counters, that the first one found is positive.
    """
    try:
        model = get_openai_model()
        agent = Agent(
            model,
            instructions="Be concise.",
        )

        result = agent.run_sync("Say hello.")

        assert result is not None

        # Check usage information
        usage = result.usage()
        assert usage is not None
        # Usage should have token counts
        if hasattr(usage, 'total_tokens'):
            assert usage.total_tokens > 0
        elif hasattr(usage, 'input_tokens'):
            assert usage.input_tokens > 0

    except AssertionError:
        # Missing or zero usage is a real failure, not an unavailable feature.
        raise
    except Exception as e:
        pytest.skip(f"Usage tracking through Pydantic AI not available: {e}")
|
||||
|
||||
def test_14_message_history_inspection(self, test_config):
    """Test Case 14: Inspect message history after run.

    After one run, ``all_messages()`` must contain at least one message of
    kind ``request`` and one of kind ``response``.
    """
    try:
        model = get_openai_model()
        agent = Agent(
            model,
            instructions="Be helpful.",
        )

        result = agent.run_sync("What is 2 + 2?")

        # Inspect all messages
        messages = result.all_messages()
        assert messages is not None
        assert len(messages) >= 2  # At least request and response

        # Should have user message and assistant response
        message_kinds = [msg.kind for msg in messages]
        assert "request" in message_kinds
        assert "response" in message_kinds

    except AssertionError:
        # A malformed history must fail the test instead of being skipped.
        raise
    except Exception as e:
        pytest.skip(f"Message history inspection through Pydantic AI not available: {e}")
|
||||
|
||||
def test_15_dynamic_instructions(self, test_config):
    """Test Case 15: Dynamic instructions based on context.

    Registers an instructions callback that reads the target language from
    the run dependencies, then checks that the reply contains an English
    greeting.
    """
    try:
        model = get_openai_model()

        @dataclass
        class LanguageDeps:
            language: str

        agent = Agent(
            model,
            deps_type=LanguageDeps,
        )

        @agent.instructions
        def dynamic_instructions(ctx: RunContext[LanguageDeps]) -> str:
            return f"Always respond in {ctx.deps.language}. Be concise."

        deps = LanguageDeps(language="English")
        result = agent.run_sync("Say hello.", deps=deps)

        assert result is not None
        assert result.output is not None
        # Response should be in English
        content = str(result.output).lower()
        assert any(word in content for word in ["hello", "hi", "greetings"])

    except AssertionError:
        # A wrong reply is a real failure; only other errors become skips.
        raise
    except Exception as e:
        pytest.skip(f"Dynamic instructions through Pydantic AI not available: {e}")
|
||||
|
||||
|
||||
# Additional test class for edge cases
class TestPydanticAIEdgeCases:
    """Edge case tests for Pydantic AI integration.

    Each test skips when the call fails for a reason other than a failed
    assertion; assertion failures are re-raised so they are reported as real
    test failures rather than skips.
    """

    def test_empty_response_handling(self, test_config):
        """Test handling of potentially empty responses"""
        try:
            model = get_openai_model()
            agent = Agent(
                model,
                instructions="If asked to say nothing, respond with a single space.",
            )

            result = agent.run_sync("Say as little as possible.")

            # Should still get a valid result object
            assert result is not None

        except AssertionError:
            raise
        except Exception as e:
            pytest.skip(f"Empty response handling test not available: {e}")

    def test_special_characters_in_prompt(self, test_config):
        """Test handling of special characters in prompts"""
        try:
            model = get_openai_model()
            agent = Agent(
                model,
                instructions="Echo back special characters correctly.",
            )

            special_prompt = "Handle these: 你好 🎉 <tag> & \"quotes\" 'apostrophe'"
            result = agent.run_sync(special_prompt)

            assert result is not None
            assert result.output is not None

        except AssertionError:
            raise
        except Exception as e:
            pytest.skip(f"Special characters test not available: {e}")

    def test_long_conversation_context(self, test_config):
        """Test handling of longer conversation context"""
        try:
            model = get_openai_model()
            agent = Agent(
                model,
                instructions="You are a helpful assistant.",
            )

            # Build up conversation history: each turn is sent with the
            # messages accumulated by the previous turns.
            history = None
            for i in range(3):
                result = agent.run_sync(
                    f"Remember number {i + 1}.",
                    message_history=history,
                )
                history = result.all_messages()

            # Final query should work with accumulated history
            final_result = agent.run_sync(
                "What numbers did I ask you to remember?",
                message_history=history,
            )

            assert final_result is not None
            assert final_result.output is not None

        except AssertionError:
            raise
        except Exception as e:
            pytest.skip(f"Long conversation context test not available: {e}")
|
||||
|
||||
1
tests/integrations/python/tests/utils/__init__.py
Normal file
1
tests/integrations/python/tests/utils/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Utils package for shared test utilities
|
||||
3595
tests/integrations/python/tests/utils/common.py
Normal file
3595
tests/integrations/python/tests/utils/common.py
Normal file
File diff suppressed because it is too large
Load Diff
522
tests/integrations/python/tests/utils/config_loader.py
Normal file
522
tests/integrations/python/tests/utils/config_loader.py
Normal file
@@ -0,0 +1,522 @@
|
||||
"""
|
||||
Configuration loader for Bifrost integration tests.
|
||||
|
||||
This module loads configuration from config.yml and provides utilities
|
||||
for constructing integration URLs through the Bifrost gateway.
|
||||
"""
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
# Integration to provider mapping
# Maps integration names to their underlying provider configurations.
# Keys are the integration names accepted by ConfigLoader.get_model(); values
# are the keys looked up in the "providers" section of the loaded config.
INTEGRATION_TO_PROVIDER_MAP = {
    "openai": "openai",
    "anthropic": "anthropic",
    "google": "gemini",  # Google integration uses Gemini provider
    "litellm": "openai",  # LiteLLM defaults to OpenAI
    "langchain": "openai",  # LangChain defaults to OpenAI
    "pydanticai": "openai",  # Pydantic AI defaults to OpenAI
    "bedrock": "bedrock",  # Bedrock defaults to Amazon provider
    "azure": "azure",
}
|
||||
|
||||
@dataclass
class BifrostConfig:
    """Bifrost gateway configuration"""

    # Gateway root URL; integration URLs are built by appending an endpoint to it
    base_url: str
    # Integration name -> endpoint path appended to base_url
    endpoints: Dict[str, str]
|
||||
|
||||
|
||||
@dataclass
class IntegrationModels:
    """Model configuration for an integration"""

    # Model name used for the "chat" capability
    chat: str
    # Model name used for the "vision" capability
    vision: str
    # Model name used for the "tools" capability
    tools: str
    # Additional model names — presumably strings like the fields above; TODO confirm
    alternatives: list
|
||||
|
||||
|
||||
@dataclass
class TestConfig:
    """Complete test configuration.

    Plain data holder with one attribute per top-level configuration section.
    """

    # The "Test" name prefix makes pytest try to collect this class when it is
    # imported into a test module; opting out avoids a collection warning.
    # Left unannotated on purpose so it is a class attribute, not a dataclass field.
    __test__ = False

    bifrost: BifrostConfig
    api: Dict[str, Any]
    models: Dict[str, IntegrationModels]
    model_capabilities: Dict[str, Dict[str, Any]]
    test_settings: Dict[str, Any]
    integration_settings: Dict[str, Any]
    environments: Dict[str, Any]
    logging: Dict[str, Any]
|
||||
|
||||
|
||||
class ConfigLoader:
|
||||
"""Configuration loader for Bifrost integration tests"""
|
||||
|
||||
def __init__(self, config_path: Optional[str] = None):
|
||||
"""Initialize configuration loader
|
||||
|
||||
Args:
|
||||
config_path: Path to config.yml file. If None, looks for config.yml in project root.
|
||||
"""
|
||||
if config_path is None:
|
||||
# Look for config.yml in project root
|
||||
project_root = Path(__file__).parent.parent.parent
|
||||
config_path = project_root / "config.yml"
|
||||
|
||||
self.config_path = Path(config_path)
|
||||
self._config = None
|
||||
self._load_config()
|
||||
|
||||
def _load_config(self):
|
||||
"""Load configuration from YAML file"""
|
||||
if not self.config_path.exists():
|
||||
raise FileNotFoundError(f"Configuration file not found: {self.config_path}")
|
||||
|
||||
with open(self.config_path, "r") as f:
|
||||
raw_config = yaml.safe_load(f)
|
||||
|
||||
# Expand environment variables
|
||||
self._config = self._expand_env_vars(raw_config)
|
||||
|
||||
def _expand_env_vars(self, obj):
|
||||
"""Recursively expand environment variables in configuration"""
|
||||
if isinstance(obj, dict):
|
||||
return {k: self._expand_env_vars(v) for k, v in obj.items()}
|
||||
elif isinstance(obj, list):
|
||||
return [self._expand_env_vars(item) for item in obj]
|
||||
elif isinstance(obj, str):
|
||||
# Handle ${VAR:-default} syntax
|
||||
import re
|
||||
|
||||
pattern = r"\$\{([^}]+)\}"
|
||||
|
||||
def replace_var(match):
|
||||
var_expr = match.group(1)
|
||||
if ":-" in var_expr:
|
||||
var_name, default_value = var_expr.split(":-", 1)
|
||||
return os.getenv(var_name, default_value)
|
||||
else:
|
||||
return os.getenv(var_expr, "")
|
||||
|
||||
return re.sub(pattern, replace_var, obj)
|
||||
else:
|
||||
return obj
|
||||
|
||||
def get_integration_url(self, integration: str) -> str:
|
||||
"""Get the complete URL for a integration
|
||||
|
||||
Args:
|
||||
integration: Integration name (openai, anthropic, google, litellm)
|
||||
|
||||
Returns:
|
||||
Complete URL for the integration
|
||||
|
||||
Examples:
|
||||
get_integration_url("openai") -> "http://localhost:8080/openai"
|
||||
"""
|
||||
bifrost_config = self._config["bifrost"]
|
||||
base_url = bifrost_config["base_url"]
|
||||
endpoint = bifrost_config["endpoints"].get(integration, "")
|
||||
|
||||
if not endpoint:
|
||||
raise ValueError(f"No endpoint configured for integration: {integration}")
|
||||
|
||||
return f"{base_url.rstrip('/')}/{endpoint}"
|
||||
|
||||
def get_bifrost_config(self) -> BifrostConfig:
|
||||
"""Get Bifrost configuration"""
|
||||
bifrost_data = self._config["bifrost"]
|
||||
return BifrostConfig(
|
||||
base_url=bifrost_data["base_url"], endpoints=bifrost_data["endpoints"]
|
||||
)
|
||||
|
||||
def get_model(self, integration: str, model_type: str = "chat") -> str:
|
||||
"""Get model name for an integration and type
|
||||
|
||||
Maps integration names to provider configurations.
|
||||
|
||||
Args:
|
||||
integration: Integration name (openai, anthropic, google, litellm, langchain)
|
||||
model_type: Model type (chat, vision, tools, etc.)
|
||||
|
||||
Returns:
|
||||
Model name for the integration and type
|
||||
"""
|
||||
# Map integration to provider
|
||||
provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
|
||||
if not provider:
|
||||
raise ValueError(
|
||||
f"Unknown integration: {integration}. "
|
||||
f"Valid integrations: {list(INTEGRATION_TO_PROVIDER_MAP.keys())}"
|
||||
)
|
||||
|
||||
# Get model from provider configuration
|
||||
return self.get_provider_model(provider, model_type)
|
||||
|
||||
def get_model_alternatives(self, integration: str) -> list:
|
||||
"""Get alternative models for an integration"""
|
||||
# Map integration to provider
|
||||
provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
|
||||
if not provider:
|
||||
return []
|
||||
|
||||
# Get alternatives from provider configuration
|
||||
if "providers" not in self._config:
|
||||
return []
|
||||
|
||||
if provider not in self._config["providers"]:
|
||||
return []
|
||||
|
||||
return self._config["providers"][provider].get("alternatives", [])
|
||||
|
||||
def get_model_capabilities(self, model: str) -> Dict[str, Any]:
|
||||
"""Get capabilities for a specific model"""
|
||||
return self._config["model_capabilities"].get(
|
||||
model,
|
||||
{
|
||||
"chat": True,
|
||||
"tools": False,
|
||||
"vision": False,
|
||||
"max_tokens": 4096,
|
||||
"context_window": 4096,
|
||||
},
|
||||
)
|
||||
|
||||
def supports_capability(self, model: str, capability: str) -> bool:
|
||||
"""Check if a model supports a specific capability"""
|
||||
caps = self.get_model_capabilities(model)
|
||||
return caps.get(capability, False)
|
||||
|
||||
def get_api_config(self) -> Dict[str, Any]:
|
||||
"""Get API configuration (timeout, retries, etc.)"""
|
||||
return self._config["api"]
|
||||
|
||||
def get_test_settings(self) -> Dict[str, Any]:
|
||||
"""Get test configuration settings"""
|
||||
return self._config["test_settings"]
|
||||
|
||||
def get_integration_settings(self, integration: str) -> Dict[str, Any]:
|
||||
"""Get integration-specific settings"""
|
||||
return self._config["integration_settings"].get(integration, {})
|
||||
|
||||
def get_environment_config(self, environment: str | None = None) -> Dict[str, Any]:
|
||||
"""Get environment-specific configuration
|
||||
|
||||
Args:
|
||||
environment: Environment name (development, production, etc.)
|
||||
If None, uses TEST_ENV environment variable or 'development'
|
||||
"""
|
||||
if environment is None:
|
||||
environment = os.getenv("TEST_ENV", "development")
|
||||
|
||||
return self._config["environments"].get(environment, {})
|
||||
|
||||
def get_logging_config(self) -> Dict[str, Any]:
|
||||
"""Get logging configuration"""
|
||||
return self._config["logging"]
|
||||
|
||||
def list_integrations(self) -> list:
|
||||
"""List all configured integrations"""
|
||||
return list(INTEGRATION_TO_PROVIDER_MAP.keys())
|
||||
|
||||
def list_models(self, integration: str | None = None) -> Dict[str, Any]:
|
||||
"""List all models for an integration or all integrations"""
|
||||
if integration:
|
||||
# Map integration to provider
|
||||
provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
|
||||
if not provider:
|
||||
raise ValueError(f"Unknown integration: {integration}")
|
||||
|
||||
if "providers" not in self._config or provider not in self._config["providers"]:
|
||||
raise ValueError(f"No provider configuration for: {provider}")
|
||||
|
||||
return {integration: self._config["providers"][provider]}
|
||||
|
||||
# Return all providers mapped to their integration names
|
||||
result = {}
|
||||
for integration, provider in INTEGRATION_TO_PROVIDER_MAP.items():
|
||||
if "providers" in self._config and provider in self._config["providers"]:
|
||||
result[integration] = self._config["providers"][provider]
|
||||
|
||||
return result
|
||||
|
||||
def validate_config(self) -> bool:
|
||||
"""Validate configuration completeness"""
|
||||
required_sections = ["bifrost", "providers", "api", "test_settings"]
|
||||
|
||||
for section in required_sections:
|
||||
if section not in self._config:
|
||||
raise ValueError(f"Missing required configuration section: {section}")
|
||||
|
||||
# Validate Bifrost configuration
|
||||
bifrost = self._config["bifrost"]
|
||||
if "base_url" not in bifrost or "endpoints" not in bifrost:
|
||||
raise ValueError("Bifrost configuration missing base_url or endpoints")
|
||||
|
||||
# Validate that all integrations map to valid providers
|
||||
for integration, provider in INTEGRATION_TO_PROVIDER_MAP.items():
|
||||
if provider not in self._config["providers"]:
|
||||
raise ValueError(
|
||||
f"Integration '{integration}' maps to provider '{provider}' "
|
||||
f"which is not configured in providers section"
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
def print_config_summary(self):
|
||||
"""Print a summary of the configuration"""
|
||||
print("🔧 BIFROST INTEGRATION TEST CONFIGURATION")
|
||||
print("=" * 80)
|
||||
|
||||
# Bifrost configuration
|
||||
bifrost = self.get_bifrost_config()
|
||||
print("\n🌉 BIFROST GATEWAY:")
|
||||
print(f" Base URL: {bifrost.base_url}")
|
||||
print(" Endpoints:")
|
||||
for integration, endpoint in bifrost.endpoints.items():
|
||||
full_url = f"{bifrost.base_url.rstrip('/')}/{endpoint}"
|
||||
print(f" {integration}: {full_url}")
|
||||
|
||||
# Model configurations
|
||||
print("\n🤖 MODEL CONFIGURATIONS (via providers):")
|
||||
for integration, provider in INTEGRATION_TO_PROVIDER_MAP.items():
|
||||
if "providers" in self._config and provider in self._config["providers"]:
|
||||
models = self._config["providers"][provider]
|
||||
print(f" {integration.upper()} → {provider}:")
|
||||
print(f" Chat: {models.get('chat', 'N/A')}")
|
||||
print(f" Vision: {models.get('vision', 'N/A')}")
|
||||
print(f" Tools: {models.get('tools', 'N/A')}")
|
||||
alternatives = models.get('alternatives', [])
|
||||
print(f" Alternatives: {len(alternatives)} models")
|
||||
|
||||
# API settings
|
||||
api_config = self.get_api_config()
|
||||
print("\n⚙️ API SETTINGS:")
|
||||
print(f" Timeout: {api_config['timeout']}s")
|
||||
print(f" Max Retries: {api_config['max_retries']}")
|
||||
print(f" Retry Delay: {api_config['retry_delay']}s")
|
||||
|
||||
print(f"\n✅ Configuration loaded successfully from: {self.config_path}")
|
||||
|
||||
def get_provider_model(self, provider: str, capability: str = "chat") -> str:
|
||||
"""Get model name for a provider and capability
|
||||
|
||||
Args:
|
||||
provider: Provider name (e.g., 'openai', 'anthropic', 'gemini')
|
||||
capability: Capability type (default: 'chat')
|
||||
|
||||
Returns:
|
||||
Model name suitable for the provider and capability
|
||||
"""
|
||||
if "providers" not in self._config:
|
||||
# Fallback to old behavior if providers section doesn't exist
|
||||
return ""
|
||||
|
||||
providers = self._config["providers"]
|
||||
if provider not in providers:
|
||||
return ""
|
||||
|
||||
provider_models = providers[provider]
|
||||
return provider_models.get(capability, "")
|
||||
|
||||
def get_provider_api_key_env(self, provider: str) -> str:
|
||||
"""Get the environment variable name for a provider's API key
|
||||
|
||||
Args:
|
||||
provider: Provider name
|
||||
|
||||
Returns:
|
||||
Environment variable name
|
||||
"""
|
||||
if "provider_api_keys" not in self._config:
|
||||
return ""
|
||||
|
||||
return self._config["provider_api_keys"].get(provider, "")
|
||||
|
||||
def is_provider_available(self, provider: str) -> bool:
|
||||
"""Check if a provider is available (has API key in environment)
|
||||
|
||||
Args:
|
||||
provider: Provider name
|
||||
|
||||
Returns:
|
||||
True if provider's API key is set in environment
|
||||
"""
|
||||
env_var = self.get_provider_api_key_env(provider)
|
||||
if not env_var:
|
||||
return False
|
||||
|
||||
api_key = os.getenv(env_var)
|
||||
return api_key is not None and api_key.strip() != ""
|
||||
|
||||
def get_available_providers(self) -> List[str]:
|
||||
"""Get list of providers that are available (have API keys configured)
|
||||
|
||||
Returns:
|
||||
List of available provider names
|
||||
"""
|
||||
if "providers" not in self._config:
|
||||
return []
|
||||
|
||||
available = []
|
||||
for provider in self._config["providers"].keys():
|
||||
if self.is_provider_available(provider):
|
||||
available.append(provider)
|
||||
|
||||
return available
|
||||
|
||||
def provider_supports_scenario(self, provider: str, scenario: str) -> bool:
|
||||
"""Check if a provider supports a specific test scenario
|
||||
|
||||
Args:
|
||||
provider: Provider name
|
||||
scenario: Scenario name
|
||||
|
||||
Returns:
|
||||
True if provider supports the scenario
|
||||
"""
|
||||
if "provider_scenarios" not in self._config:
|
||||
return False
|
||||
|
||||
if provider not in self._config["provider_scenarios"]:
|
||||
return False
|
||||
|
||||
scenarios = self._config["provider_scenarios"][provider]
|
||||
return scenarios.get(scenario, False)
|
||||
|
||||
def get_providers_for_scenario(self, scenario: str) -> List[str]:
|
||||
"""Get list of available providers that support a specific scenario
|
||||
|
||||
Args:
|
||||
scenario: Scenario name
|
||||
|
||||
Returns:
|
||||
List of provider names that support the scenario
|
||||
"""
|
||||
available_providers = self.get_available_providers()
|
||||
providers = []
|
||||
|
||||
for provider in available_providers:
|
||||
if self.provider_supports_scenario(provider, scenario):
|
||||
providers.append(provider)
|
||||
|
||||
return providers
|
||||
|
||||
def get_scenario_capability(self, scenario: str) -> str:
|
||||
"""Get the capability type for a scenario
|
||||
|
||||
Args:
|
||||
scenario: Scenario name
|
||||
|
||||
Returns:
|
||||
Capability type (e.g., 'chat', 'vision', 'tools')
|
||||
"""
|
||||
if "scenario_capabilities" not in self._config:
|
||||
return "chat" # Default
|
||||
|
||||
return self._config["scenario_capabilities"].get(scenario, "chat")
|
||||
|
||||
def get_virtual_key(self) -> str:
|
||||
"""Get the virtual key value for testing
|
||||
|
||||
Returns:
|
||||
Virtual key string or empty string if not configured
|
||||
"""
|
||||
if "virtual_key" not in self._config:
|
||||
return ""
|
||||
|
||||
vk_config = self._config["virtual_key"]
|
||||
if not vk_config.get("enabled", False):
|
||||
return ""
|
||||
|
||||
return vk_config.get("value", "")
|
||||
|
||||
def is_virtual_key_configured(self) -> bool:
|
||||
"""Check if virtual key testing is enabled and configured
|
||||
|
||||
Returns:
|
||||
True if virtual key is available for testing
|
||||
"""
|
||||
vk = self.get_virtual_key()
|
||||
return vk is not None and vk.strip() != ""
|
||||
|
||||
|
||||
# Shared ConfigLoader instance, created on first use by get_config()
_config_loader = None


def get_config() -> ConfigLoader:
    """Return the shared ConfigLoader, constructing it on the first call."""
    global _config_loader
    if _config_loader is not None:
        return _config_loader
    _config_loader = ConfigLoader()
    return _config_loader
|
||||
|
||||
|
||||
def get_integration_url(integration: str) -> str:
    """Convenience function to get an integration's URL from the shared loader"""
    loader = get_config()
    return loader.get_integration_url(integration)
|
||||
|
||||
|
||||
def get_model(integration: str, model_type: str = "chat") -> str:
    """Convenience function: model name for an integration via the shared loader"""
    loader = get_config()
    return loader.get_model(integration, model_type)
|
||||
|
||||
|
||||
def get_model_capabilities(model: str) -> Dict[str, Any]:
    """Convenience function: capability dict for a model via the shared loader"""
    loader = get_config()
    return loader.get_model_capabilities(model)
|
||||
|
||||
|
||||
def supports_capability(model: str, capability: str) -> bool:
    """Convenience function: capability check for a model via the shared loader"""
    loader = get_config()
    return loader.supports_capability(model, capability)
|
||||
|
||||
|
||||
def get_provider_model(provider: str, capability: str = "chat") -> str:
    """Convenience function: model name for a provider via the shared loader"""
    loader = get_config()
    return loader.get_provider_model(provider, capability)
|
||||
|
||||
|
||||
def is_provider_available(provider: str) -> bool:
    """Convenience function: provider availability via the shared loader"""
    loader = get_config()
    return loader.is_provider_available(provider)
|
||||
|
||||
|
||||
def get_available_providers() -> List[str]:
    """Convenience function: available provider names via the shared loader"""
    loader = get_config()
    return loader.get_available_providers()
|
||||
|
||||
|
||||
def provider_supports_scenario(provider: str, scenario: str) -> bool:
    """Convenience function: scenario support check via the shared loader"""
    loader = get_config()
    return loader.provider_supports_scenario(provider, scenario)
|
||||
|
||||
|
||||
def get_providers_for_scenario(scenario: str) -> List[str]:
    """Convenience function: providers supporting a scenario via the shared loader"""
    loader = get_config()
    return loader.get_providers_for_scenario(scenario)
|
||||
|
||||
|
||||
def get_virtual_key() -> str:
    """Convenience function: virtual key value via the shared loader"""
    loader = get_config()
    return loader.get_virtual_key()
|
||||
|
||||
|
||||
def is_virtual_key_configured() -> bool:
    """Convenience function: whether a virtual key is configured, via the shared loader"""
    loader = get_config()
    return loader.is_virtual_key_configured()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Print configuration summary when run directly.
    # validate_config() raises ValueError on an incomplete config, so the
    # summary is only printed for a configuration that passed validation.
    config = get_config()
    config.validate_config()
    config.print_config_summary()
|
||||
66
tests/integrations/python/tests/utils/models.py
Normal file
66
tests/integrations/python/tests/utils/models.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""
|
||||
Model configurations for each integration.
|
||||
|
||||
This file now acts as a compatibility layer and convenience wrapper
|
||||
around the new configuration system in config.yml and config_loader.py.
|
||||
|
||||
All model data is now centralized in config.yml for easier maintenance.
|
||||
"""
|
||||
|
||||
from typing import Dict, List
|
||||
from dataclasses import dataclass
|
||||
from .config_loader import get_config
|
||||
|
||||
|
||||
@dataclass
class IntegrationModels:
    """Model configuration for an integration"""

    chat: str  # Primary chat model
    vision: str  # Vision/multimodal model
    tools: str  # Function calling model
    alternatives: List[str]  # Alternative models for testing
|
||||
|
||||
|
||||
def get_integration_models() -> Dict[str, IntegrationModels]:
    """Get all integration model configurations from config.yml.

    Returns:
        A dict mapping each integration whose provider is configured to an
        ``IntegrationModels`` built from that provider's settings. Missing
        model entries become empty strings and missing alternatives an empty
        list.
    """
    config = get_config()

    # list_models() with no argument returns {integration: provider_settings}
    # for every configured provider. The per-integration form wraps its result
    # under the integration name, so indexing it with "chat" directly would
    # raise KeyError.
    return {
        integration: IntegrationModels(
            chat=models_config.get("chat", ""),
            vision=models_config.get("vision", ""),
            tools=models_config.get("tools", ""),
            alternatives=list(models_config.get("alternatives") or []),
        )
        for integration, models_config in config.list_models().items()
    }
|
||||
|
||||
|
||||
# Backward compatibility - load from config.
# Evaluated at import time: importing this module calls get_config().
INTEGRATION_MODELS = get_integration_models()
|
||||
|
||||
|
||||
def get_alternatives(integration: str) -> List[str]:
    """Return the alternative models configured for an integration"""
    return get_config().get_model_alternatives(integration)
|
||||
|
||||
|
||||
def list_all_models() -> Dict[str, Dict[str, str]]:
    """Return the model configuration of every integration, keyed by integration name"""
    return get_config().list_models()
|
||||
|
||||
|
||||
# Print model summary for documentation
|
||||
def print_model_summary():
    """Print the configuration summary of all models and their capabilities.

    Delegates to ``print_config_summary()`` on the object obtained from
    ``get_config()``.
    """
    get_config().print_config_summary()
|
||||
|
||||
|
||||
# Running this module directly (rather than importing it) prints the model summary.
if __name__ == "__main__":
|
||||
print_model_summary()
|
||||
141
tests/integrations/python/tests/utils/parametrize.py
Normal file
141
tests/integrations/python/tests/utils/parametrize.py
Normal file
@@ -0,0 +1,141 @@
|
||||
"""
|
||||
Parametrization utilities for cross-provider testing.
|
||||
|
||||
This module provides pytest parametrization for testing across multiple AI providers
|
||||
with automatic scenario-based filtering.
|
||||
"""
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
from .config_loader import get_config
|
||||
|
||||
|
||||
def get_cross_provider_params_for_scenario(
    scenario: str,
    include_providers: List[str] | None = None,
    exclude_providers: List[str] | None = None,
) -> List[Tuple[str, str]]:
    """
    Get cross-provider parameters for pytest parametrization.

    Providers that support ``scenario`` are looked up in the configuration,
    optionally narrowed by the include/exclude filters, and each remaining
    provider is paired with its model for the scenario's capability.

    Args:
        scenario: Test scenario name
        include_providers: Optional list of providers to include; a falsy
            value (``None`` or an empty list) applies no include filter
        exclude_providers: Optional list of providers to exclude

    Returns:
        List of (provider, model) tuples ordered by provider name. When no
        provider yields a model, the single placeholder tuple
        ``("_no_providers_", "_no_model_")`` is returned instead of an
        empty list.
    """
    config = get_config()

    # Get providers that support this scenario
    providers = config.get_providers_for_scenario(scenario)

    # Apply include filter
    if include_providers:
        providers = [p for p in providers if p in include_providers]

    # Apply exclude filter
    if exclude_providers:
        providers = [p for p in providers if p not in exclude_providers]

    # Generate (provider, model) tuples
    # Automatically maps: scenario -> capability -> model
    params: List[Tuple[str, str]] = []
    if providers:
        # The capability depends only on the scenario, so resolve it once
        # instead of once per provider. It is still skipped entirely when
        # there is no provider left to look up.
        capability = config.get_scenario_capability(scenario)

        for provider in sorted(providers):  # Sort for consistent test ordering
            model = config.get_provider_model(provider, capability)

            # Only add if provider has a model for this scenario's capability
            if model:
                params.append((provider, model))

    # If no providers available, return a dummy tuple to avoid pytest errors
    # The test will be skipped with appropriate message
    if not params:
        params = [("_no_providers_", "_no_model_")]

    return params
|
||||
|
||||
|
||||
def get_cross_provider_params_with_vk_for_scenario(
    scenario: str,
    include_providers: List[str] | None = None,
    exclude_providers: List[str] | None = None,
) -> List[Tuple[str, str, bool]]:
    """
    Build (provider, model, vk_enabled) parameters for pytest parametrization.

    Every provider/model pair is always emitted with ``vk_enabled=False``.
    If a virtual key (VK) is configured, the same pair is emitted a second
    time, immediately afterwards, with ``vk_enabled=True``.

    Args:
        scenario: Test scenario name
        include_providers: Optional list of providers to include
        exclude_providers: Optional list of providers to exclude

    Returns:
        List of (provider, model, vk_enabled) tuples. If the base parameter
        list is only the "no providers" placeholder, a single placeholder
        tuple with ``vk_enabled=False`` is returned.

    Example:
        When VK is configured:
        [
            ("anthropic", "claude-3", False),
            ("anthropic", "claude-3", True),
            ("openai", "gpt-4o", False),
            ("openai", "gpt-4o", True),
        ]
    """
    cfg = get_config()

    # Provider/model pairs without any VK information
    pairs = get_cross_provider_params_for_scenario(
        scenario, include_providers, exclude_providers
    )

    # Pass the "no providers" placeholder through, never with VK enabled
    placeholder = ("_no_providers_", "_no_model_")
    if pairs == [placeholder]:
        return [placeholder + (False,)]

    # The non-VK variant is always present; the VK variant only when configured
    vk_flags = (False, True) if cfg.is_virtual_key_configured() else (False,)

    return [
        (provider, model, flag)
        for provider, model in pairs
        for flag in vk_flags
    ]
|
||||
|
||||
|
||||
def format_vk_test_id(provider: str, model: str, vk_enabled: bool) -> str:
    """
    Build the test ID used for virtual key parameterized tests.

    Args:
        provider: Provider name
        model: Model name
        vk_enabled: Whether VK is enabled

    Returns:
        ``"<provider>-<model>-with_vk"`` when ``vk_enabled`` is truthy,
        otherwise ``"<provider>-<model>-no_vk"``

    Example:
        >>> format_vk_test_id("openai", "gpt-4o", True)
        'openai-gpt-4o-with_vk'
        >>> format_vk_test_id("openai", "gpt-4o", False)
        'openai-gpt-4o-no_vk'
    """
    if vk_enabled:
        return f"{provider}-{model}-with_vk"
    return f"{provider}-{model}-no_vk"
|
||||
|
||||
|
||||
def format_provider_model(provider: str, model: str) -> str:
    """
    Join a provider and a model name into the "provider/model" form.

    Args:
        provider: Provider name
        model: Model name

    Returns:
        The two names separated by a single forward slash

    Example:
        >>> format_provider_model("openai", "gpt-4o")
        'openai/gpt-4o'
    """
    qualified_name = f"{provider}/{model}"
    return qualified_name
|
||||
5583
tests/integrations/python/uv.lock
generated
Normal file
5583
tests/integrations/python/uv.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user