first commit

This commit is contained in:
Beyhan Oğur
2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions

View File

@@ -0,0 +1 @@
3.12

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,348 @@
{
"$schema": "https://www.getbifrost.ai/schema",
"mcp": {
"client_configs": [
{
"name": "sse_mcp",
"connection_type": "sse",
"connection_string": "env.MCP_SSE_URL",
"auth_type": "headers",
"headers": {
"Authorization": "env.MCP_SSE_AUTHORIZATION",
"ENV_EXA_API_KEY": "env.MCP_SSE_API_KEY"
},
"tools_to_execute": ["*"],
"tools_to_auto_execute": ["*"]
}
]
},
"providers": {
"openai": {
"keys": [
{
"name": "OpenAI API Key",
"value": "env.OPENAI_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"elevenlabs": {
"keys": [
{
"name": "ElevenLabs API Key",
"value": "env.ELEVENLABS_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": false
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"xai": {
"keys": [
{
"name": "Xai API Key",
"value": "env.XAI_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": false
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"huggingface": {
"keys": [
{
"name": "Hugging Face API Key",
"value": "env.HUGGING_FACE_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": false
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"anthropic": {
"keys": [
{
"name": "Anthropic API Key",
"value": "env.ANTHROPIC_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"gemini": {
"keys": [
{
"name": "Gemini API Key",
"value": "env.GEMINI_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"vertex": {
"keys": [
{
"name": "Vertex API Key",
"vertex_key_config": {
"project_id": "env.VERTEX_PROJECT_ID",
"region": "env.GOOGLE_LOCATION",
"auth_credentials": "env.VERTEX_CREDENTIALS"
},
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"mistral": {
"keys": [
{
"name": "Mistral API Key",
"value": "env.MISTRAL_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"cohere": {
"keys": [
{
"name": "Cohere API Key",
"value": "env.COHERE_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"parasail": {
"keys": [
{
"name": "Parasail API Key",
"value": "env.PARASAIL_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"groq": {
"keys": [
{
"name": "Groq API Key",
"value": "env.GROQ_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"perplexity": {
"keys": [
{
"name": "Perplexity API Key",
"value": "env.PERPLEXITY_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"cerebras": {
"keys": [
{
"name": "Cerebras API Key",
"value": "env.CEREBRAS_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"openrouter": {
"keys": [
{
"name": "OpenRouter API Key",
"value": "env.OPENROUTER_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"azure": {
"keys": [
{
"name": "Azure API Key",
"value": "env.AZURE_API_KEY",
"azure_key_config": {
"endpoint": "env.AZURE_ENDPOINT",
"api_version": "env.AZURE_API_VERSION",
"deployments": {
"gpt-4o": "gpt-4o",
"gpt-4o-mini": "gpt-4o-mini",
"gpt-4o-mini-tts": "gpt-4o-mini-tts",
"o1": "o1",
"gpt-4o-batch": "gpt-4o-batch",
"whisper": "whisper",
"text-embedding-3-small": "text-embedding-3-small",
"gpt-image-1": "gpt-image-1"
}
},
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"bedrock": {
"keys": [
{
"name": "Bedrock API Key",
"bedrock_key_config": {
"access_key": "env.AWS_ACCESS_KEY_ID",
"secret_key": "env.AWS_SECRET_ACCESS_KEY",
"region": "env.AWS_REGION",
"arn": "env.AWS_ARN"
},
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"replicate": {
"keys": [
{
"name": "Replicate API Key",
"value": "env.REPLICATE_API_KEY",
"weight": 1
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"runway": {
"keys": [
{
"name": "Runway API Key",
"value": "env.RUNWAY_API_KEY",
"weight": 1
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"nebius": {
"keys": [
{
"name": "Nebius API Key",
"value": "env.NEBIUS_API_KEY",
"weight": 1
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
}
},
"config_store": {
"enabled": true,
"type": "sqlite",
"config": {
"path": "./config.db"
}
},
"logs_store": {
"enabled": true,
"type": "sqlite",
"config": {
"path": "./logs.db"
}
},
"governance": {
"virtual_keys": [
{
"name": "Test Key",
"id": "vk-test",
"value": "sk-bf-test-key",
"is_active": true,
"provider_configs": [
{ "provider": "openai", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "elevenlabs", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "xai", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "huggingface", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "anthropic", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "gemini", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "vertex", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "mistral", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "cohere", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "parasail", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "groq", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "perplexity", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "cerebras", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "openrouter", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "azure", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "bedrock", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 }
]
}
]
},
"client": {
"drop_excess_requests": false,
"initial_pool_size": 300,
"allowed_origins": [
"*"
],
"enable_logging": true,
"enforce_auth_on_inference": false,
"allow_direct_keys": false,
"max_request_body_size_mb": 100
}
}

View File

@@ -0,0 +1,942 @@
# Bifrost Integration Tests Configuration
# This file centralizes all configuration for AI integration clients and test settings
# Bifrost Gateway Configuration
# All integrations route through Bifrost as a proxy/gateway
bifrost:
base_url: "${BIFROST_BASE_URL:-http://localhost:8080}"
# Integration-specific endpoints (suffixes appended to base_url)
endpoints:
openai: "openai"
anthropic: "anthropic"
cohere: "cohere"
google: "genai"
"gemini_passthrough": "genai_passthrough"
"anthropic_passthrough": "anthropic_passthrough"
litellm: "litellm"
langchain: "langchain"
pydanticai: "pydanticai"
bedrock: "bedrock"
azure: "openai" # Azure uses OpenAI routes with /openai/deployments/{id} paths
# Full URLs constructed as: {base_url.rstrip('/')}/{endpoints[integration]}
# Examples:
# - OpenAI: http://localhost:8080/openai
# - Anthropic: http://localhost:8080/anthropic
# - Google: http://localhost:8080/genai
# - LiteLLM: http://localhost:8080/litellm
# - LangChain: http://localhost:8080/langchain
# API Configuration
api:
timeout: 30 # seconds
max_retries: 3
retry_delay: 1 # seconds
# Provider model configurations
# Integrations (openai, anthropic, google, litellm, langchain) map to these providers
providers:
openai:
chat: "gpt-4o"
vision: "gpt-4o"
file: "gpt-4o"
tools: "gpt-4o-mini"
speech: "tts-1"
transcription: "whisper-1"
embeddings: "text-embedding-3-small"
image_generation: "gpt-image-1"
image_edit: "gpt-image-1"
streaming: "gpt-4o-mini"
thinking: "gpt-5.1"
batch_file_upload: "gpt-4o-mini"
batch_list: "gpt-4o"
batch_retrieve: "gpt-4o"
batch_cancel: "gpt-4o"
batch_inline: "gpt-4o"
file_upload: "gpt-4o-mini"
file_list: "gpt-4o-mini"
file_retrieve: "gpt-4o-mini"
file_delete: "gpt-4o-mini"
file_content: "gpt-4o-mini"
count_tokens: "gpt-4o-mini"
video: "sora-2"
alternatives:
- "gpt-4"
- "gpt-4-turbo-preview"
- "gpt-4o"
- "gpt-3.5-turbo"
azure:
chat: "gpt-4o"
vision: "gpt-4o"
tools: "gpt-4o-mini"
streaming: "gpt-4o-mini"
speech: "gpt-4o-mini-tts"
transcription: "whisper"
embeddings: "text-embedding-3-small"
image_generation: "gpt-image-1"
thinking: "o1"
batch_file_upload: "gpt-4o-batch"
batch_list: "gpt-4o-batch"
batch_retrieve: "gpt-4o-batch"
batch_cancel: "gpt-4o-batch"
file_upload: "gpt-4o"
file_list: "gpt-4o"
file_retrieve: "gpt-4o"
file_delete: "gpt-4o"
file_content: "gpt-4o"
xai:
chat: "grok-4-0709"
vision: "grok-2-vision-1212"
tools: "grok-4-0709"
streaming: "grok-4-0709"
thinking: "grok-3-mini"
anthropic:
chat: "claude-sonnet-4-5-20250929"
vision: "claude-sonnet-4-5-20250929"
file: "claude-sonnet-4-5-20250929"
tools: "claude-sonnet-4-5-20250929"
streaming: "claude-sonnet-4-5-20250929"
thinking: "claude-opus-4-5"
batch_file_upload: "claude-sonnet-4-20250514"
batch_inline: "claude-sonnet-4-20250514"
batch_list: "claude-sonnet-4-20250514"
batch_retrieve: "claude-sonnet-4-20250514"
batch_cancel: "claude-sonnet-4-20250514"
file_upload: "claude-sonnet-4-20250514"
file_list: "claude-sonnet-4-20250514"
file_retrieve: "claude-sonnet-4-20250514"
file_delete: "claude-sonnet-4-20250514"
file_content: "claude-sonnet-4-20250514"
count_tokens: "claude-sonnet-4-5-20250929"
alternatives:
- "claude-3-sonnet-20240229"
- "claude-3-opus-20240229"
- "claude-3-5-sonnet-20241022"
- "claude-3-haiku-20240307"
gemini:
chat: "gemini-3-flash-preview"
vision: "gemini-3-flash-preview"
tools: "gemini-3-flash-preview"
file: "gemini-2.5-flash"
thinking: "gemini-3-pro-preview"
speech: "gemini-2.5-flash-preview-tts"
transcription: "gemini-2.5-flash"
embeddings: "gemini-embedding-001"
image_generation: "gemini-2.5-flash-image"
image_edit: "gemini-3-pro-image-preview"
imagen: "imagen-4.0-generate-001"
video: "veo-3.1-generate-preview"
streaming: "gemini-3-flash-preview"
batch_create: "gemini-2.5-flash"
batch_inline: "gemini-2.5-flash"
batch_file_upload: "gemini-2.5-flash"
batch_list: "gemini-2.5-flash"
batch_retrieve: "gemini-2.5-flash"
batch_cancel: "gemini-2.5-flash"
batch_s3: "gemini-2.5-flash"
file_upload: "gemini-2.0-flash"
file_list: "gemini-2.0-flash"
file_content: "gemini-2.0-flash"
file_download: "gemini-2.0-flash"
file_retrieve: "gemini-2.0-flash"
file_delete: "gemini-2.0-flash"
count_tokens: "gemini-2.5-flash"
alternatives:
- "gemini-1.5-pro"
- "gemini-1.5-flash"
- "gemini-1.0-pro"
- "gemini-2.0-flash-001"
vertex:
chat: "gemini-2.5-flash"
vision: "claude-sonnet-4-5"
tools: "gemini-2.5-flash"
file: "claude-sonnet-4-5"
thinking: "gemini-2.5-pro"
embeddings: "gemini-embedding-001"
image_generation: "imagen-4.0-generate-001"
image_edit: "imagen-3.0-capability-001"
imagen: "imagen-4.0-generate-001"
streaming: "gemini-2.5-flash"
count_tokens: "claude-sonnet-4-5"
video: "veo-3.1-generate-preview"
bedrock:
chat: "global.anthropic.claude-sonnet-4-20250514-v1:0"
vision: "global.anthropic.claude-sonnet-4-20250514-v1:0"
file: "global.anthropic.claude-sonnet-4-20250514-v1:0"
tools: "global.anthropic.claude-sonnet-4-20250514-v1:0"
streaming: "global.anthropic.claude-sonnet-4-20250514-v1:0"
thinking: "us.anthropic.claude-opus-4-5-20251101-v1:0"
text_completion: "mistral.mistral-7b-instruct-v0:2"
embeddings: "global.cohere.embed-v4:0"
image_generation: "amazon.titan-image-generator-v2:0"
image_variation: "amazon.titan-image-generator-v2:0"
batch_inline: "anthropic.claude-3-5-sonnet-20240620-v1:0"
image_edit: "amazon.nova-canvas-v1:0"
batch_list: "anthropic.claude-3-5-sonnet-20240620-v1:0"
batch_retrieve: "anthropic.claude-3-5-sonnet-20240620-v1:0"
batch_cancel: "anthropic.claude-3-5-sonnet-20240620-v1:0"
batch_file_upload: "anthropic.claude-3-5-sonnet-20240620-v1:0"
batch_s3: "anthropic.claude-3-5-sonnet-20240620-v1:0"
file_upload: "anthropic.claude-3-5-sonnet-20240620-v1:0"
file_list: "anthropic.claude-3-5-sonnet-20240620-v1:0"
file_delete: "anthropic.claude-3-5-sonnet-20240620-v1:0"
file_content: "anthropic.claude-3-5-sonnet-20240620-v1:0"
count_tokens: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
alternatives:
- "anthropic.claude-3-opus-20240229-v1:0"
cohere:
chat: "command-a-03-2025"
vision: "command-a-vision-07-2025"
tools: "command-a-03-2025"
embeddings: "embed-v4.0"
streaming: "command-a-03-2025"
count_tokens: "command-a-03-2025"
alternatives:
- "command-r-plus"
huggingface:
image_generation: "fal-ai/fal-ai/flux/dev"
image_edit: "fal-ai/fal-ai/flux-2/edit"
nebius:
image_generation: "black-forest-labs/flux-schnell"
replicate:
video: "openai/sora-2-pro"
runway:
video: "gen4.5"
# Provider availability configuration
# Maps provider names to their API key environment variables
provider_api_keys:
openai: "OPENAI_API_KEY"
anthropic: "ANTHROPIC_API_KEY"
gemini: "GEMINI_API_KEY"
vertex: "VERTEX_API_KEY"
bedrock: "AWS_ACCESS_KEY_ID"
cohere: "COHERE_API_KEY"
xai: "XAI_API_KEY"
huggingface: "HUGGING_FACE_API_KEY"
nebius: "NEBIUS_API_KEY"
azure: "AZURE_API_KEY"
replicate: "REPLICATE_API_KEY"
runway: "RUNWAY_API_KEY"
# Provider test scenarios - which tests each provider supports
provider_scenarios:
openai:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
"web_search": true
image_url: true
image_base64: true
file_input: true
multiple_images: true
speech_synthesis: true
speech_synthesis_streaming: true
transcription: true
transcription_streaming: true
embeddings: true
image_generation: true
image_edit: true
thinking: true
prompt_caching: false
citations: false
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: true # PydanticAI structured output works reliably with OpenAI
pydanticai_streaming: true # PydanticAI streaming works with OpenAI
batch_file_upload: true
batch_create: true
batch_list: true
batch_retrieve: true
batch_cancel: true
batch_inline: true # OpenAI supports inline requests for batch
batch_s3: false # OpenAI does not use S3 for batch
file_upload: true
file_list: true
file_retrieve: true
file_delete: true
file_content: true
count_tokens: true
video_generation: false # disabled for now because of long running operations
azure:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
web_search: false
image_url: true
image_base64: true
file_input: false
multiple_images: true
speech_synthesis: true
speech_synthesis_streaming: true
transcription: true
transcription_streaming: true
embeddings: true
image_generation: true
image_edit: false
thinking: true
prompt_caching: false
citations: false
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: false
pydantic_structured_output: false
pydanticai_streaming: false
batch_file_upload: true
batch_create: false
batch_list: true
batch_retrieve: true
batch_cancel: true
batch_inline: false
batch_s3: false
file_upload: true
file_list: true
file_retrieve: true
file_delete: true
file_content: true
count_tokens: false
xai:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
image_url: true
image_base64: false
file_input: false
multiple_images: false
thinking: true
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: true
pydanticai_streaming: true
anthropic:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
web_search: true
image_url: true
image_base64: true
file_input: true
file_input_text: true
multiple_images: true
speech_synthesis: false
speech_synthesis_streaming: false
transcription: false
transcription_streaming: false
embeddings: false
thinking: true
prompt_caching: true
citations: true
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: false
pydantic_structured_output: true # PydanticAI structured output works with Anthropic
pydanticai_streaming: true # PydanticAI streaming works with Anthropic
batch_file_upload: true # Enabled even though Anthropic's native batch API takes inline requests, not files (NOTE(review): confirm this is intended)
batch_create: true
batch_list: true
batch_retrieve: true
batch_cancel: true
batch_inline: true # Anthropic uses inline requests for batch
batch_s3: false # Anthropic does not use S3 for batch
file_upload: true
file_list: true
file_retrieve: true
file_delete: true
file_content: true
count_tokens: true
gemini:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
image_url: false # Gemini requires base64 or file upload
image_base64: true
file_input: true
multiple_images: false
speech_synthesis: true
speech_synthesis_streaming: true
transcription: true
transcription_streaming: true
embeddings: true
image_generation: true # Gemini image generation via responseModalities
image_edit: true # Gemini image editing
imagen: true # Imagen via :predict endpoint
imagen_edit: true # Imagen editing via image_edit model
thinking: true
video_generation: false # disabled for now because of long running operations
prompt_caching: false
citations: false
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: false # PydanticAI structured output unreliable via Bifrost for Gemini
pydanticai_streaming: false # PydanticAI GoogleModel streaming has asyncio issues
batch_file_upload: true # Gemini supports file upload via Files API
batch_create: true
batch_list: true
batch_retrieve: true
batch_cancel: true
batch_inline: true # Gemini uses inline requests for batch (synchronous)
batch_s3: false # Gemini does not use S3 for batch
file_upload: true
file_list: true
file_retrieve: true
file_delete: true
file_content: false # Gemini doesn't support direct file download
count_tokens: true
context_caching: true # Gemini context caching (Caches API) via Bifrost passthrough
vertex:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
image_url: false # Gemini requires base64 or file upload
image_base64: true
file_input: true
multiple_images: false
speech_synthesis: false
speech_synthesis_streaming: false
transcription: false
transcription_streaming: false
embeddings: true
image_generation: true
image_edit: true
imagen: true # Imagen via :predict endpoint
imagen_edit: true # Imagen editing via image_edit model
thinking: true
prompt_caching: false
list_models: true
video_generation: false # disabled for now because of long running operations
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: false # PydanticAI structured output unreliable via Bifrost for Gemini
pydanticai_streaming: false # PydanticAI GoogleModel streaming has asyncio issues
batch_file_upload: false # Batch file upload scenario is disabled for Vertex
batch_create: false
batch_list: false
batch_retrieve: false
batch_cancel: false
batch_inline: false # Inline batch scenario is disabled for Vertex
batch_s3: false # Gemini does not use S3 for batch
file_upload: false
file_list: false
file_retrieve: false
file_delete: false
file_content: false # Gemini doesn't support direct file download
count_tokens: false
bedrock:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
image_url: false
image_base64: true
file_input: true
file_input_text: true
multiple_images: false
speech_synthesis: false
speech_synthesis_streaming: false
transcription: false
transcription_streaming: false
embeddings: true
thinking: true
prompt_caching: true
citations: false
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: false # Bedrock not supported in PydanticAI tests
pydanticai_streaming: false # Bedrock not supported in PydanticAI tests
batch_file_upload: true # Bedrock uses S3 wrapper for file uploads
batch_create: true
batch_list: true
batch_retrieve: true
batch_cancel: true
batch_inline: false # Bedrock batch uses S3, not inline requests via API
batch_s3: true # Bedrock uses S3 for batch input/output
file_upload: true # Bedrock uses S3 wrapper for file storage
file_list: true # Bedrock lists files in S3 bucket
file_retrieve: true # Bedrock retrieves S3 object metadata
file_delete: true # Bedrock deletes S3 objects
file_content: true # Bedrock downloads S3 object content
image_generation: true # Bedrock supports image generation via invoke (Titan, SA, cross-provider)
image_edit: true # Bedrock supports image editing via invoke (Titan, SA)
image_variation: true # Bedrock supports image variation via invoke (Titan IMAGE_VARIATION)
count_tokens: true # Bedrock supports token counting via CountTokens API
cohere:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: false
image_url: true
image_base64: true
multiple_images: true
speech_synthesis: false
speech_synthesis_streaming: false
transcription: false
transcription_streaming: false
embeddings: true
thinking: false
prompt_caching: false
citations: false
list_models: false
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: false # PydanticAI CohereModel doesn't reliably support structured output
pydanticai_streaming: false # PydanticAI CohereModel doesn't implement streaming
batch_file_upload: false
batch_create: false
batch_list: false
batch_retrieve: false
batch_cancel: false
batch_inline: false # Cohere does not support batch API
batch_s3: false # Cohere does not support batch API
file_upload: false # Cohere does not support Files API
file_list: false
file_retrieve: false
file_delete: false
file_content: false
count_tokens: true
huggingface:
image_generation: true
image_edit: true
nebius:
image_generation: true
replicate:
video_generation: false # disabled for now because of long running operations
runway:
video_generation: false # disabled for now because of long running operations
# Scenario to capability mapping
# Maps test scenario names to their corresponding capability types
scenario_capabilities:
simple_chat: "chat"
multi_turn_conversation: "chat"
responses: "chat"
responses_image: "vision"
text_completion: "chat"
streaming: "streaming"
tool_calls: "tools"
multiple_tool_calls: "tools"
end2end_tool_calling: "tools"
automatic_function_calling: "tools"
web_search: "chat"
image_url: "vision"
image_base64: "vision"
file_input: "file"
file_input_text: "file"
multiple_images: "vision"
speech_synthesis: "speech"
speech_synthesis_streaming: "speech"
transcription: "transcription"
transcription_streaming: "transcription"
embeddings: "embeddings"
image_generation: "image_generation" # Uses image_generation model
image_edit: "image_edit" # Uses image_edit model
imagen: "imagen" # Uses imagen model (Gemini/Vertex)
imagen_edit: "image_edit" # Uses image_edit model for Imagen editing
thinking: "thinking"
prompt_caching: "chat"
citations: "chat"
list_models: "chat"
langchain_structured_output: "chat" # LangChain structured output uses chat capability
count_tokens: "count_tokens" # Token counting capability
pydantic_structured_output: "chat" # Structured output uses chat capability
pydanticai_streaming: "streaming" # PydanticAI streaming uses streaming capability
batch_file_upload: "batch_file_upload" # Uses batch_file_upload model directly
batch_create: "batch_create"
batch_list: "batch_list"
batch_retrieve: "batch_retrieve"
batch_cancel: "batch_cancel"
batch_inline: "batch_inline" # Uses batch_inline model directly
batch_s3: "batch_s3" # Uses batch_s3 model directly
file_upload: "file_upload" # Uses file_upload model directly
file_list: "file_list" # Uses file_list model directly
file_retrieve: "file_retrieve" # Uses file_retrieve model directly
file_delete: "file_delete" # Uses file_delete model directly
file_content: "file_content" # Uses file_content model directly
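count_tokens: "chat" # NOTE(review): duplicate key - count_tokens is already mapped to "count_tokens" earlier in scenario_capabilities; keep only one entry (loaders that tolerate duplicate keys typically keep this later value)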
video_generation: "video"
context_caching: "chat" # Gemini Caches API (passthrough)
# Model capabilities matrix
model_capabilities:
# OpenAI Models
"gpt-3.5-turbo":
chat: true
tools: true
vision: false
streaming: true
max_tokens: 4096
context_window: 4096
"gpt-4":
chat: true
tools: true
vision: false
streaming: true
max_tokens: 8192
context_window: 8192
"gpt-4o":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 4096
context_window: 128000
"gpt-4o-mini":
chat: true
tools: true
vision: true
streaming: true
speech: false
transcription: false
max_tokens: 4096
context_window: 128000
# OpenAI Speech Models
"tts-1":
chat: false
tools: false
vision: false
streaming: false
speech: true
transcription: false
max_tokens: null
context_window: null
"tts-1-hd":
chat: false
tools: false
vision: false
streaming: false
speech: true
transcription: false
max_tokens: null
context_window: null
# OpenAI Transcription Models
"whisper-1":
chat: false
tools: false
vision: false
streaming: false
speech: false
transcription: true
embeddings: false
max_tokens: null
context_window: null
# OpenAI Embedding Models
"text-embedding-3-small":
chat: false
tools: false
vision: false
streaming: false
speech: false
transcription: false
embeddings: true
max_tokens: null
context_window: 8191
dimensions: 1536
"text-embedding-3-large":
chat: false
tools: false
vision: false
streaming: false
speech: false
transcription: false
embeddings: true
max_tokens: null
context_window: 8191
dimensions: 3072
"text-embedding-ada-002":
chat: false
tools: false
vision: false
streaming: false
speech: false
transcription: false
embeddings: true
max_tokens: null
context_window: 8191
dimensions: 1536
# Anthropic Models
"claude-3-haiku-20240307":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 4096
context_window: 200000
"claude-3-sonnet-20240229":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 4096
context_window: 200000
"claude-3-opus-20240229":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 4096
context_window: 200000
# Google Models
"gemini-pro":
chat: true
tools: true
vision: false
streaming: true
max_tokens: 8192
context_window: 32768
"gemini-2.0-flash-001":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 8192
context_window: 32768
"gemini-1.5-pro":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 8192
context_window: 1000000
# Gemini Transcription Models
"gemini-2.5-flash":
chat: true
tools: true
vision: true
streaming: true
speech: false
transcription: true
embeddings: false
max_tokens: 8192
context_window: 1000000
audio_max_duration: 34200 # 9.5 hours in seconds
"gemini-2.5-pro":
chat: true
tools: true
vision: true
streaming: true
speech: false
transcription: true
embeddings: false
max_tokens: 8192
context_window: 2000000
audio_max_duration: 34200 # 9.5 hours in seconds
# Gemini TTS Models
"gemini-2.5-flash-preview-tts":
chat: false
tools: false
vision: false
streaming: false
speech: true
transcription: false
embeddings: false
max_tokens: 32000 # 32k token context window for TTS
context_window: 32000
audio_format: "pcm"
sample_rate: 24000
channels: 1
"gemini-2.5-pro-preview-tts":
chat: false
tools: false
vision: false
streaming: false
speech: true
transcription: false
embeddings: false
max_tokens: 32000 # 32k token context window for TTS
context_window: 32000
audio_format: "pcm"
sample_rate: 24000
channels: 1
# Mistral Models
"mistral-7b-instruct":
chat: true
tools: false
vision: false
streaming: true
max_tokens: 4096
context_window: 32768
"mistral-8x7b-instruct":
chat: true
tools: true
vision: false
streaming: true
max_tokens: 4096
context_window: 32768
# Test configuration
test_settings:
# Maximum tokens for test responses
max_tokens:
chat: 100
vision: 200
tools: 100
complex: 300
speech: null # Speech doesn't use token limits
transcription: null # Transcription doesn't use token limits
embeddings: null # Embeddings don't use token limits (text is the input)
# Timeout settings for tests
timeouts:
simple: 30 # seconds
complex: 60 # seconds
# Retry settings for flaky tests
retries:
max_attempts: 3
delay: 2 # seconds
# Integration-specific settings
integration_settings:
openai:
organization: "${OPENAI_ORG_ID:-}"
project: "${OPENAI_PROJECT_ID:-}"
anthropic:
version: "2023-06-01"
google:
project_id: "${GOOGLE_PROJECT_ID:-}"
location: "${GOOGLE_LOCATION:-us-central1}"
litellm:
drop_params: true
debug: false
langchain:
debug: false
streaming: true
bedrock:
region: "${AWS_REGION:-us-west-2}"
s3_bucket: "${AWS_S3_BUCKET:-}"
batch_role_arn: "${AWS_ARN:-}"
output_s3_prefix: "${AWS_OUTPUT_S3_PREFIX:-bifrost-batch-output/}"
azure:
api_version: "${AZURE_API_VERSION:-2024-10-21}"
# Environment-specific overrides
environments:
development:
api:
timeout: 60
max_retries: 5
test_settings:
timeouts:
simple: 60
complex: 120
production:
api:
timeout: 15
max_retries: 2
test_settings:
timeouts:
simple: 20
complex: 40
# Virtual key testing configuration
# When enabled, cross-provider tests will run twice: with and without the x-bf-vk header
virtual_key:
enabled: true
value: "sk-bf-test-key"
# Logging configuration
logging:
level: "INFO"
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
file: "tests.log"

View File

@@ -0,0 +1,12 @@
{
"type": "service_account",
"project_id": "dummy-bifrost-project",
"private_key_id": "dummy-key-id-12345",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCY+aj4fvYTj4l9\nYcgnEg7f9Y2zcck8bvYrhIY/m0NJpfUV2rOAbvgHJXUgobmUcgf6E9b76AWVN/Wm\nk6dxE+PWj1/DwkaYk4uDHpWFOn6HkF7ypLeGMamSnU+OfKFoUrRW8NfoMgh+uGVt\nwMh82qBztaTJKjN2BlxBepgR0iZKG81ySkyhaUL1Jh99E3AcNULkkp+VHTD51lw6\n4H0B197tY18GUZ+iPK3Laj9HBVOAjxqsCs4cMsWZ16R+dfZr8ZcDC0zodhcVNSsX\nA7uKZ5tKChJQEzHhk4o8ywnrsyd4E9FHKHsbs+Ye7K5qrTOxpKpDiy6DOPlknZ0/\nzLjk+SARAgMBAAECggEAAepUIktYZnmvblI//Sj8rHdJRoJGOqxNcnaW+4b+euUW\nQ1CspV1+U51amCBvza6kZ+0gaKEhi3lAAhfYQFx5YGtHTbHtKwjTL6oDrKKTncx9\nz/oJYeV6vVTOGGCjZQx2f30DwJZE0XG/1Qpl6L9SSBv14HlwY/6US75snRsWvCAc\nYlWJCozpn5ycSZDbqQBSVF3ueeHoH4ahL5Iw2NELAk87HLBGbtfvfwWpnQqlJtIi\ndQWqYYBHrqk5ThQNcJl1o3oBY7MMVE6/jWZbr+aIXtgfQlG3j+Z6PD6/7g3z65Yy\nxiOrypqpEm8UhmmxoF7UjSci+32NLj7SfdgXM2QLgwKBgQDP3iOCWBmzK72tWeF9\nkNKTXn/6niJHgaPhVTVYsAEoQMcWBtGSUF7QRWZt4qu8APRTwhThFpLVLSZOk50I\ndf9xFhWqubsif/ox1Fbd3SGswwoWhHvGHQ/JH/75akpMKTXkkLDomNNL49kNwW1E\nmb1EJPOeyuOxhw8gP7v8qJ8cAwKBgQC8ZaLgYUmMsc+IDpREN22fXNmoBE0OgrIK\nBLa9rQRAbzdmKcNxLpGAEsyiuPOrgD/9U2G9hM6kztCN53Ho86rphFiHHgN2NJfH\n/Jz/jTtM3UPKv0QCHuLTZknLFeYE3A0jNYFpRi/hjy2n0E4Gtp/0Y0ZULseMSvM5\naN7CWGS5WwKBgQDPCWb+vTcjwO5UCdDQ2v0RsS7w9K4Z4KLUnaTbp7oPWK2yX6o+\n+/PjpywFSJ5aS+0Ou6FGK9ClqSmdW+MteTGqdh+wgvtDuon9NYwrwMN4qm6SzPPm\n+C0v2sF/tIE56FX4SLEbipPx44fd7okhqarcg51uzJAK0wWazkAzv9Nx9wKBgEUk\n9EtvyWO22tkvqKEEytoDZOrycSmTNC7THhKtTnMrnmSDjXSbx9D+lVZflSbrkhCy\nqpu5A3KfaRG70SXTUHYWGbu1e0XF9bLzdtegCRSj3L6rxhUVKuC1mP3NUreT38p9\nV7rAhNA/EV2W6RwzqK80RFqfNKO72lrGr4MamBUjAoGBAJzy/47STnaW23aPutJF\nU23Kp5QDSkZzCniDBNIbuxlgZ5x2m4wK0FPRwWBcuvisG3G9VXohEfxJ0/IG8t6/\nOH1tVXYeR9pWtGIWEZuzFHL38ji4/BL3i94gW26GntJrr1ut94KHN1ynqkYRP/gK\ngRU91/0vXG+SOTubYUh5G5w3\n-----END PRIVATE KEY-----\n",
"client_email": "dummy-bifrost@dummy-bifrost-project.iam.gserviceaccount.com",
"client_id": "123456789012345678901",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dummy-bifrost%40dummy-bifrost-project.iam.gserviceaccount.com"
}

View File

@@ -0,0 +1,126 @@
[project]
name = "bifrost-integration-tests"
version = "0.1.0"
description = "Production-ready end-to-end test suite for testing AI integrations through Bifrost proxy"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
# Core testing framework
"pytest>=7.0.0",
"pytest-asyncio>=0.21.0",
# Environment and configuration
"python-dotenv>=1.0.0",
"PyYAML>=6.0",
# Image processing
"Pillow>=9.0.0",
# HTTP requests for debugging
"requests>=2.28.0",
# Type hints
"typing-extensions>=4.0.0",
# Test reporting
"pytest-html>=3.1.0",
"pytest-cov>=4.0.0",
# AI/ML SDK dependencies
"openai>=1.30.0",
"anthropic>=0.25.0",
"litellm==1.80.5",
"langchain-openai==0.1.0",
"langchain-core==0.3.81",
"langchain-anthropic==0.1.0",
"langchain-google-genai==4.1.1",
"langchain-mistralai==0.1.0",
"langgraph>=0.1.0",
"mistralai>=0.4.0",
"google-genai>=1.50.0",
"pydantic-ai>=0.1.0",
"boto3>=1.34.0",
# Testing utilities
"websocket-client>=1.6.0",
"httpx>=0.25.0",
"pytest-timeout>=2.1.0",
"pytest-mock>=3.11.0",
"pytest-rerunfailures>=11.0",
"langchain-google-vertexai>=3.1.0",
"langchain-tests>=1.0.2",
"langchain>=1.1.0",
"langchain-community>=0.4.1",
"langchain-aws>=1.1.0",
"pytest-xdist>=3.8.0",
"pyasn1>=0.6.2",
]
[project.optional-dependencies]
dev = [
"black>=23.0.0", # Code formatting
"flake8>=6.0.0", # Linting
"mypy>=1.5.0", # Type checking
]
[tool.pytest.ini_options]
# Test discovery
testpaths = ["."]
python_files = "test_*.py"
python_classes = "Test*"
python_functions = "test_*"
# Output formatting
addopts = [
"-v",
"-s", # Show print statements (no output capture)
"--tb=short",
"--strict-markers",
"--disable-warnings",
"--color=yes",
]
# Logging configuration
log_cli = true
log_cli_level = "ERROR"
log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s"
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
# Timeout settings (300 seconds = 5 minutes per test)
timeout = 300
# Markers for test categorization
markers = [
"integration: marks tests as integration tests",
"slow: marks tests as slow running",
"e2e: marks tests as end-to-end tests",
"tool_calling: marks tests as tool calling tests",
"flaky: marks tests as flaky with automatic retries (reruns=3, reruns_delay=2)",
]
# Minimum version
minversion = "7.0"
[tool.black]
line-length = 100
target-version = ['py38', 'py39', 'py310', 'py311']
include = '\.pyi?$'
[tool.mypy]
python_version = "3.11"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = false
ignore_missing_imports = true
[tool.coverage.run]
source = ["tests"]
omit = ["*/tests/*", "*/venv/*", "*/.venv/*"]
[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"raise AssertionError",
"raise NotImplementedError",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]
[tool.uv]
exclude-newer = "2026-04-08"

View File

@@ -0,0 +1,343 @@
#!/usr/bin/env python3
"""
Bifrost Integration End-to-End Test Runner
This script runs all integration end-to-end tests for Bifrost.
It can run tests individually or all together, providing comprehensive
reporting and flexible execution options.
Usage:
python run_all_tests.py # Run all tests
python run_all_tests.py --integration openai # Run specific integration
python run_all_tests.py --list # List available integrations
python run_all_tests.py --parallel # Run tests in parallel
python run_all_tests.py --verbose # Verbose output
"""
import argparse
import subprocess
import sys
import time
import os
from pathlib import Path
from typing import List, Dict, Optional
import concurrent.futures
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
class BifrostTestRunner:
    """Run the Bifrost integration test files through pytest and summarise them.

    Attributes:
        test_dir: Directory of this script; test file paths are resolved against it.
        integrations: Integration name -> {"file", "description", "env_vars"}.
        results: Integration name -> result dict returned by run_integration_test().
    """

    # Wall-clock limit, in seconds, for one integration's pytest process.
    _TEST_TIMEOUT_SECONDS = 300

    def __init__(self):
        self.test_dir = Path(__file__).parent
        self.integrations = {
            "openai": {
                "file": "tests/integrations/test_openai.py",
                "description": "OpenAI Python SDK integration tests",
                "env_vars": ["OPENAI_API_KEY"],
            },
            "anthropic": {
                "file": "tests/integrations/test_anthropic.py",
                "description": "Anthropic Python SDK integration tests",
                "env_vars": ["ANTHROPIC_API_KEY"],
            },
            "litellm": {
                "file": "tests/integrations/test_litellm.py",
                "description": "LiteLLM integration tests",
                "env_vars": ["OPENAI_API_KEY"],  # LiteLLM can use OpenAI key
            },
            "langchain": {
                "file": "tests/integrations/test_langchain.py",
                "description": "LangChain integration tests",
                "env_vars": [
                    "OPENAI_API_KEY",
                    "ANTHROPIC_API_KEY",
                ],  # LangChain uses multiple providers
            },
            "google": {
                "file": "tests/integrations/test_google.py",
                "description": "Google GenAI integration tests",
                "env_vars": ["GOOGLE_API_KEY"],
            },
            "bedrock": {
                "file": "tests/integrations/test_bedrock.py",
                "description": "Bedrock integration tests",
                "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
            },
        }
        self.results = {}

    def _missing_env_vars(self, integration: str) -> List[str]:
        """Return the required env vars of *integration* that are unset or empty."""
        required = self.integrations[integration]["env_vars"]
        return [var for var in required if not os.getenv(var)]

    def check_environment(self, integration: str) -> bool:
        """Return True when every required env var is set; otherwise print a skip notice.

        Raises:
            KeyError: If *integration* is not a key of ``self.integrations``.
        """
        missing_vars = self._missing_env_vars(integration)
        if missing_vars:
            print(
                f"⚠ Skipping {integration}: Missing environment variables: {', '.join(missing_vars)}"
            )
            return False
        return True

    def run_integration_test(self, integration: str, verbose: bool = False) -> Dict:
        """Run pytest on one integration's test file and describe the outcome.

        Args:
            integration: Key of ``self.integrations``.
            verbose: Stream pytest output live (``-v -s``) instead of capturing it.

        Returns:
            A dict that always has "success". Failures also carry "error";
            runs skipped for missing env vars carry "skipped": True; completed
            runs carry "return_code", "stdout", "stderr" and "elapsed_time".
        """
        if integration not in self.integrations:
            return {"success": False, "error": f"Unknown integration: {integration}"}

        config = self.integrations[integration]
        test_file = self.test_dir / config["file"]
        if not test_file.exists():
            return {"success": False, "error": f"Test file not found: {test_file}"}

        if not self.check_environment(integration):
            return {
                "success": False,
                "error": "Missing required environment variables",
                "skipped": True,
            }

        print(f"\n{'='*60}")
        print(f"Running {integration.upper()} Integration Tests")
        print(f"{'='*60}")
        print(f"Description: {config['description']}")
        print(f"Test file: {config['file']}")

        cmd = [sys.executable, "-m", "pytest", str(test_file)]
        # Verbose streams output live; quiet mode captures it for the failure report.
        cmd.extend(["-v", "-s"] if verbose else ["-q"])

        start_time = time.time()
        try:
            result = subprocess.run(
                cmd,
                cwd=self.test_dir,
                text=True,
                capture_output=not verbose,
                timeout=self._TEST_TIMEOUT_SECONDS,
            )
        except subprocess.TimeoutExpired:
            return {
                "success": False,
                "error": "Test timed out (5 minutes)",
                "elapsed_time": self._TEST_TIMEOUT_SECONDS,
            }
        except Exception as e:
            # Boundary handler: any launch problem becomes a failed result
            # so one broken integration cannot abort the whole run.
            return {
                "success": False,
                "error": str(e),
                "elapsed_time": time.time() - start_time,
            }

        outcome = {
            "success": result.returncode == 0,
            "return_code": result.returncode,
            # Nothing is captured in verbose mode (stdout/stderr are None there).
            "stdout": result.stdout if not verbose else "",
            "stderr": result.stderr if not verbose else "",
            "elapsed_time": time.time() - start_time,
        }
        if not outcome["success"]:
            # Without this, callers fall back to the uninformative "Unknown error".
            outcome["error"] = f"pytest exited with return code {result.returncode}"
        return outcome

    def run_all_tests(self, parallel: bool = False, verbose: bool = False) -> None:
        """Run every configured integration, then print the summary.

        Exits the process with status 1 (via _print_summary) if any integration failed.
        """
        print("Bifrost Integration End-to-End Test Suite")
        print("=" * 50)
        print(f"Running tests for {len(self.integrations)} integrations")
        print(f"Parallel execution: {'Enabled' if parallel else 'Disabled'}")
        print(f"Verbose output: {'Enabled' if verbose else 'Disabled'}")

        # Only reported here; the URL is not probed for availability.
        bifrost_url = os.getenv("BIFROST_BASE_URL", "http://localhost:8080")
        print(f"Bifrost URL: {bifrost_url}")

        start_time = time.time()
        if parallel:
            self._run_parallel(verbose)
        else:
            self._run_sequential(verbose)
        total_time = time.time() - start_time
        self._print_summary(total_time)

    def _run_sequential(self, verbose: bool) -> None:
        """Run the integrations one after another, storing each result."""
        for integration in self.integrations:
            self.results[integration] = self.run_integration_test(integration, verbose)

    def _run_parallel(self, verbose: bool) -> None:
        """Run the integrations on a pool of three worker threads."""
        print("\nRunning tests in parallel...")
        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            future_to_integration = {
                executor.submit(
                    self.run_integration_test, integration, verbose
                ): integration
                for integration in self.integrations
            }
            for future in concurrent.futures.as_completed(future_to_integration):
                integration = future_to_integration[future]
                try:
                    self.results[integration] = future.result()
                except Exception as e:
                    self.results[integration] = {"success": False, "error": str(e)}

    def _print_summary(self, total_time: float) -> None:
        """Print one line per result plus totals; exit with status 1 on any failure."""
        print(f"\n{'='*60}")
        print("TEST SUMMARY")
        print(f"{'='*60}")

        passed = 0
        failed = 0
        skipped = 0
        for integration, result in self.results.items():
            status = (
                "SKIPPED"
                if result.get("skipped")
                else ("PASSED" if result["success"] else "FAILED")
            )
            elapsed = result.get("elapsed_time", 0)
            if result.get("skipped"):
                skipped += 1
                print(
                    f"{integration:12} {status:8} - {result.get('error', 'Unknown error')}"
                )
            elif result["success"]:
                passed += 1
                print(f"{integration:12} {status:8} - {elapsed:.2f}s")
            else:
                failed += 1
                error_msg = result.get("error", "Unknown error")
                print(f"{integration:12} {status:8} - {error_msg}")
                # Show the head of the captured stderr, if any.
                if "stderr" in result and result["stderr"]:
                    print(f" Error output: {result['stderr'][:200]}...")

        print(f"\n{'='*60}")
        print(
            f"Total: {len(self.integrations)} | Passed: {passed} | Failed: {failed} | Skipped: {skipped}"
        )
        print(f"Total time: {total_time:.2f} seconds")
        print(f"{'='*60}")

        if failed > 0:
            sys.exit(1)
        else:
            print("All tests completed successfully!")

    def list_integrations(self) -> None:
        """Print every integration with a ready/missing marker and its required env vars."""
        print("Available Integrations:")
        print("=" * 30)
        for integration, config in self.integrations.items():
            # Query the env vars silently: listing must not print "Skipping ..." notices.
            env_status = "❌" if self._missing_env_vars(integration) else "✅"
            print(f"{env_status} {integration:12} - {config['description']}")
            print(f" Required env vars: {', '.join(config['env_vars'])}")
            print()
def main():
    """Command-line entry point: list integrations, run one, or run them all.

    Exits with status 1 when the named integration is unknown or its tests fail.
    """
    usage_examples = """
Examples:
python run_all_tests.py # Run all tests
python run_all_tests.py --integration openai # Run OpenAI tests only
python run_all_tests.py --parallel --verbose # Run all tests in parallel with verbose output
python run_all_tests.py --list # List available integrations
"""
    cli = argparse.ArgumentParser(
        description="Run Bifrost integration end-to-end tests",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument(
        "--integration", "-i", help="Run tests for specific integration only"
    )
    cli.add_argument(
        "--list",
        "-l",
        action="store_true",
        help="List available integrations and their status",
    )
    cli.add_argument(
        "--parallel",
        "-p",
        action="store_true",
        help="Run tests in parallel (faster but less readable output)",
    )
    cli.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Enable verbose output (shows test output in real-time)",
    )
    opts = cli.parse_args()
    runner = BifrostTestRunner()

    # --list takes precedence over every other flag.
    if opts.list:
        runner.list_integrations()
        return

    # No single integration requested: run the whole suite.
    if not opts.integration:
        runner.run_all_tests(opts.parallel, opts.verbose)
        return

    name = opts.integration
    if name not in runner.integrations:
        print(f"Error: Unknown integration '{name}'")
        print(f"Available integrations: {', '.join(runner.integrations.keys())}")
        sys.exit(1)

    outcome = runner.run_integration_test(name, opts.verbose)
    if outcome["success"]:
        print(f"\n{name} tests passed!")
        return

    reason = outcome.get("error", "Unknown error")
    print(f"\n{name} tests failed: {reason}")
    # Captured output is only present for non-verbose runs.
    if outcome.get("stdout"):
        print("\n--- Test Output ---")
        print(outcome["stdout"])
    if outcome.get("stderr"):
        print("\n--- Error Output ---")
        print(outcome["stderr"])
    sys.exit(1)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,272 @@
#!/usr/bin/env python3
"""
Integration-specific test runner for Bifrost integration tests.
This script runs tests for each integration independently using their native SDKs.
No more complex gateway conversions - just direct testing!
"""
import os
import sys
import argparse
import subprocess
from pathlib import Path
from typing import List, Optional
def check_api_keys():
    """Split the known integrations by whether their credential env var is set.

    Returns:
        A ``(available, missing)`` pair of integration-name lists; an empty
        value counts as missing.
    """
    env_var_for = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "google": "GOOGLE_API_KEY",
        "litellm": "LITELLM_API_KEY",
        "bedrock": "AWS_ACCESS_KEY_ID",
    }
    available, missing = [], []
    for name, env_var in env_var_for.items():
        bucket = available if os.getenv(env_var) else missing
        bucket.append(name)
    return available, missing
def run_integration_tests(
    integrations: List[str], test_pattern: Optional[str] = None, verbose: bool = False
):
    """Run pytest on ``tests/integrations/test_<name>.py`` for each integration.

    Args:
        integrations: Integration names; each selects one test file.
        test_pattern: Optional expression forwarded to pytest's ``-k``.
        verbose: Pass ``-v`` (and echo output of passing runs) instead of ``-q``.

    Returns:
        Dict mapping each name to either ``{"error": message}`` (missing file
        or launch failure) or ``{"returncode", "stdout", "stderr"}``; stderr is
        always "" because it is merged into stdout.
    """
    results = {}
    # Resolved next to this script so the runner works from any working directory.
    tests_dir = Path(__file__).parent / "tests" / "integrations"

    for integration in integrations:
        print(f"\n{'='*60}")
        print(f"🧪 TESTING {integration.upper()} INTEGRATION")
        print(f"{'='*60}")

        test_file = tests_dir / f"test_{integration}.py"
        if not test_file.exists():
            print(f"❌ Test file not found: {test_file}")
            results[integration] = {"error": f"Test file not found: {test_file}"}
            continue

        # sys.executable keeps the child on the interpreter (and virtualenv)
        # running this script; a bare "python" may be missing or a different install.
        cmd = [sys.executable, "-m", "pytest", str(test_file)]
        if test_pattern:
            cmd.extend(["-k", test_pattern])
        cmd.append("-v" if verbose else "-q")

        try:
            # A non-zero exit is an expected outcome here, so inspect the return
            # code instead of raising via check=True.
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                check=False,
            )
        except Exception as e:
            # Launch failure: record it and move on to the next integration.
            print(f"❌ Error running {integration} tests: {e}")
            results[integration] = {"error": str(e)}
            continue

        results[integration] = {
            "returncode": result.returncode,
            "stdout": result.stdout,
            "stderr": "",  # stderr is captured in stdout
        }
        if result.returncode == 0:
            print(f"{integration.upper()} tests PASSED")
            if verbose:
                print(result.stdout)
        else:
            print(f"{integration.upper()} tests FAILED")
            # Always show the output of a failing run so the cause is visible.
            if result.stdout:
                print(result.stdout)

    return results
def print_summary(
    results: dict, available_integrations: List[str], missing_integrations: List[str]
):
    """Print the API-key status, the per-integration verdicts and overall totals.

    Args:
        results: Integration name -> result dict holding "error" or "returncode".
        available_integrations: Names reported as having an API key.
        missing_integrations: Names reported as lacking an API key.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("🎯 FINAL SUMMARY")
    print(rule)

    print("\n🔑 API Key Status:")
    for name in available_integrations:
        print(f"{name.upper()}: Available")
    for name in missing_integrations:
        print(f"{name.upper()}: Missing API key")

    print("\n📊 Test Results:")
    passed, failed = [], []
    for name, outcome in results.items():
        label = name.upper()
        if "error" in outcome:
            print(f" 💥 {label}: Error - {outcome['error']}")
            failed.append(name)
            continue
        if outcome["returncode"] == 0:
            print(f"{label}: All tests passed")
            passed.append(name)
        else:
            print(f"{label}: Some tests failed")
            failed.append(name)

    tested_count = len(results)
    passed_count = len(passed)
    print("\n🏆 Overall Results:")
    print(f" Integrations tested: {tested_count}")
    print(f" Integrations passed: {passed_count}")
    # Guard the division: nothing tested means no meaningful rate.
    if tested_count > 0:
        rate_line = f" Success rate: {(passed_count/tested_count)*100:.1f}%"
    else:
        rate_line = " Success rate: N/A"
    print(rate_line)

    if failed:
        print(f"\n⚠️ Failed integrations: {', '.join(failed)}")
        print(" Check the detailed output above for specific test failures.")
def main():
    """Command-line entry point for the per-integration test runner.

    Handles ``--check-keys`` and ``--show-models`` as standalone actions;
    otherwise runs the requested integrations that have credentials and exits
    with the number of integrations that failed or errored (0 on success).
    """
    # Single source for the CLI choices, the "all" expansion and the hints below.
    # "bedrock" is included because check_api_keys() reports it, so "all" can
    # already run it; it must therefore be selectable by name as well.
    known_integrations = ["openai", "anthropic", "google", "litellm", "bedrock"]
    # Env var check_api_keys() consults for each integration.
    required_env_var = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "google": "GOOGLE_API_KEY",
        "litellm": "LITELLM_API_KEY",
        "bedrock": "AWS_ACCESS_KEY_ID",
    }

    parser = argparse.ArgumentParser(
        description="Run integration-specific integration tests"
    )
    parser.add_argument(
        "--integrations",
        nargs="+",
        choices=[*known_integrations, "all"],
        default=["all"],
        help="Integrations to test (default: all available)",
    )
    parser.add_argument(
        "--test", help="Run specific test pattern (e.g., 'test_01_simple_chat')"
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument(
        "--check-keys", action="store_true", help="Only check API key availability"
    )
    parser.add_argument(
        "--show-models",
        action="store_true",
        help="Show model configuration for all integrations",
    )
    args = parser.parse_args()

    available_integrations, missing_integrations = check_api_keys()

    if args.check_keys:
        print("🔑 API Key Status:")
        for integration in available_integrations:
            print(f"{integration.upper()}: Available")
        for integration in missing_integrations:
            print(f"{integration.upper()}: Missing")
        return

    if args.show_models:
        script_dir = Path(__file__).parent
        models_path = script_dir / "tests" / "utils" / "models.py"
        if not models_path.exists():
            print(f"❌ Models file not found: {models_path}")
            sys.exit(1)
        # Make the "tests" package importable regardless of the working directory.
        models_parent_dir = str(script_dir)
        if models_parent_dir not in sys.path:
            sys.path.insert(0, models_parent_dir)
        try:
            from tests.utils.models import print_model_summary

            print_model_summary()
        except ImportError as e:
            print(f"❌ Could not import print_model_summary: {e}")
            print(f"Tried to import from: {models_path}")
            sys.exit(1)
        return

    # Decide what was requested and which of those have credentials.
    if "all" in args.integrations:
        integrations_to_test = available_integrations
        requested_integrations = list(known_integrations)
    else:
        integrations_to_test = [
            p for p in args.integrations if p in available_integrations
        ]
        requested_integrations = args.integrations

    if not integrations_to_test:
        print("❌ No integrations available for testing. Please set API keys.")
        print("\nRequired environment variables for requested integrations:")
        for integration in requested_integrations:
            if integration != "all":  # Skip the "all" keyword
                # Name the variable that is actually checked rather than
                # guessing "<NAME>_API_KEY" (wrong for bedrock).
                api_key_name = required_env_var.get(
                    integration, f"{integration.upper()}_API_KEY"
                )
                print(f" - {api_key_name}")
        sys.exit(1)

    requested_missing_integrations = [
        integration
        for integration in requested_integrations
        if integration in missing_integrations
    ]

    print("🚀 Starting integration tests...")
    print(f"📋 Testing integrations: {', '.join(integrations_to_test)}")
    if requested_missing_integrations:
        print(
            f"⏭️ Skipping integrations (no API key): {', '.join(requested_missing_integrations)}"
        )

    results = run_integration_tests(integrations_to_test, args.test, args.verbose)
    print_summary(results, available_integrations, requested_missing_integrations)

    # Exit status is the number of integrations that failed or errored.
    failed_count = sum(
        1 for r in results.values() if r.get("returncode", 1) != 0 or "error" in r
    )
    sys.exit(failed_count)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,8 @@
"""
Bifrost Integration Tests
Production-ready test suite for testing various AI integrations through Bifrost proxy.
Supports multiple integrations with uniform test interface.
"""
__version__ = "1.0.0"

View File

@@ -0,0 +1,188 @@
"""
Pytest configuration for integration-specific tests.
"""
import pytest
import os
import logging
def pytest_configure(config):
    """Set up ERROR-level logging and register this suite's custom markers."""
    logging.basicConfig(
        level=logging.ERROR,
        format='%(asctime)s [%(levelname)8s] %(name)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )

    # Registered in this order; each entry is "<marker>: <description>".
    marker_lines = (
        "openai: mark test as requiring OpenAI API key",
        "anthropic: mark test as requiring Anthropic API key",
        "google: mark test as requiring Google API key",
        "litellm: mark test as requiring LiteLLM setup",
        "azure: Azure OpenAI integration tests",
        "flaky: mark test as flaky with automatic retries (reruns=3, reruns_delay=2)",
    )
    for line in marker_lines:
        config.addinivalue_line("markers", line)
def pytest_collection_modifyitems(config, items):
    """Mark every collected test as flaky and tag it by its test file name."""
    retry_marker = pytest.mark.flaky(reruns=3, reruns_delay=2)
    # Checked in this order; the first "test_<name>" found in the node id wins.
    file_markers = ("openai", "anthropic", "google", "litellm", "azure")

    for test_item in items:
        # Every test gets the retry-on-failure marker.
        test_item.add_marker(retry_marker)
        for marker_name in file_markers:
            if f"test_{marker_name}" in test_item.nodeid:
                test_item.add_marker(getattr(pytest.mark, marker_name))
                break
@pytest.fixture(scope="session")
def api_keys():
    """Return each integration's API key as read from the environment (None if unset)."""
    env_var_for = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "google": "GOOGLE_API_KEY",
        "litellm": "LITELLM_API_KEY",
        "azure": "AZURE_API_KEY",
    }
    return {name: os.getenv(env_var) for name, env_var in env_var_for.items()}
@pytest.fixture(scope="session")
def available_integrations(api_keys):
    """List the integrations whose entry in *api_keys* is set, in a fixed order."""
    ordered_names = ("openai", "anthropic", "google", "litellm", "azure")
    return [name for name in ordered_names if api_keys[name]]
@pytest.fixture
def test_summary():
    """Provide a fresh, empty passed/failed/skipped collector for one test."""
    return {outcome: [] for outcome in ("passed", "failed", "skipped")}
def pytest_runtest_makereport(item, call):
    """Record each test's outcome in ``item.session.test_results``.

    Passes and failures are taken from the "call" phase only, so a test is not
    counted once per phase. A skip is recorded under "skipped" whether it is
    raised in the test body or during setup (a setup-phase skip never reaches
    the "call" phase). Nothing is recorded when the session has no
    ``test_results`` attribute.

    Args:
        item: The test item; its ``nodeid``, ``name`` and ``session`` are read.
        call: The phase's call info; its ``when`` and ``excinfo`` are read.
    """
    if not hasattr(item.session, "test_results"):
        return

    excinfo = call.excinfo
    # Matched by type name so a skip is not mistaken for a failure
    # (a skip also surfaces as a non-None excinfo).
    was_skipped = excinfo is not None and excinfo.typename == "Skipped"
    if call.when != "call" and not (call.when == "setup" and was_skipped):
        return

    # The integration is inferred from the test file name embedded in the node id.
    integration = None
    for name in ("openai", "anthropic", "google", "litellm", "azure"):
        if f"test_{name}" in item.nodeid:
            integration = name
            break

    result_info = {
        "integration": integration,
        "test": item.name,
        "nodeid": item.nodeid,
    }
    test_results = item.session.test_results
    if excinfo is None:
        test_results["passed"].append(result_info)
    elif was_skipped:
        result_info["reason"] = str(excinfo.value)
        test_results["skipped"].append(result_info)
    else:
        result_info["error"] = str(excinfo.value)
        test_results["failed"].append(result_info)
def pytest_sessionstart(session):
    """Attach empty passed/failed/skipped result lists to the session."""
    session.test_results = {
        outcome: [] for outcome in ("passed", "failed", "skipped")
    }
def pytest_sessionfinish(session, exitstatus):
    """Print per-integration pass/fail/skip counts and the list of failed tests.

    Results whose "integration" is missing or None are grouped under "unknown"
    so they still appear in the summary instead of being silently dropped.

    Args:
        session: Object whose ``test_results`` dict holds the recorded outcomes.
        exitstatus: Unused; part of the hook signature.
    """
    results = session.test_results

    def bucket_of(result):
        # The key may be present with value None, so a plain .get() default
        # is not enough to fall back to "unknown".
        return result.get("integration") or "unknown"

    print("\n" + "=" * 80)
    print("INTEGRATION TEST SUMMARY")
    print("=" * 80)

    # Tally outcomes per integration, keeping first-seen order.
    tallies = {}
    for outcome in ("passed", "failed", "skipped"):
        for result in results[outcome]:
            counts = tallies.setdefault(
                bucket_of(result), {"passed": 0, "failed": 0, "skipped": 0}
            )
            counts[outcome] += 1

    for integration, counts in tallies.items():
        total = counts["passed"] + counts["failed"] + counts["skipped"]
        print(f"\n{integration.upper()} Integration:")
        print(f" ✅ Passed: {counts['passed']}")
        print(f" ❌ Failed: {counts['failed']}")
        print(f" ⏭️ Skipped: {counts['skipped']}")
        print(f" 📊 Total: {total}")
        # The rate ignores skips and is only shown when something passed.
        if counts["passed"] > 0:
            success_rate = (
                counts["passed"] / (counts["passed"] + counts["failed"])
            ) * 100
            print(f" 🎯 Success Rate: {success_rate:.1f}%")

    if results["failed"]:
        print(f"\n❌ FAILED TESTS ({len(results['failed'])}):")
        for result in results["failed"]:
            print(f"{bucket_of(result)}: {result['test']}")
            if "error" in result:
                print(f" Error: {result['error']}")

    print("\n" + "=" * 80)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,911 @@
"""
LiteLLM Integration Tests
🤖 MODELS USED:
- Chat: gpt-3.5-turbo (OpenAI via LiteLLM)
- Vision: gpt-4o (OpenAI via LiteLLM)
- Tools: gpt-3.5-turbo (OpenAI via LiteLLM)
- Speech: tts-1 (OpenAI via LiteLLM)
- Transcription: whisper-1 (OpenAI via LiteLLM)
- Embeddings: text-embedding-3-small (OpenAI via LiteLLM)
- Alternatives: claude-3-haiku-20240307, gemini-pro, mistral-7b-instruct, gpt-4, command-r-plus
Tests all 19 core scenarios using LiteLLM SDK directly:
1. Simple chat
2. Multi turn conversation
3. Tool calls
4. Multiple tool calls
5. End2End tool calling
6. Automatic function calling
7. Image (url)
8. Image (base64)
9. Multiple images
10. Complete end2end test with conversation history, tool calls, tool results and images
11. Integration specific tests
12. Error handling
13. Streaming
14. Google Gemini integration
15. Mistral integration
16. OpenAI embeddings via LiteLLM
17. OpenAI speech synthesis via LiteLLM
18. OpenAI transcription via LiteLLM
19. Multi-provider comparison
"""
import pytest
import json
import litellm
from typing import List, Dict, Any
from .utils.common import (
Config,
SIMPLE_CHAT_MESSAGES,
MULTI_TURN_MESSAGES,
SINGLE_TOOL_CALL_MESSAGES,
MULTIPLE_TOOL_CALL_MESSAGES,
IMAGE_URL_MESSAGES,
IMAGE_BASE64_MESSAGES,
MULTIPLE_IMAGES_MESSAGES,
COMPLEX_E2E_MESSAGES,
INVALID_ROLE_MESSAGES,
STREAMING_CHAT_MESSAGES,
STREAMING_TOOL_CALL_MESSAGES,
WEATHER_TOOL,
CALCULATOR_TOOL,
mock_tool_response,
assert_valid_chat_response,
assert_has_tool_calls,
assert_valid_image_response,
assert_valid_error_response,
assert_error_propagation,
assert_valid_streaming_response,
collect_streaming_content,
extract_tool_calls,
get_api_key,
skip_if_no_api_key,
COMPARISON_KEYWORDS,
WEATHER_KEYWORDS,
LOCATION_KEYWORDS,
# Audio and embeddings test data
EMBEDDINGS_SINGLE_TEXT,
EMBEDDINGS_MULTIPLE_TEXTS,
EMBEDDINGS_SIMILAR_TEXTS,
SPEECH_TEST_INPUT,
generate_test_audio,
assert_valid_speech_response,
assert_valid_transcription_response,
assert_valid_embedding_response,
assert_valid_embeddings_batch_response,
calculate_cosine_similarity,
collect_streaming_transcription_content,
get_provider_voice,
get_provider_voices,
# Token counting test data
INPUT_TOKENS_SIMPLE_TEXT,
INPUT_TOKENS_LONG_TEXT,
INPUT_TOKENS_WITH_SYSTEM,
)
from .utils.config_loader import get_model
from .utils.parametrize import (
get_cross_provider_params_for_scenario,
format_provider_model,
)
# LiteLLM-specific provider exclusions
# Bedrock and Cohere don't work well through LiteLLM proxy
# Gemini is excluded because LiteLLM routes it through Vertex AI-specific endpoints
# that Bifrost's LiteLLM integration doesn't support
LITELLM_EXCLUDED_PROVIDERS = ["bedrock", "cohere", "gemini"]
@pytest.fixture
def test_config():
    """Provide a freshly constructed ``Config`` instance to the test."""
    config = Config()
    return config
@pytest.fixture(autouse=True)
def setup_litellm(monkeypatch):
    """Point LiteLLM at Bifrost and install dummy credentials for one test.

    Environment variables are set through ``monkeypatch`` so they are restored
    when the test finishes; the dummy values therefore do not leak into (or
    overwrite real keys for) anything that runs later in the same process.
    The ``litellm`` module-level settings are assigned directly and persist.
    """
    import datetime
    from pathlib import Path

    from .utils.config_loader import get_integration_url, get_config

    # Dummy service-account file expected one directory above this test module.
    dummy_creds_path = Path(__file__).parent.parent / "dummy-gcp-credentials.json"

    # Placeholder credentials: Bifrost performs the real authentication.
    dummy_env = {
        "OPENAI_API_KEY": "dummy-openai-key-bifrost-handles-auth",
        "ANTHROPIC_API_KEY": "dummy-anthropic-key-bifrost-handles-auth",
        "MISTRAL_API_KEY": "dummy-mistral-key-bifrost-handles-auth",
        # For Google, set all possible API key environment variables
        "GOOGLE_API_KEY": "dummy-google-api-key-bifrost-handles-auth",
        "GEMINI_API_KEY": "dummy-gemini-api-key-bifrost-handles-auth",
        "VERTEX_PROJECT": "dummy-vertex-project",
        "VERTEX_LOCATION": "us-central1",
        # Keeps Vertex AI from attempting real authentication.
        "GOOGLE_APPLICATION_CREDENTIALS": str(dummy_creds_path),
    }
    for name, value in dummy_env.items():
        monkeypatch.setenv(name, value)

    def mock_refresh(self, request):
        """Set a dummy token instead of contacting Google; Bifrost handles real auth."""
        self.token = "dummy-access-token-bifrost-handles-auth"
        # Same naive-UTC value the deprecated datetime.utcnow() produced.
        now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        self.expiry = now_utc + datetime.timedelta(hours=1)

    try:
        from google.oauth2 import service_account

        monkeypatch.setattr(service_account.Credentials, "refresh", mock_refresh)
    except ImportError:
        pass  # google-auth not installed

    base_url = get_integration_url("litellm")
    config = get_config()
    integration_settings = config.get_integration_settings("litellm")
    api_config = config.get_api_config()

    # Configure LiteLLM globally
    if base_url:
        litellm.api_base = base_url
    litellm.request_timeout = api_config.get("timeout", 30)

    # Apply integration-specific settings only when they are truthy.
    if integration_settings.get("drop_params"):
        litellm.drop_params = integration_settings["drop_params"]
    if integration_settings.get("debug"):
        litellm.set_verbose = integration_settings["debug"]
def convert_to_litellm_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Wrap each common-format tool definition in the OpenAI-style envelope LiteLLM accepts.

    Args:
        tools: Tool definitions; each one is embedded unchanged (same object,
            not copied) under the ``"function"`` key.

    Returns:
        A new list with one ``{"type": "function", "function": tool}`` entry
        per input tool, in input order.
    """
    wrapped: List[Dict[str, Any]] = []
    for definition in tools:
        wrapped.append({"type": "function", "function": definition})
    return wrapped
class TestLiteLLMIntegration:
    """LiteLLM integration tests (chat, tools, images, streaming, audio, embeddings, token counting).

    Parametrized tests receive ``(provider, model)`` pairs from
    ``get_cross_provider_params_for_scenario``; the sentinel pair
    ``("_no_providers_", "_no_model_")`` causes the test to be skipped.
    """

    @staticmethod
    def _skip_if_unconfigured(provider, model):
        """Skip the calling test when either parameter is a "nothing configured" sentinel."""
        if provider == "_no_providers_" or model == "_no_model_":
            pytest.skip("No providers configured for this scenario")

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "simple_chat", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_01_simple_chat(self, test_config, provider, model):
        """Test Case 1: Simple chat interaction"""
        self._skip_if_unconfigured(provider, model)
        response = litellm.completion(
            model=model,
            messages=SIMPLE_CHAT_MESSAGES,
            max_tokens=100,
        )
        assert_valid_chat_response(response)
        assert response.choices[0].message.content is not None
        assert len(response.choices[0].message.content) > 0

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "multi_turn_conversation", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_02_multi_turn_conversation(self, test_config, provider, model):
        """Test Case 2: Multi-turn conversation"""
        self._skip_if_unconfigured(provider, model)
        response = litellm.completion(
            model=model,
            messages=MULTI_TURN_MESSAGES,
            max_tokens=150,
        )
        assert_valid_chat_response(response)
        content = response.choices[0].message.content.lower()
        # Should mention population or numbers since we asked about Paris population
        assert any(word in content for word in ["population", "million", "people", "inhabitants"])

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "tool_calls", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_03_single_tool_call(self, test_config, provider, model):
        """Test Case 3: Single tool call"""
        self._skip_if_unconfigured(provider, model)
        tools = convert_to_litellm_tools([WEATHER_TOOL])
        response = litellm.completion(
            model=model,
            messages=SINGLE_TOOL_CALL_MESSAGES,
            tools=tools,
            max_tokens=100,
        )
        assert_has_tool_calls(response, expected_count=1)
        tool_calls = extract_tool_calls(response)
        assert tool_calls[0]["name"] == "get_weather"
        assert "location" in tool_calls[0]["arguments"]

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "multiple_tool_calls", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_04_multiple_tool_calls(self, test_config, provider, model):
        """Test Case 4: Multiple tool calls in one response"""
        self._skip_if_unconfigured(provider, model)
        tools = convert_to_litellm_tools([WEATHER_TOOL, CALCULATOR_TOOL])
        response = litellm.completion(
            model=model,
            messages=MULTIPLE_TOOL_CALL_MESSAGES,
            tools=tools,
            max_tokens=200,
        )
        assert_has_tool_calls(response, expected_count=2)
        tool_calls = extract_tool_calls(response)
        tool_names = [tc["name"] for tc in tool_calls]
        assert "get_weather" in tool_names
        assert "calculate" in tool_names

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "end2end_tool_calling", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_05_end2end_tool_calling(self, test_config, provider, model):
        """Test Case 5: Complete tool calling flow with responses"""
        self._skip_if_unconfigured(provider, model)
        messages = [{"role": "user", "content": "What's the weather in Boston?"}]
        tools = convert_to_litellm_tools([WEATHER_TOOL])
        response = litellm.completion(
            model=model,
            messages=messages,
            tools=tools,
            max_tokens=100,
        )
        assert_has_tool_calls(response, expected_count=1)
        # Add assistant's tool call to conversation
        messages.append(response.choices[0].message)
        # Add tool response
        tool_calls = extract_litellm_tool_calls(response)
        tool_response = mock_tool_response(tool_calls[0]["name"], tool_calls[0]["arguments"])
        messages.append(
            {
                "role": "tool",
                "tool_call_id": response.choices[0].message.tool_calls[0].id,
                "content": tool_response,
            }
        )
        # Get final response from the SAME parametrized model that produced the
        # tool call; using the default chat model here would leave the second
        # half of the flow untested for every non-default provider.
        final_response = litellm.completion(model=model, messages=messages, max_tokens=150)
        assert_valid_chat_response(final_response)
        content = final_response.choices[0].message.content.lower()
        weather_location_keywords = WEATHER_KEYWORDS + LOCATION_KEYWORDS
        assert any(word in content for word in weather_location_keywords)

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "automatic_function_calling", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_06_automatic_function_calling(self, test_config, provider, model):
        """Test Case 6: Automatic function calling"""
        self._skip_if_unconfigured(provider, model)
        tools = convert_to_litellm_tools([CALCULATOR_TOOL])
        response = litellm.completion(
            model=model,
            messages=[{"role": "user", "content": "Calculate 25 * 4 for me"}],
            tools=tools,
            tool_choice="auto",
            max_tokens=100,
        )
        # Should automatically choose to use the calculator
        assert_has_tool_calls(response, expected_count=1)
        tool_calls = extract_litellm_tool_calls(response)
        assert tool_calls[0]["name"] == "calculate"

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "image_url", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_07_image_url(self, test_config, provider, model):
        """Test Case 7: Image analysis from URL"""
        self._skip_if_unconfigured(provider, model)
        response = litellm.completion(
            model=model,
            messages=IMAGE_URL_MESSAGES,
            max_tokens=200,
        )
        assert_valid_image_response(response)

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "image_base64", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_08_image_base64(self, test_config, provider, model):
        """Test Case 8: Image analysis from base64"""
        self._skip_if_unconfigured(provider, model)
        response = litellm.completion(
            model=model,
            messages=IMAGE_BASE64_MESSAGES,
            max_tokens=200,
        )
        assert_valid_image_response(response)

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "multiple_images", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_09_multiple_images(self, test_config, provider, model):
        """Test Case 9: Multiple image analysis"""
        self._skip_if_unconfigured(provider, model)
        response = litellm.completion(
            model=model,
            messages=MULTIPLE_IMAGES_MESSAGES,
            max_tokens=300,
        )
        assert_valid_image_response(response)
        content = response.choices[0].message.content.lower()
        # Should mention comparison or differences
        assert any(
            word in content for word in COMPARISON_KEYWORDS
        ), f"Response should contain comparison keywords. Got content: {content}"

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "complex_e2end", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    @pytest.mark.skip(reason="Known flaky test")
    def test_10_complex_end2end(self, test_config, provider, model):
        """Test Case 10: Complex end-to-end with conversation, images, and tools"""
        self._skip_if_unconfigured(provider, model)
        # Shallow copy: only appended to below, so the shared constant list is not mutated
        messages = COMPLEX_E2E_MESSAGES.copy()
        tools = convert_to_litellm_tools([WEATHER_TOOL])
        # First, analyze the image
        response1 = litellm.completion(
            model=model,
            messages=messages,
            tools=tools,
            max_tokens=300,
        )
        # Should either describe image or call weather tool (or both)
        assert (
            response1.choices[0].message.content is not None
            or response1.choices[0].message.tool_calls is not None
        )
        # Add response to conversation
        messages.append(response1.choices[0].message)
        # If there were tool calls, handle them
        if response1.choices[0].message.tool_calls:
            for tool_call in response1.choices[0].message.tool_calls:
                tool_name = tool_call.function.name
                tool_args = json.loads(tool_call.function.arguments)
                tool_response = mock_tool_response(tool_name, tool_args)
                messages.append(
                    {
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        "content": tool_response,
                    }
                )
            # Get final response after tool calls
            final_response = litellm.completion(model=model, messages=messages, max_tokens=200)
            assert_valid_chat_response(final_response)

    @pytest.mark.skip(reason="known flaky test")
    def test_11_integration_specific_features(self, test_config):
        """Test Case 11: LiteLLM-specific features"""
        # Test 1: Multiple integrations through LiteLLM
        # Note: Gemini is excluded as LiteLLM routes it through Vertex AI-specific endpoints
        integrations_to_test = [
            "gpt-3.5-turbo",  # OpenAI
            "claude-3-haiku-20240307",  # Anthropic
            "mistral/mistral-7b-instruct",  # Mistral
        ]
        for model in integrations_to_test:
            try:
                response = litellm.completion(
                    model=model,
                    messages=[{"role": "user", "content": "Hello, how are you?"}],
                    max_tokens=50,
                )
                assert_valid_chat_response(response)
            except AssertionError:
                # A failed validation is a real failure, not "integration unavailable"
                raise
            except Exception as e:
                # Some integrations might not be available, skip gracefully
                pytest.skip(f"Integration {model} not available: {e}")
        # Test 2: Function calling with specific tool choice
        tools = convert_to_litellm_tools([CALCULATOR_TOOL, WEATHER_TOOL])
        response2 = litellm.completion(
            model=get_model("litellm", "chat"),
            messages=[{"role": "user", "content": "What's 15 + 27?"}],
            tools=tools,
            tool_choice={"type": "function", "function": {"name": "calculate"}},
            max_tokens=100,
        )
        assert_has_tool_calls(response2, expected_count=1)
        tool_calls = extract_litellm_tool_calls(response2)
        assert tool_calls[0]["name"] == "calculate"
        # Test 3: Temperature and other parameters
        response3 = litellm.completion(
            model=get_model("litellm", "chat"),
            messages=[{"role": "user", "content": "Tell me a creative story in one sentence."}],
            temperature=0.9,
            top_p=0.9,
            max_tokens=100,
        )
        assert_valid_chat_response(response3)

    def test_12_error_handling_invalid_roles(self, test_config):
        """Test Case 12: Error handling for invalid roles"""
        with pytest.raises(Exception) as exc_info:
            litellm.completion(
                model=get_model("litellm", "chat"),
                messages=INVALID_ROLE_MESSAGES,
                max_tokens=100,
            )
        # Verify the error is properly caught and contains role-related information
        error = exc_info.value
        assert_valid_error_response(error, "tester")
        assert_error_propagation(error, "litellm")

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "streaming", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_13_streaming(self, test_config, provider, model):
        """Test Case 13: Streaming chat completion"""
        self._skip_if_unconfigured(provider, model)
        # Test basic streaming
        stream = litellm.completion(
            model=model,
            messages=STREAMING_CHAT_MESSAGES,
            max_tokens=200,
            stream=True,
        )
        content, chunk_count, tool_calls_detected = collect_streaming_content(
            stream, "openai", timeout=120  # LiteLLM uses OpenAI format
        )
        # Validate streaming results
        assert chunk_count > 0, "Should receive at least one chunk"
        assert len(content) > 10, "Should receive substantial content"
        assert not tool_calls_detected, "Basic streaming shouldn't have tool calls"
        # Test streaming with tool calls
        stream_with_tools = litellm.completion(
            model=model,
            messages=STREAMING_TOOL_CALL_MESSAGES,
            max_tokens=150,
            tools=convert_to_litellm_tools([WEATHER_TOOL]),
            stream=True,
        )
        content_tools, chunk_count_tools, tool_calls_detected_tools = collect_streaming_content(
            stream_with_tools, "openai", timeout=120  # LiteLLM uses OpenAI format
        )
        # Validate tool streaming results
        assert chunk_count_tools > 0, "Should receive at least one chunk with tools"
        assert tool_calls_detected_tools, "Should detect tool calls in streaming response"

    @pytest.mark.skip(reason="known flaky test")
    def test_14_gemini_integration(self, test_config):
        """Test Case 14: Google Gemini integration through LiteLLM"""
        try:
            # Test basic chat with Gemini
            response = litellm.completion(
                model="gemini-2.0-flash-001",
                messages=[
                    {
                        "role": "user",
                        "content": "What is machine learning? Answer in one sentence.",
                    }
                ],
                max_tokens=100,
            )
            assert_valid_chat_response(response)
            content = response.choices[0].message.content.lower()
            assert any(
                word in content for word in ["machine", "learning", "data", "algorithm"]
            ), f"Response should mention ML concepts. Got: {content}"
            # Test with tool calling if supported
            tools = convert_to_litellm_tools([CALCULATOR_TOOL])
            response_tools = litellm.completion(
                model="gemini-2.0-flash-001",
                messages=[{"role": "user", "content": "Calculate 42 * 17"}],
                tools=tools,
                max_tokens=100,
            )
            # Gemini should either use tools or provide calculation
            if response_tools.choices[0].message.tool_calls:
                assert_has_tool_calls(response_tools, expected_count=1)
            else:
                # Should at least provide the calculation result
                content = response_tools.choices[0].message.content
                assert "714" in content or "42" in content, "Should provide calculation result"
        except AssertionError:
            # Do not let a failed assertion be reported as "not available"
            raise
        except Exception as e:
            pytest.skip(f"Gemini integration not available: {e}")

    @pytest.mark.skip(reason="known flaky test")
    def test_15_mistral_integration(self, test_config):
        """Test Case 15: Mistral integration through LiteLLM"""
        try:
            # Test basic chat with Mistral
            response = litellm.completion(
                model="mistral/mistral-7b-instruct",
                messages=[
                    {
                        "role": "user",
                        "content": "Explain recursion in programming briefly.",
                    }
                ],
                max_tokens=150,
            )
            assert_valid_chat_response(response)
            content = response.choices[0].message.content.lower()
            assert any(
                word in content for word in ["recursion", "function", "itself", "call"]
            ), f"Response should explain recursion. Got: {content}"
            # Test with different temperature
            response_creative = litellm.completion(
                model="mistral/mistral-7b-instruct",
                messages=[{"role": "user", "content": "Write a haiku about code."}],
                temperature=0.8,
                max_tokens=100,
            )
            assert_valid_chat_response(response_creative)
        except AssertionError:
            # Do not let a failed assertion be reported as "not available"
            raise
        except Exception as e:
            pytest.skip(f"Mistral integration not available: {e}")

    @pytest.mark.skip(reason="known flaky test")
    def test_16_openai_embeddings_via_litellm(self, test_config):
        """Test Case 16: OpenAI embeddings through LiteLLM"""
        try:
            # Test single text embedding
            response = litellm.embedding(
                model=get_model("litellm", "embeddings") or "text-embedding-3-small",
                input=EMBEDDINGS_SINGLE_TEXT,
            )
            assert_valid_embedding_response(response, expected_dimensions=1536)
            # Test batch embeddings
            batch_response = litellm.embedding(
                model=get_model("litellm", "embeddings") or "text-embedding-3-small",
                input=EMBEDDINGS_MULTIPLE_TEXTS,
            )
            assert_valid_embeddings_batch_response(
                batch_response, len(EMBEDDINGS_MULTIPLE_TEXTS), expected_dimensions=1536
            )
            # Test similarity analysis
            similar_response = litellm.embedding(
                model=get_model("litellm", "embeddings") or "text-embedding-3-small",
                input=EMBEDDINGS_SIMILAR_TEXTS,
            )
            # The response and its items are handled both as dicts and as attribute objects
            embeddings = [
                item["embedding"] if isinstance(item, dict) else item.embedding
                for item in (
                    similar_response["data"]
                    if isinstance(similar_response, dict)
                    else similar_response.data
                )
            ]
            # Calculate similarity between similar texts
            similarity = calculate_cosine_similarity(embeddings[0], embeddings[1])
            assert (
                similarity > 0.7
            ), f"Similar texts should have high similarity, got {similarity:.4f}"
        except AssertionError:
            # Do not let a failed assertion be reported as "not available"
            raise
        except Exception as e:
            pytest.skip(f"OpenAI embeddings through LiteLLM not available: {e}")

    def test_17_openai_speech_via_litellm(self, test_config):
        """Test Case 17: OpenAI speech synthesis through LiteLLM"""
        try:
            # Test basic speech synthesis
            response = litellm.speech(
                model=get_model("litellm", "speech") or "tts-1",
                voice=get_provider_voice("openai", "primary"),
                input=SPEECH_TEST_INPUT,
            )
            # LiteLLM might return different response format: use ``.content``
            # when present, otherwise the response object itself
            audio_content = getattr(response, "content", response)
            assert_valid_speech_response(audio_content)
            # Test with different voice
            response2 = litellm.speech(
                model=get_model("litellm", "speech") or "tts-1",
                voice=get_provider_voice("openai", "secondary"),
                input="Short test message for voice comparison.",
                response_format="mp3",
            )
            audio_content2 = getattr(response2, "content", response2)
            assert_valid_speech_response(audio_content2, expected_audio_size_min=500)
            # Different voices should produce different audio
            assert (
                audio_content != audio_content2
            ), "Different voices should produce different audio"
        except AssertionError:
            # Do not let a failed assertion be reported as "not available"
            raise
        except Exception as e:
            pytest.skip(f"OpenAI speech through LiteLLM not available: {e}")

    def test_18_openai_transcription_via_litellm(self, test_config):
        """Test Case 18: OpenAI transcription through LiteLLM"""
        try:
            # Generate test audio for transcription
            test_audio = generate_test_audio()
            # Test basic transcription
            response = litellm.transcription(
                model=get_model("litellm", "transcription") or "whisper-1",
                file=("test_audio.wav", test_audio, "audio/wav"),
            )
            assert_valid_transcription_response(response)
            # Test with additional parameters
            response2 = litellm.transcription(
                model=get_model("litellm", "transcription") or "whisper-1",
                file=("test_audio.wav", test_audio, "audio/wav"),
                language="en",
                temperature=0.0,
            )
            assert_valid_transcription_response(response2)
        except AssertionError:
            # Do not let a failed assertion be reported as "not available"
            raise
        except Exception as e:
            pytest.skip(f"OpenAI transcription through LiteLLM not available: {e}")

    def test_19_multi_provider_comparison(self, test_config):
        """Test Case 19: Compare responses across different providers through LiteLLM"""
        test_prompt = "What is the capital of Japan? Answer in one word."
        models_to_test = [
            "gpt-3.5-turbo",  # OpenAI
            "claude-3-haiku-20240307",  # Anthropic
            "gemini-2.0-flash-001",  # Google
        ]
        responses = {}
        for model in models_to_test:
            # Best effort per model: any error just excludes that model from the comparison
            try:
                response = litellm.completion(
                    model=model,
                    messages=[{"role": "user", "content": test_prompt}],
                    max_tokens=50,
                )
                assert_valid_chat_response(response)
                responses[model] = response.choices[0].message.content.lower()
            except Exception as e:
                print(f"Model {model} not available: {e}")
                continue
        # Verify that we got at least one response
        assert len(responses) > 0, "Should get at least one successful response"
        # All responses should mention Tokyo or Japan
        for model, content in responses.items():
            assert any(
                word in content for word in ["tokyo", "japan"]
            ), f"Model {model} should mention Tokyo. Got: {content}"

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_20_token_counter_simple_text(self, test_config, provider, model):
        """Test Case 20: Count tokens from simple text using LiteLLM token_counter"""
        self._skip_if_unconfigured(provider, model)
        try:
            # Count tokens using text parameter
            token_count = litellm.token_counter(
                model=model,
                text=INPUT_TOKENS_SIMPLE_TEXT,
            )
            # Validate token count
            assert isinstance(token_count, int), "Token count should be an integer"
            assert token_count > 0, "Token count should be positive"
            # Simple text should have a reasonable token count (between 3-20 tokens)
            assert 3 <= token_count <= 20, (
                f"Simple text should have 3-20 tokens, got {token_count}"
            )
        except AssertionError:
            # Do not let a failed assertion be reported as "not available"
            raise
        except Exception as e:
            pytest.skip(f"Token counting not available for {provider}/{model}: {e}")

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_21_token_counter_with_messages(self, test_config, provider, model):
        """Test Case 21: Count tokens from messages with system message using LiteLLM token_counter"""
        self._skip_if_unconfigured(provider, model)
        try:
            # Count tokens using messages parameter
            token_count = litellm.token_counter(
                model=model,
                messages=INPUT_TOKENS_WITH_SYSTEM,
            )
            # Validate token count
            assert isinstance(token_count, int), "Token count should be an integer"
            assert token_count > 0, "Token count should be positive"
            # With system message should have more tokens than simple text
            assert token_count > 2, (
                f"With system message should have >2 tokens, got {token_count}"
            )
        except AssertionError:
            # Do not let a failed assertion be reported as "not available"
            raise
        except Exception as e:
            pytest.skip(f"Token counting not available for {provider}/{model}: {e}")

    @pytest.mark.parametrize(
        "provider, model",
        get_cross_provider_params_for_scenario(
            "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
        ),
    )
    def test_22_token_counter_long_text(self, test_config, provider, model):
        """Test Case 22: Count tokens from long text using LiteLLM token_counter"""
        self._skip_if_unconfigured(provider, model)
        try:
            # Count tokens using text parameter with long text
            token_count = litellm.token_counter(
                model=model,
                text=INPUT_TOKENS_LONG_TEXT,
            )
            # Validate token count
            assert isinstance(token_count, int), "Token count should be an integer"
            assert token_count > 100, (
                f"Long text should have >100 tokens, got {token_count}"
            )
        except AssertionError:
            # Do not let a failed assertion be reported as "not available"
            raise
        except Exception as e:
            pytest.skip(f"Token counting not available for {provider}/{model}: {e}")
# Additional helper functions specific to LiteLLM
def extract_litellm_tool_calls(response: Any) -> List[Dict[str, Any]]:
    """Collect ``{"name", "arguments"}`` dicts from the first choice of a LiteLLM response.

    Args:
        response: Object expected to expose ``choices[0].message.tool_calls``.
            Any missing or empty link in that chain yields an empty result.

    Returns:
        One dict per tool call that has a ``function`` with a ``name``.
        String ``arguments`` are JSON-decoded; non-string values are passed
        through unchanged. Calls whose arguments cannot be read or decoded
        are reported via ``print`` and left out.
    """
    choices = getattr(response, "choices", None)
    if not choices:
        return []

    message = getattr(choices[0], "message", None)
    raw_calls = getattr(message, "tool_calls", None)
    if not raw_calls:
        return []

    extracted: List[Dict[str, Any]] = []
    for call in raw_calls:
        function = getattr(call, "function", None)
        if function is None or not hasattr(function, "name"):
            continue
        try:
            raw_arguments = function.arguments
            if isinstance(raw_arguments, str):
                parsed_arguments = json.loads(raw_arguments)
            else:
                parsed_arguments = raw_arguments
        except (json.JSONDecodeError, AttributeError) as e:
            print(f"Warning: Failed to parse LiteLLM tool call arguments: {e}")
            continue
        extracted.append({"name": function.name, "arguments": parsed_arguments})
    return extracted

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,781 @@
"""
Pydantic AI Integration Tests - Cross-Provider Support
🌉 CROSS-PROVIDER TESTING:
This test suite uses Pydantic AI to test against multiple AI providers through Bifrost.
Tests automatically run against all available providers with proper capability filtering.
🤖 PYDANTIC AI COMPONENTS TESTED:
- Agent: Core agent class for running LLM interactions
- Models: OpenAI (OpenAIChatModel), Anthropic (AnthropicModel), Google (GoogleModel), Cohere (CohereModel)
- Providers: OpenAIProvider, AnthropicProvider, GoogleProvider, CohereProvider
- Tools: Function tools with @agent.tool decorator
- Structured Output: Pydantic BaseModel result types
- Streaming: Real-time response streaming
- Async Operations: agent.run() async patterns
⚠️ PROVIDER LIMITATIONS:
- Bedrock: Not supported in PydanticAI tests - tested separately in test_bedrock.py
Tests Pydantic AI standard interface compliance and Bifrost integration:
1. Basic Agent chat - Cross-provider
2. Agent with system prompt (instructions) - Cross-provider
3. Multi-turn conversation with message history - Cross-provider
4. Tool calling with @agent.tool decorator - Cross-provider
5. End-to-end tool calling with multi-turn flow - Cross-provider
6. Structured output with Pydantic models - Cross-provider
7. Streaming responses - Cross-provider
8. Async operations
9. Error handling
10. Tool with context - Cross-provider
11. Multiple tools - Cross-provider
12. Result validation
13. Usage tracking
14. Message history inspection
15. Dynamic instructions
"""
import pytest
import asyncio
import os
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from pydantic import BaseModel, Field
from pydantic_ai import Agent, RunContext, Tool
# Pydantic AI model imports
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.openai import OpenAIProvider
# Optional provider imports
try:
from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.providers.anthropic import AnthropicProvider
ANTHROPIC_AVAILABLE = True
except ImportError:
ANTHROPIC_AVAILABLE = False
AnthropicModel = None
AnthropicProvider = None
try:
from pydantic_ai.models.google import GoogleModel
from pydantic_ai.providers.google import GoogleProvider
GOOGLE_AVAILABLE = True
except ImportError:
GOOGLE_AVAILABLE = False
GoogleModel = None
GoogleProvider = None
try:
from cohere import AsyncClientV2 as CohereAsyncClient
from pydantic_ai.models.cohere import CohereModel
from pydantic_ai.providers.cohere import CohereProvider
COHERE_AVAILABLE = True
except ImportError:
COHERE_AVAILABLE = False
CohereAsyncClient = None
CohereModel = None
CohereProvider = None
from .utils.common import (
Config,
SIMPLE_CHAT_MESSAGES,
MULTI_TURN_MESSAGES,
WEATHER_TOOL,
CALCULATOR_TOOL,
EMBEDDINGS_SINGLE_TEXT,
EMBEDDINGS_MULTIPLE_TEXTS,
mock_tool_response,
assert_valid_chat_response,
get_api_key,
skip_if_no_api_key,
WEATHER_KEYWORDS,
LOCATION_KEYWORDS,
)
from .utils.config_loader import get_model, get_integration_url, get_config
from .utils.parametrize import (
get_cross_provider_params_for_scenario,
format_provider_model,
)
@pytest.fixture
def test_config():
    """Provide each requesting test with a newly constructed ``Config``."""
    config = Config()
    return config
@pytest.fixture(autouse=True)
def setup_pydanticai(monkeypatch):
    """Install dummy provider credentials in the environment for every test.

    The values are placeholders ("bifrost-handles-auth"). They are set through
    ``monkeypatch`` rather than by writing ``os.environ`` directly, so the
    previous environment is restored when each test finishes and does not
    leak into other test modules.
    """
    dummy_credentials = {
        "OPENAI_API_KEY": "dummy-openai-key-bifrost-handles-auth",
        "ANTHROPIC_API_KEY": "dummy-anthropic-key-bifrost-handles-auth",
        "GOOGLE_API_KEY": "dummy-google-api-key-bifrost-handles-auth",
        "GEMINI_API_KEY": "dummy-gemini-api-key-bifrost-handles-auth",
        "CO_API_KEY": "dummy-cohere-key-bifrost-handles-auth",
    }
    for variable, value in dummy_credentials.items():
        monkeypatch.setenv(variable, value)
    yield
    # No explicit cleanup: monkeypatch undoes the setenv calls on teardown
def get_openai_model(model_name: str | None = None) -> OpenAIChatModel:
    """Build an ``OpenAIChatModel`` whose provider targets the Bifrost URL.

    Args:
        model_name: Model identifier. When ``None``, the name is taken from
            ``get_model("pydanticai", "chat")``.

    Returns:
        The model, backed by an ``OpenAIProvider`` pointed at
        ``<integration url>/v1`` with a dummy API key.
    """
    bifrost_url = get_integration_url("pydanticai")
    resolved_name = get_model("pydanticai", "chat") if model_name is None else model_name
    return OpenAIChatModel(
        resolved_name,
        provider=OpenAIProvider(
            base_url=f"{bifrost_url}/v1",
            api_key="dummy-openai-key-bifrost-handles-auth",
        ),
    )
def get_anthropic_model(model_name: str = "claude-3-haiku-20240307") -> Optional[Any]:
    """Build an ``AnthropicModel`` pointed at Bifrost.

    Returns ``None`` when ``ANTHROPIC_AVAILABLE`` is false, i.e. the optional
    Anthropic imports at the top of the module failed.
    """
    if ANTHROPIC_AVAILABLE:
        # The base URL is passed without a /v1 suffix (unlike the OpenAI
        # factory); the original note attributes this to the Anthropic SDK
        # appending /v1 itself — TODO confirm against the SDK.
        bifrost_provider = AnthropicProvider(
            base_url=get_integration_url("pydanticai"),
            api_key="dummy-anthropic-key-bifrost-handles-auth",
        )
        return AnthropicModel(model_name, provider=bifrost_provider)
    return None
def get_google_model(model_name: str = "gemini-2.0-flash") -> Optional[Any]:
    """Build a ``GoogleModel`` pointed at Bifrost.

    Returns ``None`` when ``GOOGLE_AVAILABLE`` is false, i.e. the optional
    Google imports at the top of the module failed.
    """
    if GOOGLE_AVAILABLE:
        # GoogleProvider receives the Bifrost endpoint as its base URL
        bifrost_url = get_integration_url("pydanticai")
        bifrost_provider = GoogleProvider(
            api_key="dummy-google-api-key-bifrost-handles-auth",
            base_url=bifrost_url,
        )
        return GoogleModel(model_name, provider=bifrost_provider)
    return None
def get_cohere_model(model_name: str = "command-r7b-12-2024") -> Optional[Any]:
    """Build a ``CohereModel`` pointed at Bifrost.

    Returns ``None`` when ``COHERE_AVAILABLE`` is false, i.e. the optional
    Cohere imports at the top of the module failed.
    """
    if COHERE_AVAILABLE:
        bifrost_url = get_integration_url("pydanticai")
        # The Bifrost URL is set on a custom async client, which is then
        # handed to CohereProvider instead of configuring the provider directly
        bifrost_client = CohereAsyncClient(
            api_key="dummy-cohere-key-bifrost-handles-auth",
            base_url=bifrost_url,
        )
        return CohereModel(model_name, provider=CohereProvider(cohere_client=bifrost_client))
    return None
def get_pydanticai_model_for_provider(provider: str, model: str) -> Any:
    """
    Build the Pydantic AI model object that corresponds to a provider name.

    The provider name is matched case-insensitively and the work is delegated
    to the per-provider ``get_*_model`` factories in this module.

    Args:
        provider: Provider name ('openai', 'anthropic', 'gemini' or 'google', 'cohere')
        model: Model name forwarded to the provider-specific factory

    Returns:
        Configured Pydantic AI model object for the provider

    Raises:
        ValueError: If the provider is unknown, is 'bedrock', or its optional
            SDK imports were not available
    """
    key = provider.lower()

    if key == "openai":
        return get_openai_model(model)

    if key == "anthropic":
        if ANTHROPIC_AVAILABLE:
            return get_anthropic_model(model)
        raise ValueError(f"Anthropic SDK not available for provider '{provider}'")

    if key in ("gemini", "google"):
        if GOOGLE_AVAILABLE:
            return get_google_model(model)
        raise ValueError(f"Google GenAI SDK not available for provider '{provider}'")

    if key == "cohere":
        if COHERE_AVAILABLE:
            return get_cohere_model(model)
        raise ValueError(f"Cohere SDK not available for provider '{provider}'")

    if key == "bedrock":
        # Per the original note: Bedrock is exercised in test_bedrock.py via
        # the native Bedrock API, so it is rejected explicitly here with its
        # own message rather than the generic "unsupported" one.
        raise ValueError(
            "Provider 'bedrock' is not supported in PydanticAI tests - "
            "use test_bedrock.py for Bedrock testing"
        )

    raise ValueError(f"Unsupported provider: {provider}. Supported: openai, anthropic, gemini, cohere")
# Structured output models for testing
# Structured-output model for tests: a city and the country it belongs to.
# NOTE(review): the class docstring and the Field descriptions below may be
# surfaced in the generated schema, so treat them as runtime text — confirm
# before rewording.
class CityInfo(BaseModel):
    """Information about a city"""
    city: str = Field(description="Name of the city")
    country: str = Field(description="Country where the city is located")
# Structured-output model for tests: weather for one location.
# All three fields are plain strings, including ``temperature``.
# NOTE(review): the class docstring and the Field descriptions below may be
# surfaced in the generated schema, so treat them as runtime text — confirm
# before rewording.
class WeatherResponse(BaseModel):
    """Weather information response"""
    location: str = Field(description="Location for the weather")
    temperature: str = Field(description="Current temperature")
    conditions: str = Field(description="Weather conditions description")
# Structured-output model for tests: an expression (string) and its numeric
# result (float).
# NOTE(review): the class docstring and the Field descriptions below may be
# surfaced in the generated schema, so treat them as runtime text — confirm
# before rewording.
class CalculationResult(BaseModel):
    """Result of a calculation"""
    expression: str = Field(description="The mathematical expression")
    result: float = Field(description="The calculated result")
class TestPydanticAIIntegration:
"""Comprehensive Pydantic AI integration tests through Bifrost"""
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("simple_chat"))
def test_01_basic_agent_chat(self, test_config, provider, model):
    """Test Case 1: Basic Agent chat functionality - runs across all available providers"""
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")
    # Only the model factory reports an unavailable provider via ValueError.
    # Keep the try limited to it so a ValueError raised while running the
    # agent surfaces as a failure instead of being reported as a skip.
    try:
        pydantic_model = get_pydanticai_model_for_provider(provider, model)
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
    agent = Agent(
        pydantic_model,
        instructions="Be concise, reply with one sentence.",
    )
    result = agent.run_sync("Hello! How are you today?")
    assert result is not None
    assert result.output is not None
    assert len(str(result.output)) > 0
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("simple_chat"))
def test_02_agent_with_system_prompt(self, test_config, provider, model):
    """Test Case 2: Agent with custom system prompt (instructions) - runs across all available providers"""
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")
    # Only the model factory reports an unavailable provider via ValueError.
    # Keep the try limited to it so a ValueError raised while running the
    # agent surfaces as a failure instead of being reported as a skip.
    try:
        pydantic_model = get_pydanticai_model_for_provider(provider, model)
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
    agent = Agent(
        pydantic_model,
        instructions=(
            "You are a helpful geography expert. "
            "Always mention the continent when discussing cities."
        ),
    )
    result = agent.run_sync("What is the capital of France?")
    assert result is not None
    assert result.output is not None
    content = str(result.output).lower()
    assert "paris" in content
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("multi_turn_conversation"))
def test_03_multi_turn_conversation(self, test_config, provider, model):
    """Test Case 3: a second turn can recall a fact stated in the first turn.

    The first run's full message list is passed as ``message_history`` to the
    second run; the test passes when the second answer contains "alice".
    Skips on the placeholder pair or on ValueError.
    """
    placeholder_params = provider == "_no_providers_" or model == "_no_model_"
    if placeholder_params:
        pytest.skip("No providers configured for this scenario")

    try:
        assistant = Agent(
            get_pydanticai_model_for_provider(provider, model),
            instructions="You are a helpful assistant. Remember context from previous messages.",
        )

        # Turn one introduces the name; turn two must recall it from history.
        introduction = assistant.run_sync("My name is Alice.")
        recall = assistant.run_sync(
            "What is my name?",
            message_history=introduction.all_messages(),
        )

        assert recall is not None
        assert recall.output is not None
        assert "alice" in str(recall.output).lower()
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("tool_calls"))
def test_04_tool_calling(self, test_config, provider, model):
    """Test Case 4: an Agent with function tools answers a weather question.

    Parametrized over the "tool_calls" scenario. Two plain functions are
    registered as tools; the test passes when the lower-cased answer contains
    any weather or location keyword. Skips on the placeholder pair or on
    ValueError.

    The ``calculate`` tool evaluates model-supplied text, so it parses the
    expression with ``ast`` and only accepts numeric literals combined with
    basic arithmetic operators instead of calling ``eval``.
    """
    # Function-local imports: only the calculator tool below needs them.
    import ast
    import operator

    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        """Evaluate a parsed arithmetic node; reject everything else."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if (
            isinstance(node, ast.Constant)
            and isinstance(node.value, (int, float))
            and not isinstance(node.value, bool)
        ):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError("unsupported expression")

    try:
        pydantic_model = get_pydanticai_model_for_provider(provider, model)

        # Tools are plain functions; their docstrings are left unchanged.
        def get_weather(location: str) -> str:
            """Get the current weather for a location."""
            return f"The weather in {location} is 72°F and sunny."

        def calculate(expression: str) -> str:
            """Perform a mathematical calculation."""
            try:
                # Accept "x" / "×" as multiplication, then evaluate arithmetic only.
                normalized = expression.replace("x", "*").replace("×", "*")
                result = evaluate(ast.parse(normalized, mode="eval"))
                return f"The result of {expression} is {result}"
            except Exception:
                # Any parse or evaluation problem is reported back to the model as text.
                return f"Could not calculate {expression}"

        agent = Agent(
            pydantic_model,
            tools=[get_weather, calculate],
            instructions="You are a helpful assistant that can check weather and do calculations.",
        )
        result = agent.run_sync("What's the weather like in Boston?")

        assert result is not None
        assert result.output is not None
        content = str(result.output).lower()

        # Should either mention weather info or Boston
        weather_location_keywords = WEATHER_KEYWORDS + LOCATION_KEYWORDS
        assert any(
            word in content for word in weather_location_keywords
        ), f"Response should mention weather or location. Got: {content}"
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("end2end_tool_calling"))
def test_05_end2end_tool_calling(self, test_config, provider, model):
    """Test Case 5: a single run with a weather tool yields a weather answer.

    Parametrized over the "end2end_tool_calling" scenario. One run is issued
    and its final output is checked for any weather or location keyword.
    Skips on the placeholder pair or on ValueError.
    """
    placeholder_params = provider == "_no_providers_" or model == "_no_model_"
    if placeholder_params:
        pytest.skip("No providers configured for this scenario")

    try:
        # The tool's name and docstring are left unchanged.
        def get_weather(location: str) -> str:
            """Get the current weather for a location."""
            return f"The weather in {location} is 72°F and sunny."

        weather_agent = Agent(
            get_pydanticai_model_for_provider(provider, model),
            tools=[get_weather],
            instructions="You are a helpful assistant that can check weather.",
        )

        # The prompt is expected to make the model call the tool.
        # NOTE(review): presumably Pydantic AI runs the tool itself, so the
        # output is already the final answer - confirm against its docs.
        outcome = weather_agent.run_sync("What's the weather in Boston in fahrenheit?")

        assert outcome is not None
        assert outcome.output is not None

        content = str(outcome.output).lower()
        expected_keywords = WEATHER_KEYWORDS + LOCATION_KEYWORDS
        assert any(
            word in content for word in expected_keywords
        ), f"Response should mention weather or location. Got: {content}"
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("pydantic_structured_output"))
def test_06_structured_output(self, test_config, provider, model):
    """Test Case 6: structured output is returned as a ``CityInfo`` instance.

    Parametrized over the "pydantic_structured_output" scenario. The agent is
    built with ``output_type=CityInfo``; the test checks the output type, that
    the city is "paris" and that the country contains "france". Skips on the
    placeholder pair or on ValueError.
    """
    placeholder_params = provider == "_no_providers_" or model == "_no_model_"
    if placeholder_params:
        pytest.skip("No providers configured for this scenario")

    try:
        extractor = Agent(
            get_pydanticai_model_for_provider(provider, model),
            output_type=CityInfo,
            instructions="Extract city information from the user's question.",
        )
        extraction = extractor.run_sync("Tell me about Paris, the capital of France.")

        assert extraction is not None
        assert extraction.output is not None
        assert isinstance(extraction.output, CityInfo)
        assert extraction.output.city.lower() == "paris"
        assert "france" in extraction.output.country.lower()
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("pydanticai_streaming"))
def test_07_streaming_responses(self, test_config, provider, model):
    """Test Case 7: streamed text arrives in chunks and forms a robot story.

    Parametrized over the "pydanticai_streaming" scenario. Collects the text
    deltas from ``agent.run_stream`` and checks that at least one chunk was
    received and that the joined text mentions "robot", "story" or "once".
    Skips on the placeholder pair or on ValueError.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    try:
        pydantic_model = get_pydanticai_model_for_provider(provider, model)
        agent = Agent(
            pydantic_model,
            instructions="You are a storyteller. Tell short, engaging stories.",
        )

        async def run_streaming():
            """Return the joined streamed text and the number of chunks."""
            async with agent.run_stream("Tell me a very short story about a robot.") as response:
                # delta=True yields only the newly received text. The default
                # yields the cumulative text so far, which would be duplicated
                # when the chunks are joined.
                chunks = [chunk async for chunk in response.stream_text(delta=True)]
            return "".join(chunks), len(chunks)

        # asyncio.run creates a fresh loop and tears it down afterwards, so no
        # closed loop is left installed as the current event loop.
        full_content, chunk_count = asyncio.run(run_streaming())

        assert chunk_count > 0, "Should receive streaming chunks"
        assert len(full_content) > 0, "Should have content from streaming"
        assert any(
            word in full_content.lower() for word in ["robot", "story", "once"]
        ), f"Response should be a story about robots. Got: {full_content[:200]}"
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
def test_08_async_operations(self, test_config):
    """Test Case 8: ``Agent.run`` works when awaited inside an event loop.

    Skips when building the model or running the agent raises. Assertions on
    the result sit outside the ``try`` so a bad result fails the test instead
    of being reported as a skip.
    """

    async def async_test():
        """Build the agent and await a single run."""
        model = get_openai_model()
        agent = Agent(
            model,
            instructions="Be concise.",
        )
        return await agent.run("Hello from async!")

    try:
        result = asyncio.run(async_test())
    except Exception as e:
        pytest.skip(f"Async operations through Pydantic AI not available: {e}")

    assert result is not None
    assert result.output is not None
    assert len(str(result.output)) > 0
def test_09_error_handling(self, test_config):
    """Test Case 9: an invalid model name produces a meaningful error.

    Skips only when the client objects cannot be constructed. The request must
    raise, and the lower-cased error text must contain one of the expected
    words; a non-matching message fails the test rather than skipping it.
    """
    try:
        # Point an OpenAI-style client at the gateway with a bogus model name.
        base_url = get_integration_url("pydanticai")
        provider = OpenAIProvider(
            base_url=f"{base_url}/v1",
            api_key="dummy-key"
        )
        model = OpenAIChatModel("invalid-model-name-should-fail", provider=provider)
        agent = Agent(model)
    except Exception as e:
        pytest.skip(f"Error handling test through Pydantic AI not available: {e}")

    with pytest.raises(Exception) as exc_info:
        agent.run_sync("This should fail gracefully.")

    # Should get a meaningful error
    error_message = str(exc_info.value).lower()
    assert any(
        word in error_message
        for word in ["model", "error", "invalid", "not found", "does not exist"]
    )
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("tool_calls"))
def test_10_tool_with_context(self, test_config, provider, model):
    """Test Case 10: a tool reads injected dependencies through ``RunContext``.

    Parametrized over the "tool_calls" scenario. A ``UserDeps`` instance is
    passed as ``deps`` and the tool formats its fields; the test passes when
    the answer contains "alice" or "user". Skips on the placeholder pair or on
    ValueError.
    """
    placeholder_params = provider == "_no_providers_" or model == "_no_model_"
    if placeholder_params:
        pytest.skip("No providers configured for this scenario")

    try:
        llm = get_pydanticai_model_for_provider(provider, model)

        @dataclass
        class UserDeps:
            user_name: str
            user_id: int

        # The tool's name and docstring are left unchanged.
        def get_user_info(ctx: RunContext[UserDeps]) -> str:
            """Get information about the current user."""
            return f"User: {ctx.deps.user_name} (ID: {ctx.deps.user_id})"

        context_tool = Tool(get_user_info, takes_ctx=True)
        lookup_agent = Agent(
            llm,
            deps_type=UserDeps,
            tools=[context_tool],
            instructions="You can look up user information when asked.",
        )

        outcome = lookup_agent.run_sync(
            "What is my user information?",
            deps=UserDeps(user_name="Alice", user_id=123),
        )

        assert outcome is not None
        assert outcome.output is not None

        # Either the injected name or a generic mention of the user is accepted.
        answer = str(outcome.output).lower()
        assert "alice" in answer or "user" in answer
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("multiple_tool_calls"))
def test_11_multiple_tools(self, test_config, provider, model):
    """Test Case 11: an Agent with three registered tools completes a run.

    Parametrized over the "multiple_tool_calls" scenario. Only checks that a
    result with a non-None output comes back. Skips on the placeholder pair or
    on ValueError.
    """
    placeholder_params = provider == "_no_providers_" or model == "_no_model_"
    if placeholder_params:
        pytest.skip("No providers configured for this scenario")

    try:
        llm = get_pydanticai_model_for_provider(provider, model)

        # Tool names, signatures and docstrings are left unchanged.
        def get_weather(location: str) -> str:
            """Get weather for a location."""
            return f"Weather in {location}: 72°F, sunny"

        def get_time(timezone: str) -> str:
            """Get current time in a timezone."""
            return f"Current time in {timezone}: 2:30 PM"

        def translate(text: str, target_language: str) -> str:
            """Translate text to another language."""
            return f"'{text}' in {target_language}: [translated]"

        toolbox = [get_weather, get_time, translate]
        multi_tool_agent = Agent(
            llm,
            tools=toolbox,
            instructions="You can check weather, time, and translate text.",
        )
        outcome = multi_tool_agent.run_sync("What's the weather in New York?")

        assert outcome is not None
        assert outcome.output is not None
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")
def test_12_agent_with_result_validators(self, test_config):
    """Test Case 12: structured output honours the model's field constraints.

    Skips when the model cannot be built or the run raises. Assertions on the
    returned object sit outside the ``try`` so a wrong type or out-of-range
    value fails the test instead of being reported as a skip.
    """

    class NumberResponse(BaseModel):
        """A response containing a number"""

        value: int = Field(ge=0, le=100, description="A number between 0 and 100")
        explanation: str = Field(description="Explanation of the number")

    try:
        model = get_openai_model()
        agent = Agent(
            model,
            output_type=NumberResponse,
            instructions="When asked for a number, provide a value between 0 and 100.",
        )
        result = agent.run_sync("Give me a random number for a dice roll (1-6).")
    except Exception as e:
        pytest.skip(f"Result validation through Pydantic AI not available: {e}")

    assert result is not None
    assert result.output is not None
    assert isinstance(result.output, NumberResponse)
    assert 0 <= result.output.value <= 100
def test_13_usage_tracking(self, test_config):
    """Test Case 13: a completed run reports positive token usage.

    Skips when the model cannot be built or the run raises. The usage checks
    sit outside the ``try`` so missing or zero token counts fail the test
    instead of being reported as a skip.
    """
    try:
        model = get_openai_model()
        agent = Agent(
            model,
            instructions="Be concise.",
        )
        result = agent.run_sync("Say hello.")
    except Exception as e:
        pytest.skip(f"Usage tracking through Pydantic AI not available: {e}")

    assert result is not None

    # Check usage information
    usage = result.usage()
    assert usage is not None

    # Usage should have token counts; the attribute checked depends on which
    # one the usage object exposes.
    if hasattr(usage, 'total_tokens'):
        assert usage.total_tokens > 0
    elif hasattr(usage, 'input_tokens'):
        assert usage.input_tokens > 0
def test_14_message_history_inspection(self, test_config):
    """Test Case 14: the run's message history holds a request and a response.

    Skips when the model cannot be built or the run raises. The history checks
    sit outside the ``try`` so an incomplete history fails the test instead of
    being reported as a skip.
    """
    try:
        model = get_openai_model()
        agent = Agent(
            model,
            instructions="Be helpful.",
        )
        result = agent.run_sync("What is 2 + 2?")
    except Exception as e:
        pytest.skip(f"Message history inspection through Pydantic AI not available: {e}")

    # Inspect all messages
    messages = result.all_messages()
    assert messages is not None
    assert len(messages) >= 2  # At least request and response

    # Should have user message and assistant response
    message_kinds = [msg.kind for msg in messages]
    assert "request" in message_kinds
    assert "response" in message_kinds
def test_15_dynamic_instructions(self, test_config):
    """Test Case 15: instructions are computed from the run's dependencies.

    An ``@agent.instructions`` function builds the instruction text from
    ``ctx.deps.language``. Skips when setup or the run raises. The content
    check sits outside the ``try`` so an unexpected reply fails the test
    instead of being reported as a skip.
    """

    @dataclass
    class LanguageDeps:
        language: str

    try:
        model = get_openai_model()
        agent = Agent(
            model,
            deps_type=LanguageDeps,
        )

        @agent.instructions
        def dynamic_instructions(ctx: RunContext[LanguageDeps]) -> str:
            return f"Always respond in {ctx.deps.language}. Be concise."

        deps = LanguageDeps(language="English")
        result = agent.run_sync("Say hello.", deps=deps)
    except Exception as e:
        pytest.skip(f"Dynamic instructions through Pydantic AI not available: {e}")

    assert result is not None
    assert result.output is not None

    # Response should be in English
    content = str(result.output).lower()
    assert any(word in content for word in ["hello", "hi", "greetings"])
# Additional test class for edge cases
class TestPydanticAIEdgeCases:
"""Edge case tests for Pydantic AI integration"""
def test_empty_response_handling(self, test_config):
    """Check that a near-empty reply still yields a result object.

    Skips when the model cannot be built or the run raises. The assertion sits
    outside the ``try`` so a missing result fails the test instead of being
    reported as a skip.
    """
    try:
        model = get_openai_model()
        agent = Agent(
            model,
            instructions="If asked to say nothing, respond with a single space.",
        )
        result = agent.run_sync("Say as little as possible.")
    except Exception as e:
        pytest.skip(f"Empty response handling test not available: {e}")

    # Should still get a valid result object
    assert result is not None
def test_special_characters_in_prompt(self, test_config):
    """Check that a prompt with CJK text, emoji, markup and quotes is handled.

    Skips when the model cannot be built or the run raises. Assertions sit
    outside the ``try`` so a missing result or output fails the test instead
    of being reported as a skip.
    """
    try:
        model = get_openai_model()
        agent = Agent(
            model,
            instructions="Echo back special characters correctly.",
        )
        special_prompt = "Handle these: 你好 🎉 <tag> & \"quotes\" 'apostrophe'"
        result = agent.run_sync(special_prompt)
    except Exception as e:
        pytest.skip(f"Special characters test not available: {e}")

    assert result is not None
    assert result.output is not None
def test_long_conversation_context(self, test_config):
    """Check that a run works on top of history accumulated over three turns.

    Skips when the model cannot be built or any run raises. Assertions sit
    outside the ``try`` so a missing result or output fails the test instead
    of being reported as a skip.
    """
    try:
        model = get_openai_model()
        agent = Agent(
            model,
            instructions="You are a helpful assistant.",
        )

        # Build up conversation history: each turn is given the messages of
        # all previous turns (None on the first turn).
        history = None
        for i in range(3):
            result = agent.run_sync(
                f"Remember number {i + 1}.",
                message_history=history,
            )
            history = result.all_messages()

        # Final query should work with accumulated history
        final_result = agent.run_sync(
            "What numbers did I ask you to remember?",
            message_history=history,
        )
    except Exception as e:
        pytest.skip(f"Long conversation context test not available: {e}")

    assert final_result is not None
    assert final_result.output is not None

View File

@@ -0,0 +1 @@
# Utils package for shared test utilities

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,522 @@
"""
Configuration loader for Bifrost integration tests.
This module loads configuration from config.yml and provides utilities
for constructing integration URLs through the Bifrost gateway.
"""
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
import yaml
# Integration to provider mapping.
# Maps each integration name to the key of the provider whose entry in the
# config's "providers" section supplies that integration's models. The keys
# of this dict are also what ConfigLoader.list_integrations() returns.
INTEGRATION_TO_PROVIDER_MAP = {
    "openai": "openai",
    "anthropic": "anthropic",
    "google": "gemini",  # Google integration uses Gemini provider
    "litellm": "openai",  # LiteLLM defaults to OpenAI
    "langchain": "openai",  # LangChain defaults to OpenAI
    "pydanticai": "openai",  # Pydantic AI defaults to OpenAI
    "bedrock": "bedrock",  # Bedrock defaults to Amazon provider
    "azure": "azure",
}
@dataclass
class BifrostConfig:
    """Bifrost gateway configuration, as read from the "bifrost" config section."""

    # Gateway base URL; integration URLs are built as "<base_url>/<endpoint>".
    base_url: str
    # Integration name -> endpoint path appended to base_url.
    endpoints: Dict[str, str]
@dataclass
class IntegrationModels:
    """Model configuration for an integration."""

    # Model name used for chat.
    chat: str
    # Model name used for vision.
    vision: str
    # Model name used for tool calling.
    tools: str
    # Alternative model names. NOTE(review): presumably a list of str - confirm.
    alternatives: list
@dataclass
class TestConfig:
    """Complete test configuration.

    NOTE(review): nothing in the visible part of this module instantiates this
    class; the field names look like top-level config sections - confirm.
    """

    # Gateway settings (base URL and endpoints).
    bifrost: BifrostConfig
    # API settings section.
    api: Dict[str, Any]
    # Per-integration model configuration.
    models: Dict[str, IntegrationModels]
    # Per-model capability flags and limits.
    model_capabilities: Dict[str, Dict[str, Any]]
    # General test settings section.
    test_settings: Dict[str, Any]
    # Integration-specific settings, keyed by integration name.
    integration_settings: Dict[str, Any]
    # Environment-specific settings, keyed by environment name.
    environments: Dict[str, Any]
    # Logging settings section.
    logging: Dict[str, Any]
class ConfigLoader:
"""Configuration loader for Bifrost integration tests"""
def __init__(self, config_path: Optional[str] = None):
"""Initialize configuration loader
Args:
config_path: Path to config.yml file. If None, looks for config.yml in project root.
"""
if config_path is None:
# Look for config.yml in project root
project_root = Path(__file__).parent.parent.parent
config_path = project_root / "config.yml"
self.config_path = Path(config_path)
self._config = None
self._load_config()
def _load_config(self):
"""Load configuration from YAML file"""
if not self.config_path.exists():
raise FileNotFoundError(f"Configuration file not found: {self.config_path}")
with open(self.config_path, "r") as f:
raw_config = yaml.safe_load(f)
# Expand environment variables
self._config = self._expand_env_vars(raw_config)
def _expand_env_vars(self, obj):
"""Recursively expand environment variables in configuration"""
if isinstance(obj, dict):
return {k: self._expand_env_vars(v) for k, v in obj.items()}
elif isinstance(obj, list):
return [self._expand_env_vars(item) for item in obj]
elif isinstance(obj, str):
# Handle ${VAR:-default} syntax
import re
pattern = r"\$\{([^}]+)\}"
def replace_var(match):
var_expr = match.group(1)
if ":-" in var_expr:
var_name, default_value = var_expr.split(":-", 1)
return os.getenv(var_name, default_value)
else:
return os.getenv(var_expr, "")
return re.sub(pattern, replace_var, obj)
else:
return obj
def get_integration_url(self, integration: str) -> str:
"""Get the complete URL for a integration
Args:
integration: Integration name (openai, anthropic, google, litellm)
Returns:
Complete URL for the integration
Examples:
get_integration_url("openai") -> "http://localhost:8080/openai"
"""
bifrost_config = self._config["bifrost"]
base_url = bifrost_config["base_url"]
endpoint = bifrost_config["endpoints"].get(integration, "")
if not endpoint:
raise ValueError(f"No endpoint configured for integration: {integration}")
return f"{base_url.rstrip('/')}/{endpoint}"
def get_bifrost_config(self) -> BifrostConfig:
    """Return the "bifrost" config section as a BifrostConfig."""
    section = self._config["bifrost"]
    base_url, endpoints = section["base_url"], section["endpoints"]
    return BifrostConfig(base_url=base_url, endpoints=endpoints)
def get_model(self, integration: str, model_type: str = "chat") -> str:
    """Return the model name for an integration and model type.

    The integration is first translated to its provider, then the provider's
    configuration is consulted.

    Args:
        integration: Integration name (openai, anthropic, google, litellm, langchain)
        model_type: Model type (chat, vision, tools, etc.)

    Returns:
        Model name for the integration and type

    Raises:
        ValueError: If the integration has no provider mapping.
    """
    provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
    if provider:
        return self.get_provider_model(provider, model_type)

    raise ValueError(
        f"Unknown integration: {integration}. "
        f"Valid integrations: {list(INTEGRATION_TO_PROVIDER_MAP.keys())}"
    )
def get_model_alternatives(self, integration: str) -> list:
    """Return the alternative models configured for an integration.

    An empty list is returned when the integration is unknown, when there is
    no "providers" section, or when the mapped provider is not configured.
    """
    provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
    if not provider or "providers" not in self._config:
        return []

    configured = self._config["providers"]
    if provider not in configured:
        return []
    return configured[provider].get("alternatives", [])
def get_model_capabilities(self, model: str) -> Dict[str, Any]:
"""Get capabilities for a specific model"""
return self._config["model_capabilities"].get(
model,
{
"chat": True,
"tools": False,
"vision": False,
"max_tokens": 4096,
"context_window": 4096,
},
)
def supports_capability(self, model: str, capability: str) -> bool:
"""Check if a model supports a specific capability"""
caps = self.get_model_capabilities(model)
return caps.get(capability, False)
def get_api_config(self) -> Dict[str, Any]:
    """Get API configuration (timeout, retries, etc.)

    Returns the "api" config section as loaded; a missing section raises
    KeyError.
    """
    return self._config["api"]
def get_test_settings(self) -> Dict[str, Any]:
    """Get test configuration settings

    Returns the "test_settings" config section as loaded; a missing section
    raises KeyError.
    """
    return self._config["test_settings"]
def get_integration_settings(self, integration: str) -> Dict[str, Any]:
    """Get integration-specific settings

    Looks the integration up in the "integration_settings" section and returns
    an empty dict when it has no entry; a missing section raises KeyError.
    """
    return self._config["integration_settings"].get(integration, {})
def get_environment_config(self, environment: str | None = None) -> Dict[str, Any]:
"""Get environment-specific configuration
Args:
environment: Environment name (development, production, etc.)
If None, uses TEST_ENV environment variable or 'development'
"""
if environment is None:
environment = os.getenv("TEST_ENV", "development")
return self._config["environments"].get(environment, {})
def get_logging_config(self) -> Dict[str, Any]:
    """Get logging configuration

    Returns the "logging" config section as loaded; a missing section raises
    KeyError.
    """
    return self._config["logging"]
def list_integrations(self) -> list:
    """Return the names of all integrations that have a provider mapping."""
    return list(INTEGRATION_TO_PROVIDER_MAP)
def list_models(self, integration: str | None = None) -> Dict[str, Any]:
    """Return provider model configuration keyed by integration name.

    Args:
        integration: When given, only that integration is returned, as a
            one-entry dict ``{integration: provider_config}``. When omitted,
            every mapped integration whose provider is configured is included.

    Raises:
        ValueError: If a requested integration is unknown or its provider is
            not configured.
    """
    if not integration:
        # All integrations: silently leave out those without provider config.
        return {
            name: self._config["providers"][provider]
            for name, provider in INTEGRATION_TO_PROVIDER_MAP.items()
            if "providers" in self._config and provider in self._config["providers"]
        }

    provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
    if not provider:
        raise ValueError(f"Unknown integration: {integration}")

    if "providers" not in self._config or provider not in self._config["providers"]:
        raise ValueError(f"No provider configuration for: {provider}")

    return {integration: self._config["providers"][provider]}
def validate_config(self) -> bool:
    """Check that the loaded configuration has everything the tests need.

    Returns:
        True when validation passes.

    Raises:
        ValueError: On the first missing required section, when the bifrost
            section lacks base_url or endpoints, or when a mapped provider is
            absent from the providers section.
    """
    required_sections = ("bifrost", "providers", "api", "test_settings")
    absent = next((name for name in required_sections if name not in self._config), None)
    if absent is not None:
        raise ValueError(f"Missing required configuration section: {absent}")

    # The gateway section needs both of its keys.
    gateway = self._config["bifrost"]
    if not ("base_url" in gateway and "endpoints" in gateway):
        raise ValueError("Bifrost configuration missing base_url or endpoints")

    # Every integration must map to a provider that is actually configured.
    configured = self._config["providers"]
    for integration, provider in INTEGRATION_TO_PROVIDER_MAP.items():
        if provider in configured:
            continue
        raise ValueError(
            f"Integration '{integration}' maps to provider '{provider}' "
            f"which is not configured in providers section"
        )

    return True
def print_config_summary(self):
    """Print a summary of the configuration to stdout.

    Sections, in order: gateway base URL and per-integration endpoint URLs,
    model names for each mapped integration whose provider is configured, and
    the API settings. The "timeout", "max_retries" and "retry_delay" keys of
    the API section are read directly, so a missing key raises KeyError.
    """
    print("🔧 BIFROST INTEGRATION TEST CONFIGURATION")
    print("=" * 80)

    # Bifrost configuration
    bifrost = self.get_bifrost_config()
    print("\n🌉 BIFROST GATEWAY:")
    print(f" Base URL: {bifrost.base_url}")
    print(" Endpoints:")
    for integration, endpoint in bifrost.endpoints.items():
        # Same URL construction as get_integration_url().
        full_url = f"{bifrost.base_url.rstrip('/')}/{endpoint}"
        print(f" {integration}: {full_url}")

    # Model configurations
    print("\n🤖 MODEL CONFIGURATIONS (via providers):")
    for integration, provider in INTEGRATION_TO_PROVIDER_MAP.items():
        # Integrations whose provider is not configured are left out.
        if "providers" in self._config and provider in self._config["providers"]:
            models = self._config["providers"][provider]
            # NOTE(review): integration and provider are printed back-to-back
            # with no separator - confirm this is intended.
            print(f" {integration.upper()}{provider}:")
            print(f" Chat: {models.get('chat', 'N/A')}")
            print(f" Vision: {models.get('vision', 'N/A')}")
            print(f" Tools: {models.get('tools', 'N/A')}")
            alternatives = models.get('alternatives', [])
            print(f" Alternatives: {len(alternatives)} models")

    # API settings
    api_config = self.get_api_config()
    print("\n⚙️ API SETTINGS:")
    print(f" Timeout: {api_config['timeout']}s")
    print(f" Max Retries: {api_config['max_retries']}")
    print(f" Retry Delay: {api_config['retry_delay']}s")
    print(f"\n✅ Configuration loaded successfully from: {self.config_path}")
def get_provider_model(self, provider: str, capability: str = "chat") -> str:
"""Get model name for a provider and capability
Args:
provider: Provider name (e.g., 'openai', 'anthropic', 'gemini')
capability: Capability type (default: 'chat')
Returns:
Model name suitable for the provider and capability
"""
if "providers" not in self._config:
# Fallback to old behavior if providers section doesn't exist
return ""
providers = self._config["providers"]
if provider not in providers:
return ""
provider_models = providers[provider]
return provider_models.get(capability, "")
def get_provider_api_key_env(self, provider: str) -> str:
"""Get the environment variable name for a provider's API key
Args:
provider: Provider name
Returns:
Environment variable name
"""
if "provider_api_keys" not in self._config:
return ""
return self._config["provider_api_keys"].get(provider, "")
def is_provider_available(self, provider: str) -> bool:
"""Check if a provider is available (has API key in environment)
Args:
provider: Provider name
Returns:
True if provider's API key is set in environment
"""
env_var = self.get_provider_api_key_env(provider)
if not env_var:
return False
api_key = os.getenv(env_var)
return api_key is not None and api_key.strip() != ""
def get_available_providers(self) -> List[str]:
"""Get list of providers that are available (have API keys configured)
Returns:
List of available provider names
"""
if "providers" not in self._config:
return []
available = []
for provider in self._config["providers"].keys():
if self.is_provider_available(provider):
available.append(provider)
return available
def provider_supports_scenario(self, provider: str, scenario: str) -> bool:
"""Check if a provider supports a specific test scenario
Args:
provider: Provider name
scenario: Scenario name
Returns:
True if provider supports the scenario
"""
if "provider_scenarios" not in self._config:
return False
if provider not in self._config["provider_scenarios"]:
return False
scenarios = self._config["provider_scenarios"][provider]
return scenarios.get(scenario, False)
def get_providers_for_scenario(self, scenario: str) -> List[str]:
"""Get list of available providers that support a specific scenario
Args:
scenario: Scenario name
Returns:
List of provider names that support the scenario
"""
available_providers = self.get_available_providers()
providers = []
for provider in available_providers:
if self.provider_supports_scenario(provider, scenario):
providers.append(provider)
return providers
def get_scenario_capability(self, scenario: str) -> str:
"""Get the capability type for a scenario
Args:
scenario: Scenario name
Returns:
Capability type (e.g., 'chat', 'vision', 'tools')
"""
if "scenario_capabilities" not in self._config:
return "chat" # Default
return self._config["scenario_capabilities"].get(scenario, "chat")
def get_virtual_key(self) -> str:
"""Get the virtual key value for testing
Returns:
Virtual key string or empty string if not configured
"""
if "virtual_key" not in self._config:
return ""
vk_config = self._config["virtual_key"]
if not vk_config.get("enabled", False):
return ""
return vk_config.get("value", "")
def is_virtual_key_configured(self) -> bool:
"""Check if virtual key testing is enabled and configured
Returns:
True if virtual key is available for testing
"""
vk = self.get_virtual_key()
return vk is not None and vk.strip() != ""
# Shared loader, created on the first get_config() call
_config_loader = None


def get_config() -> ConfigLoader:
    """Return the module-wide ConfigLoader, creating it on first use."""
    global _config_loader
    loader = _config_loader
    if loader is None:
        loader = _config_loader = ConfigLoader()
    return loader
def get_integration_url(integration: str) -> str:
    """Convenience function to get the gateway URL for an integration.

    Delegates to ConfigLoader.get_integration_url on the shared loader.
    """
    return get_config().get_integration_url(integration)
def get_model(integration: str, model_type: str = "chat") -> str:
    """Convenience function to get model name

    Delegates to ConfigLoader.get_model on the shared loader.
    """
    return get_config().get_model(integration, model_type)
def get_model_capabilities(model: str) -> Dict[str, Any]:
    """Convenience function to get model capabilities

    Delegates to ConfigLoader.get_model_capabilities on the shared loader.
    """
    return get_config().get_model_capabilities(model)
def supports_capability(model: str, capability: str) -> bool:
    """Convenience function to check model capability

    Delegates to ConfigLoader.supports_capability on the shared loader.
    """
    return get_config().supports_capability(model, capability)
def get_provider_model(provider: str, capability: str = "chat") -> str:
    """Convenience function to get provider model

    Delegates to ConfigLoader.get_provider_model on the shared loader.
    """
    return get_config().get_provider_model(provider, capability)
def is_provider_available(provider: str) -> bool:
    """Convenience function to check provider availability

    Delegates to ConfigLoader.is_provider_available on the shared loader.
    """
    return get_config().is_provider_available(provider)
def get_available_providers() -> List[str]:
    """Convenience function to get available providers

    Delegates to ConfigLoader.get_available_providers on the shared loader.
    """
    return get_config().get_available_providers()
def provider_supports_scenario(provider: str, scenario: str) -> bool:
    """Convenience function to check scenario support

    Delegates to ConfigLoader.provider_supports_scenario on the shared loader.
    """
    return get_config().provider_supports_scenario(provider, scenario)
def get_providers_for_scenario(scenario: str) -> List[str]:
    """Convenience function to get providers for scenario

    Delegates to ConfigLoader.get_providers_for_scenario on the shared loader.
    """
    return get_config().get_providers_for_scenario(scenario)
def get_virtual_key() -> str:
    """Convenience function to get virtual key

    Delegates to ConfigLoader.get_virtual_key on the shared loader.
    """
    return get_config().get_virtual_key()
def is_virtual_key_configured() -> bool:
    """Convenience function to check if virtual key is configured

    Delegates to ConfigLoader.is_virtual_key_configured on the shared loader.
    """
    return get_config().is_virtual_key_configured()
if __name__ == "__main__":
    # Print configuration summary when run directly.
    # validate_config() raises ValueError on an incomplete configuration, so
    # the summary is only printed for a configuration that passed validation.
    config = get_config()
    config.validate_config()
    config.print_config_summary()

View File

@@ -0,0 +1,66 @@
"""
Model configurations for each integration.
This file now acts as a compatibility layer and convenience wrapper
around the new configuration system in config.yml and config_loader.py.
All model data is now centralized in config.yml for easier maintenance.
"""
from typing import Dict, List
from dataclasses import dataclass
from .config_loader import get_config
@dataclass
class IntegrationModels:
    """Model configuration for an integration"""

    chat: str  # Primary chat model
    vision: str  # Vision/multimodal model
    tools: str  # Function calling model
    alternatives: List[str]  # Alternative models for testing
def get_integration_models() -> Dict[str, IntegrationModels]:
    """Get all integration model configurations from config.yml

    Returns:
        A dict mapping each integration name to an IntegrationModels built
        from its provider's configuration. Model types the provider does not
        define become "" and missing alternatives become an empty list.

    Raises:
        ValueError: Propagated from ``list_models`` when an integration's
            provider is not configured.
    """
    config = get_config()
    integration_models = {}
    for integration in config.list_integrations():
        # list_models(name) returns {name: provider_config}; unwrap the single
        # entry before reading the model names from it.
        provider_config = config.list_models(integration)[integration]
        integration_models[integration] = IntegrationModels(
            chat=provider_config.get("chat", ""),
            vision=provider_config.get("vision", ""),
            tools=provider_config.get("tools", ""),
            alternatives=provider_config.get("alternatives", []),
        )
    return integration_models
# Backward compatibility - load from config.
# Evaluated at import time, so importing this module loads the configuration
# through get_config() and raises if that fails.
INTEGRATION_MODELS = get_integration_models()
def get_alternatives(integration: str) -> List[str]:
    """Return the alternative models configured for an integration."""
    return get_config().get_model_alternatives(integration)
def list_all_models() -> Dict[str, Dict[str, str]]:
    """Return the configured models for every integration, keyed by integration."""
    return get_config().list_models()
# Print model summary for documentation
def print_model_summary():
    """Print the configuration summary, including every integration's models."""
    get_config().print_config_summary()
if __name__ == "__main__":
    # Running the module directly prints the model summary.
    print_model_summary()

View File

@@ -0,0 +1,141 @@
"""
Parametrization utilities for cross-provider testing.
This module provides pytest parametrization for testing across multiple AI providers
with automatic scenario-based filtering.
"""
from typing import List, Tuple, Union
from .config_loader import get_config
def get_cross_provider_params_for_scenario(
    scenario: str,
    include_providers: List[str] | None = None,
    exclude_providers: List[str] | None = None,
) -> List[Tuple[str, str]]:
    """Get (provider, model) pairs for pytest parametrization of a scenario.

    Providers are those that are available and support the scenario,
    optionally narrowed by the include/exclude filters. Each provider is
    paired with its model for the scenario's capability; providers without
    such a model are left out.

    Args:
        scenario: Test scenario name
        include_providers: Optional list of providers to include
        exclude_providers: Optional list of providers to exclude

    Returns:
        List of (provider, model) tuples sorted by provider name. When nothing
        qualifies, a single placeholder tuple
        ("_no_providers_", "_no_model_") is returned so parametrization still
        yields one case that the test can skip.
    """
    config = get_config()

    # Get providers that support this scenario
    providers = config.get_providers_for_scenario(scenario)

    # Apply include filter
    if include_providers:
        providers = [p for p in providers if p in include_providers]

    # Apply exclude filter
    if exclude_providers:
        providers = [p for p in providers if p not in exclude_providers]

    # The capability depends only on the scenario, so resolve it once.
    capability = config.get_scenario_capability(scenario)

    # Generate (provider, model) tuples: scenario -> capability -> model
    params = []
    for provider in sorted(providers):  # Sort for consistent test ordering
        model = config.get_provider_model(provider, capability)
        # Only add if provider has a model for this scenario's capability
        if model:
            params.append((provider, model))

    # If no providers available, return a dummy tuple to avoid pytest errors.
    # The test will be skipped with appropriate message.
    if not params:
        params = [("_no_providers_", "_no_model_")]

    return params
def get_cross_provider_params_with_vk_for_scenario(
    scenario: str,
    include_providers: List[str] | None = None,
    exclude_providers: List[str] | None = None,
) -> List[Tuple[str, str, bool]]:
    """
    Build (provider, model, vk_enabled) parameters for pytest parametrization.

    Every provider/model pair produced for the scenario is emitted with
    vk_enabled=False. If the config reports that a virtual key is configured,
    each pair is immediately followed by a second entry with vk_enabled=True.

    Args:
        scenario: Test scenario name
        include_providers: Optional list of providers to include
        exclude_providers: Optional list of providers to exclude

    Returns:
        List of (provider, model, vk_enabled) tuples. If the underlying
        provider/model list is only the "no providers" placeholder, a single
        placeholder tuple with vk_enabled=False is returned.

    Example:
        When VK is configured:
        [
            ("openai", "gpt-4o", False),
            ("openai", "gpt-4o", True),
            ("anthropic", "claude-3", False),
            ("anthropic", "claude-3", True),
        ]
    """
    config = get_config()

    pairs = get_cross_provider_params_for_scenario(
        scenario, include_providers, exclude_providers
    )

    # The placeholder never gets a VK variant; pass it through with False.
    placeholder = ("_no_providers_", "_no_model_")
    if pairs == [placeholder]:
        return [placeholder + (False,)]

    # The non-VK variant is always present; the VK variant is appended per
    # pair only when a virtual key is configured.
    if config.is_virtual_key_configured():
        vk_flags = (False, True)
    else:
        vk_flags = (False,)

    return [
        (provider, model, vk_flag)
        for provider, model in pairs
        for vk_flag in vk_flags
    ]
def format_vk_test_id(provider: str, model: str, vk_enabled: bool) -> str:
    """
    Build the test ID used for virtual-key parametrized tests.

    The ID is "<provider>-<model>-<suffix>", where the suffix is "with_vk"
    when vk_enabled is truthy and "no_vk" otherwise.

    Args:
        provider: Provider name
        model: Model name
        vk_enabled: Whether VK is enabled

    Returns:
        Formatted test ID string

    Example:
        >>> format_vk_test_id("openai", "gpt-4o", True)
        'openai-gpt-4o-with_vk'
        >>> format_vk_test_id("openai", "gpt-4o", False)
        'openai-gpt-4o-no_vk'
    """
    if vk_enabled:
        suffix = "with_vk"
    else:
        suffix = "no_vk"
    return f"{provider}-{model}-{suffix}"
def format_provider_model(provider: str, model: str) -> str:
    """
    Join a provider and a model into a single "provider/model" identifier.

    Args:
        provider: Provider name
        model: Model name

    Returns:
        The two values separated by a single forward slash.

    Example:
        >>> format_provider_model("openai", "gpt-4o")
        'openai/gpt-4o'
    """
    qualified_name = f"{provider}/{model}"
    return qualified_name

5583
tests/integrations/python/uv.lock generated Normal file

File diff suppressed because it is too large Load Diff