first commit

This commit is contained in:
Beyhan Oğur
2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions

View File

@@ -0,0 +1 @@
3.12

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,348 @@
{
"$schema": "https://www.getbifrost.ai/schema",
"mcp": {
"client_configs": [
{
"name": "sse_mcp",
"connection_type": "sse",
"connection_string": "env.MCP_SSE_URL",
"auth_type": "headers",
"headers": {
"Authorization": "env.MCP_SSE_AUTHORIZATION",
"ENV_EXA_API_KEY": "env.MCP_SSE_API_KEY"
},
"tools_to_execute": ["*"],
"tools_to_auto_execute": ["*"]
}
]
},
"providers": {
"openai": {
"keys": [
{
"name": "OpenAI API Key",
"value": "env.OPENAI_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"elevenlabs": {
"keys": [
{
"name": "ElevenLabs API Key",
"value": "env.ELEVENLABS_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": false
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"xai": {
"keys": [
{
"name": "Xai API Key",
"value": "env.XAI_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": false
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"huggingface": {
"keys": [
{
"name": "Hugging Face API Key",
"value": "env.HUGGING_FACE_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": false
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"anthropic": {
"keys": [
{
"name": "Anthropic API Key",
"value": "env.ANTHROPIC_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"gemini": {
"keys": [
{
"name": "Gemini API Key",
"value": "env.GEMINI_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"vertex": {
"keys": [
{
"name": "Vertex API Key",
"vertex_key_config": {
"project_id": "env.VERTEX_PROJECT_ID",
"region": "env.GOOGLE_LOCATION",
"auth_credentials": "env.VERTEX_CREDENTIALS"
},
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"mistral": {
"keys": [
{
"name": "Mistral API Key",
"value": "env.MISTRAL_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"cohere": {
"keys": [
{
"name": "Cohere API Key",
"value": "env.COHERE_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"parasail": {
"keys": [
{
"name": "Parasail API Key",
"value": "env.PARASAIL_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"groq": {
"keys": [
{
"name": "Groq API Key",
"value": "env.GROQ_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"perplexity": {
"keys": [
{
"name": "Perplexity API Key",
"value": "env.PERPLEXITY_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"cerebras": {
"keys": [
{
"name": "Cerebras API Key",
"value": "env.CEREBRAS_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"openrouter": {
"keys": [
{
"name": "OpenRouter API Key",
"value": "env.OPENROUTER_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"azure": {
"keys": [
{
"name": "Azure API Key",
"value": "env.AZURE_API_KEY",
"azure_key_config": {
"endpoint": "env.AZURE_ENDPOINT",
"api_version": "env.AZURE_API_VERSION",
"deployments": {
"gpt-4o": "gpt-4o",
"gpt-4o-mini": "gpt-4o-mini",
"gpt-4o-mini-tts": "gpt-4o-mini-tts",
"o1": "o1",
"gpt-4o-batch": "gpt-4o-batch",
"whisper": "whisper",
"text-embedding-3-small": "text-embedding-3-small",
"gpt-image-1": "gpt-image-1"
}
},
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"bedrock": {
"keys": [
{
"name": "Bedrock API Key",
"bedrock_key_config": {
"access_key": "env.AWS_ACCESS_KEY_ID",
"secret_key": "env.AWS_SECRET_ACCESS_KEY",
"region": "env.AWS_REGION",
"arn": "env.AWS_ARN"
},
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"replicate": {
"keys": [
{
"name": "Replicate API Key",
"value": "env.REPLICATE_API_KEY",
"weight": 1
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"runway": {
"keys": [
{
"name": "Runway API Key",
"value": "env.RUNWAY_API_KEY",
"weight": 1
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"nebius": {
"keys": [
{
"name": "Nebius API Key",
"value": "env.NEBIUS_API_KEY",
"weight": 1
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
}
},
"config_store": {
"enabled": true,
"type": "sqlite",
"config": {
"path": "./config.db"
}
},
"logs_store": {
"enabled": true,
"type": "sqlite",
"config": {
"path": "./logs.db"
}
},
"governance": {
"virtual_keys": [
{
"name": "Test Key",
"id": "vk-test",
"value": "sk-bf-test-key",
"is_active": true,
"provider_configs": [
{ "provider": "openai", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "elevenlabs", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "xai", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "huggingface", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "anthropic", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "gemini", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "vertex", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "mistral", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "cohere", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "parasail", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "groq", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "perplexity", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "cerebras", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "openrouter", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "azure", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "bedrock", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 }
]
}
]
},
"client": {
"drop_excess_requests": false,
"initial_pool_size": 300,
"allowed_origins": [
"*"
],
"enable_logging": true,
"enforce_auth_on_inference": false,
"allow_direct_keys": false,
"max_request_body_size_mb": 100
}
}

View File

@@ -0,0 +1,942 @@
# Bifrost Integration Tests Configuration
# This file centralizes all configuration for AI integration clients and test settings
# Bifrost Gateway Configuration
# All integrations route through Bifrost as a proxy/gateway
bifrost:
base_url: "${BIFROST_BASE_URL:-http://localhost:8080}"
# Integration-specific endpoints (suffixes appended to base_url)
endpoints:
openai: "openai"
anthropic: "anthropic"
cohere: "cohere"
google: "genai"
"gemini_passthrough": "genai_passthrough"
"anthropic_passthrough": "anthropic_passthrough"
litellm: "litellm"
langchain: "langchain"
pydanticai: "pydanticai"
bedrock: "bedrock"
azure: "openai" # Azure uses OpenAI routes with /openai/deployments/{id} paths
# Full URLs constructed as: {base_url.rstrip('/')}/{endpoints[integration]}
# Examples:
# - OpenAI: http://localhost:8080/openai
# - Anthropic: http://localhost:8080/anthropic
# - Google: http://localhost:8080/genai
# - LiteLLM: http://localhost:8080/litellm
# - LangChain: http://localhost:8080/langchain
# API Configuration
api:
timeout: 30 # seconds
max_retries: 3
retry_delay: 1 # seconds
# Provider model configurations
# Integrations (openai, anthropic, google, litellm, langchain) map to these providers
providers:
openai:
chat: "gpt-4o"
vision: "gpt-4o"
file: "gpt-4o"
tools: "gpt-4o-mini"
speech: "tts-1"
transcription: "whisper-1"
embeddings: "text-embedding-3-small"
image_generation: "gpt-image-1"
image_edit: "gpt-image-1"
streaming: "gpt-4o-mini"
thinking: "gpt-5.1"
batch_file_upload: "gpt-4o-mini"
batch_list: "gpt-4o"
batch_retrieve: "gpt-4o"
batch_cancel: "gpt-4o"
batch_inline: "gpt-4o"
file_upload: "gpt-4o-mini"
file_list: "gpt-4o-mini"
file_retrieve: "gpt-4o-mini"
file_delete: "gpt-4o-mini"
file_content: "gpt-4o-mini"
count_tokens: "gpt-4o-mini"
video: "sora-2"
alternatives:
- "gpt-4"
- "gpt-4-turbo-preview"
- "gpt-4o"
- "gpt-3.5-turbo"
azure:
chat: "gpt-4o"
vision: "gpt-4o"
tools: "gpt-4o-mini"
streaming: "gpt-4o-mini"
speech: "gpt-4o-mini-tts"
transcription: "whisper"
embeddings: "text-embedding-3-small"
image_generation: "gpt-image-1"
thinking: "o1"
batch_file_upload: "gpt-4o-batch"
batch_list: "gpt-4o-batch"
batch_retrieve: "gpt-4o-batch"
batch_cancel: "gpt-4o-batch"
file_upload: "gpt-4o"
file_list: "gpt-4o"
file_retrieve: "gpt-4o"
file_delete: "gpt-4o"
file_content: "gpt-4o"
xai:
chat: "grok-4-0709"
vision: "grok-2-vision-1212"
tools: "grok-4-0709"
streaming: "grok-4-0709"
thinking: "grok-3-mini"
anthropic:
chat: "claude-sonnet-4-5-20250929"
vision: "claude-sonnet-4-5-20250929"
file: "claude-sonnet-4-5-20250929"
tools: "claude-sonnet-4-5-20250929"
streaming: "claude-sonnet-4-5-20250929"
thinking: "claude-opus-4-5"
batch_file_upload: "claude-sonnet-4-20250514"
batch_inline: "claude-sonnet-4-20250514"
batch_list: "claude-sonnet-4-20250514"
batch_retrieve: "claude-sonnet-4-20250514"
batch_cancel: "claude-sonnet-4-20250514"
file_upload: "claude-sonnet-4-20250514"
file_list: "claude-sonnet-4-20250514"
file_retrieve: "claude-sonnet-4-20250514"
file_delete: "claude-sonnet-4-20250514"
file_content: "claude-sonnet-4-20250514"
count_tokens: "claude-sonnet-4-5-20250929"
alternatives:
- "claude-3-sonnet-20240229"
- "claude-3-opus-20240229"
- "claude-3-5-sonnet-20241022"
- "claude-3-haiku-20240307"
gemini:
chat: "gemini-3-flash-preview"
vision: "gemini-3-flash-preview"
tools: "gemini-3-flash-preview"
file: "gemini-2.5-flash"
thinking: "gemini-3-pro-preview"
speech: "gemini-2.5-flash-preview-tts"
transcription: "gemini-2.5-flash"
embeddings: "gemini-embedding-001"
image_generation: "gemini-2.5-flash-image"
image_edit: "gemini-3-pro-image-preview"
imagen: "imagen-4.0-generate-001"
video: "veo-3.1-generate-preview"
streaming: "gemini-3-flash-preview"
batch_create: "gemini-2.5-flash"
batch_inline: "gemini-2.5-flash"
batch_file_upload: "gemini-2.5-flash"
batch_list: "gemini-2.5-flash"
batch_retrieve: "gemini-2.5-flash"
batch_cancel: "gemini-2.5-flash"
batch_s3: "gemini-2.5-flash"
file_upload: "gemini-2.0-flash"
file_list: "gemini-2.0-flash"
file_content: "gemini-2.0-flash"
file_download: "gemini-2.0-flash"
file_retrieve: "gemini-2.0-flash"
file_delete: "gemini-2.0-flash"
count_tokens: "gemini-2.5-flash"
alternatives:
- "gemini-1.5-pro"
- "gemini-1.5-flash"
- "gemini-1.0-pro"
- "gemini-2.0-flash-001"
vertex:
chat: "gemini-2.5-flash"
vision: "claude-sonnet-4-5"
tools: "gemini-2.5-flash"
file: "claude-sonnet-4-5"
thinking: "gemini-2.5-pro"
embeddings: "gemini-embedding-001"
image_generation: "imagen-4.0-generate-001"
image_edit: "imagen-3.0-capability-001"
imagen: "imagen-4.0-generate-001"
streaming: "gemini-2.5-flash"
count_tokens: "claude-sonnet-4-5"
video: "veo-3.1-generate-preview"
bedrock:
chat: "global.anthropic.claude-sonnet-4-20250514-v1:0"
vision: "global.anthropic.claude-sonnet-4-20250514-v1:0"
file: "global.anthropic.claude-sonnet-4-20250514-v1:0"
tools: "global.anthropic.claude-sonnet-4-20250514-v1:0"
streaming: "global.anthropic.claude-sonnet-4-20250514-v1:0"
thinking: "us.anthropic.claude-opus-4-5-20251101-v1:0"
text_completion: "mistral.mistral-7b-instruct-v0:2"
embeddings: "global.cohere.embed-v4:0"
image_generation: "amazon.titan-image-generator-v2:0"
image_variation: "amazon.titan-image-generator-v2:0"
batch_inline: "anthropic.claude-3-5-sonnet-20240620-v1:0"
image_edit: "amazon.nova-canvas-v1:0"
batch_list: "anthropic.claude-3-5-sonnet-20240620-v1:0"
batch_retrieve: "anthropic.claude-3-5-sonnet-20240620-v1:0"
batch_cancel: "anthropic.claude-3-5-sonnet-20240620-v1:0"
batch_file_upload: "anthropic.claude-3-5-sonnet-20240620-v1:0"
batch_s3: "anthropic.claude-3-5-sonnet-20240620-v1:0"
file_upload: "anthropic.claude-3-5-sonnet-20240620-v1:0"
file_list: "anthropic.claude-3-5-sonnet-20240620-v1:0"
file_delete: "anthropic.claude-3-5-sonnet-20240620-v1:0"
file_content: "anthropic.claude-3-5-sonnet-20240620-v1:0"
count_tokens: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
alternatives:
- "anthropic.claude-3-opus-20240229-v1:0"
cohere:
chat: "command-a-03-2025"
vision: "command-a-vision-07-2025"
tools: "command-a-03-2025"
embeddings: "embed-v4.0"
streaming: "command-a-03-2025"
count_tokens: "command-a-03-2025"
alternatives:
- "command-r-plus"
huggingface:
image_generation: "fal-ai/fal-ai/flux/dev"
image_edit: "fal-ai/fal-ai/flux-2/edit"
nebius:
image_generation: "black-forest-labs/flux-schnell"
replicate:
video: "openai/sora-2-pro"
runway:
video: "gen4.5"
# Provider availability configuration
# Maps provider names to their API key environment variables
provider_api_keys:
openai: "OPENAI_API_KEY"
anthropic: "ANTHROPIC_API_KEY"
gemini: "GEMINI_API_KEY"
vertex: "VERTEX_API_KEY"
bedrock: "AWS_ACCESS_KEY_ID"
cohere: "COHERE_API_KEY"
xai: "XAI_API_KEY"
huggingface: "HUGGING_FACE_API_KEY"
nebius: "NEBIUS_API_KEY"
azure: "AZURE_API_KEY"
replicate: "REPLICATE_API_KEY"
runway: "RUNWAY_API_KEY"
# Provider test scenarios - which tests each provider supports
provider_scenarios:
openai:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
"web_search": true
image_url: true
image_base64: true
file_input: true
multiple_images: true
speech_synthesis: true
speech_synthesis_streaming: true
transcription: true
transcription_streaming: true
embeddings: true
image_generation: true
image_edit: true
thinking: true
prompt_caching: false
citations: false
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: true # PydanticAI structured output works reliably with OpenAI
pydanticai_streaming: true # PydanticAI streaming works with OpenAI
batch_file_upload: true
batch_create: true
batch_list: true
batch_retrieve: true
batch_cancel: true
batch_inline: true # OpenAI supports inline requests for batch
batch_s3: false # OpenAI does not use S3 for batch
file_upload: true
file_list: true
file_retrieve: true
file_delete: true
file_content: true
count_tokens: true
video_generation: false # disabled for now because of long running operations
azure:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
web_search: false
image_url: true
image_base64: true
file_input: false
multiple_images: true
speech_synthesis: true
speech_synthesis_streaming: true
transcription: true
transcription_streaming: true
embeddings: true
image_generation: true
image_edit: false
thinking: true
prompt_caching: false
citations: false
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: false
pydantic_structured_output: false
pydanticai_streaming: false
batch_file_upload: true
batch_create: false
batch_list: true
batch_retrieve: true
batch_cancel: true
batch_inline: false
batch_s3: false
file_upload: true
file_list: true
file_retrieve: true
file_delete: true
file_content: true
count_tokens: false
xai:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
image_url: true
image_base64: false
file_input: false
multiple_images: false
thinking: true
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: true
pydanticai_streaming: true
anthropic:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
web_search: true
image_url: true
image_base64: true
file_input: true
file_input_text: true
multiple_images: true
speech_synthesis: false
speech_synthesis_streaming: false
transcription: false
transcription_streaming: false
embeddings: false
thinking: true
prompt_caching: true
citations: true
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: false
pydantic_structured_output: true # PydanticAI structured output works with Anthropic
pydanticai_streaming: true # PydanticAI streaming works with Anthropic
batch_file_upload: true # NOTE(review): Anthropic batch API uses inline requests, not files, yet this scenario is enabled; confirm what it uploads
batch_create: true
batch_list: true
batch_retrieve: true
batch_cancel: true
batch_inline: true # Anthropic uses inline requests for batch
batch_s3: false # Anthropic does not use S3 for batch
file_upload: true
file_list: true
file_retrieve: true
file_delete: true
file_content: true
count_tokens: true
gemini:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
image_url: false # Gemini requires base64 or file upload
image_base64: true
file_input: true
multiple_images: false
speech_synthesis: true
speech_synthesis_streaming: true
transcription: true
transcription_streaming: true
embeddings: true
image_generation: true # Gemini image generation via responseModalities
image_edit: true # Gemini image editing
imagen: true # Imagen via :predict endpoint
imagen_edit: true # Imagen editing via image_edit model
thinking: true
video_generation: false # disabled for now because of long running operations
prompt_caching: false
citations: false
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: false # PydanticAI structured output unreliable via Bifrost for Gemini
pydanticai_streaming: false # PydanticAI GoogleModel streaming has asyncio issues
batch_file_upload: true # Gemini supports file upload via Files API
batch_create: true
batch_list: true
batch_retrieve: true
batch_cancel: true
batch_inline: true # Gemini uses inline requests for batch (synchronous)
batch_s3: false # Gemini does not use S3 for batch
file_upload: true
file_list: true
file_retrieve: true
file_delete: true
file_content: false # Gemini doesn't support direct file download
count_tokens: true
context_caching: true # Gemini context caching (Caches API) via Bifrost passthrough
vertex:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
image_url: false # Gemini requires base64 or file upload
image_base64: true
file_input: true
multiple_images: false
speech_synthesis: false
speech_synthesis_streaming: false
transcription: false
transcription_streaming: false
embeddings: true
image_generation: true
image_edit: true
imagen: true # Imagen via :predict endpoint
imagen_edit: true # Imagen editing via image_edit model
thinking: true
prompt_caching: false
list_models: true
video_generation: false # disabled for now because of long running operations
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: false # PydanticAI structured output unreliable via Bifrost for Gemini
pydanticai_streaming: false # PydanticAI GoogleModel streaming has asyncio issues
batch_file_upload: false # Batch file upload scenario is disabled for Vertex
batch_create: false
batch_list: false
batch_retrieve: false
batch_cancel: false
batch_inline: false # Inline batch scenario is disabled for Vertex
batch_s3: false # Gemini does not use S3 for batch
file_upload: false
file_list: false
file_retrieve: false
file_delete: false
file_content: false # Gemini doesn't support direct file download
count_tokens: false
bedrock:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: true
image_url: false
image_base64: true
file_input: true
file_input_text: true
multiple_images: false
speech_synthesis: false
speech_synthesis_streaming: false
transcription: false
transcription_streaming: false
embeddings: true
thinking: true
prompt_caching: true
citations: false
list_models: true
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: false # Bedrock not supported in PydanticAI tests
pydanticai_streaming: false # Bedrock not supported in PydanticAI tests
batch_file_upload: true # Bedrock uses S3 wrapper for file uploads
batch_create: true
batch_list: true
batch_retrieve: true
batch_cancel: true
batch_inline: false # Bedrock batch uses S3, not inline requests via API
batch_s3: true # Bedrock uses S3 for batch input/output
file_upload: true # Bedrock uses S3 wrapper for file storage
file_list: true # Bedrock lists files in S3 bucket
file_retrieve: true # Bedrock retrieves S3 object metadata
file_delete: true # Bedrock deletes S3 objects
file_content: true # Bedrock downloads S3 object content
image_generation: true # Bedrock supports image generation via invoke (Titan, SA, cross-provider)
image_edit: true # Bedrock supports image editing via invoke (Titan, SA)
image_variation: true # Bedrock supports image variation via invoke (Titan IMAGE_VARIATION)
count_tokens: true # Bedrock supports token counting via CountTokens API
cohere:
simple_chat: true
multi_turn_conversation: true
streaming: true
tool_calls: true
multiple_tool_calls: true
end2end_tool_calling: true
automatic_function_calling: false
image_url: true
image_base64: true
multiple_images: true
speech_synthesis: false
speech_synthesis_streaming: false
transcription: false
transcription_streaming: false
embeddings: true
thinking: false
prompt_caching: false
citations: false
list_models: false
responses: true
responses_image: true
text_completion: false
langchain_structured_output: true
pydantic_structured_output: false # PydanticAI CohereModel doesn't reliably support structured output
pydanticai_streaming: false # PydanticAI CohereModel doesn't implement streaming
batch_file_upload: false
batch_create: false
batch_list: false
batch_retrieve: false
batch_cancel: false
batch_inline: false # Cohere does not support batch API
batch_s3: false # Cohere does not support batch API
file_upload: false # Cohere does not support Files API
file_list: false
file_retrieve: false
file_delete: false
file_content: false
count_tokens: true
huggingface:
image_generation: true
image_edit: true
nebius:
image_generation: true
replicate:
video_generation: false # disabled for now because of long running operations
runway:
video_generation: false # disabled for now because of long running operations
# Scenario to capability mapping
# Maps test scenario names to their corresponding capability types
scenario_capabilities:
simple_chat: "chat"
multi_turn_conversation: "chat"
responses: "chat"
responses_image: "vision"
text_completion: "chat"
streaming: "streaming"
tool_calls: "tools"
multiple_tool_calls: "tools"
end2end_tool_calling: "tools"
automatic_function_calling: "tools"
web_search: "chat"
image_url: "vision"
image_base64: "vision"
file_input: "file"
file_input_text: "file"
multiple_images: "vision"
speech_synthesis: "speech"
speech_synthesis_streaming: "speech"
transcription: "transcription"
transcription_streaming: "transcription"
embeddings: "embeddings"
image_generation: "image_generation" # Uses image_generation model
image_edit: "image_edit" # Uses image_edit model
imagen: "imagen" # Uses imagen model (Gemini/Vertex)
imagen_edit: "image_edit" # Uses image_edit model for Imagen editing
thinking: "thinking"
prompt_caching: "chat"
citations: "chat"
list_models: "chat"
langchain_structured_output: "chat" # LangChain structured output uses chat capability
count_tokens: "count_tokens" # Token counting capability
pydantic_structured_output: "chat" # Structured output uses chat capability
pydanticai_streaming: "streaming" # PydanticAI streaming uses streaming capability
batch_file_upload: "batch_file_upload" # Uses batch_file_upload model directly
batch_create: "batch_create"
batch_list: "batch_list"
batch_retrieve: "batch_retrieve"
batch_cancel: "batch_cancel"
batch_inline: "batch_inline" # Uses batch_inline model directly
batch_s3: "batch_s3" # Uses batch_s3 model directly
file_upload: "file_upload" # Uses file_upload model directly
file_list: "file_list" # Uses file_list model directly
file_retrieve: "file_retrieve" # Uses file_retrieve model directly
file_delete: "file_delete" # Uses file_delete model directly
file_content: "file_content" # Uses file_content model directly
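count_tokens: "chat" # NOTE(review): duplicate key; "count_tokens" is also mapped to "count_tokens" earlier in this mapping, and loaders that keep the last value (e.g. PyYAML) will use "chat"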
video_generation: "video"
context_caching: "chat" # Gemini Caches API (passthrough)
# Model capabilities matrix
model_capabilities:
# OpenAI Models
"gpt-3.5-turbo":
chat: true
tools: true
vision: false
streaming: true
max_tokens: 4096
context_window: 4096
"gpt-4":
chat: true
tools: true
vision: false
streaming: true
max_tokens: 8192
context_window: 8192
"gpt-4o":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 4096
context_window: 128000
"gpt-4o-mini":
chat: true
tools: true
vision: true
streaming: true
speech: false
transcription: false
max_tokens: 4096
context_window: 128000
# OpenAI Speech Models
"tts-1":
chat: false
tools: false
vision: false
streaming: false
speech: true
transcription: false
max_tokens: null
context_window: null
"tts-1-hd":
chat: false
tools: false
vision: false
streaming: false
speech: true
transcription: false
max_tokens: null
context_window: null
# OpenAI Transcription Models
"whisper-1":
chat: false
tools: false
vision: false
streaming: false
speech: false
transcription: true
embeddings: false
max_tokens: null
context_window: null
# OpenAI Embedding Models
"text-embedding-3-small":
chat: false
tools: false
vision: false
streaming: false
speech: false
transcription: false
embeddings: true
max_tokens: null
context_window: 8191
dimensions: 1536
"text-embedding-3-large":
chat: false
tools: false
vision: false
streaming: false
speech: false
transcription: false
embeddings: true
max_tokens: null
context_window: 8191
dimensions: 3072
"text-embedding-ada-002":
chat: false
tools: false
vision: false
streaming: false
speech: false
transcription: false
embeddings: true
max_tokens: null
context_window: 8191
dimensions: 1536
# Anthropic Models
"claude-3-haiku-20240307":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 4096
context_window: 200000
"claude-3-sonnet-20240229":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 4096
context_window: 200000
"claude-3-opus-20240229":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 4096
context_window: 200000
# Google Models
"gemini-pro":
chat: true
tools: true
vision: false
streaming: true
max_tokens: 8192
context_window: 32768
"gemini-2.0-flash-001":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 8192
context_window: 32768
"gemini-1.5-pro":
chat: true
tools: true
vision: true
streaming: true
max_tokens: 8192
context_window: 1000000
# Gemini Transcription Models
"gemini-2.5-flash":
chat: true
tools: true
vision: true
streaming: true
speech: false
transcription: true
embeddings: false
max_tokens: 8192
context_window: 1000000
audio_max_duration: 34200 # 9.5 hours in seconds
"gemini-2.5-pro":
chat: true
tools: true
vision: true
streaming: true
speech: false
transcription: true
embeddings: false
max_tokens: 8192
context_window: 2000000
audio_max_duration: 34200 # 9.5 hours in seconds
# Gemini TTS Models
"gemini-2.5-flash-preview-tts":
chat: false
tools: false
vision: false
streaming: false
speech: true
transcription: false
embeddings: false
max_tokens: 32000 # 32k token context window for TTS
context_window: 32000
audio_format: "pcm"
sample_rate: 24000
channels: 1
"gemini-2.5-pro-preview-tts":
chat: false
tools: false
vision: false
streaming: false
speech: true
transcription: false
embeddings: false
max_tokens: 32000 # 32k token context window for TTS
context_window: 32000
audio_format: "pcm"
sample_rate: 24000
channels: 1
# Mistral Models
"mistral-7b-instruct":
chat: true
tools: false
vision: false
streaming: true
max_tokens: 4096
context_window: 32768
"mistral-8x7b-instruct":
chat: true
tools: true
vision: false
streaming: true
max_tokens: 4096
context_window: 32768
# Test configuration
test_settings:
# Maximum tokens for test responses
max_tokens:
chat: 100
vision: 200
tools: 100
complex: 300
speech: null # Speech doesn't use token limits
transcription: null # Transcription doesn't use token limits
embeddings: null # Embeddings don't use token limits (text is the input)
# Timeout settings for tests
timeouts:
simple: 30 # seconds
complex: 60 # seconds
# Retry settings for flaky tests
retries:
max_attempts: 3
delay: 2 # seconds
# Integration-specific settings
integration_settings:
openai:
organization: "${OPENAI_ORG_ID:-}"
project: "${OPENAI_PROJECT_ID:-}"
anthropic:
version: "2023-06-01"
google:
project_id: "${GOOGLE_PROJECT_ID:-}"
location: "${GOOGLE_LOCATION:-us-central1}"
litellm:
drop_params: true
debug: false
langchain:
debug: false
streaming: true
bedrock:
region: "${AWS_REGION:-us-west-2}"
s3_bucket: "${AWS_S3_BUCKET:-}"
batch_role_arn: "${AWS_ARN:-}"
output_s3_prefix: "${AWS_OUTPUT_S3_PREFIX:-bifrost-batch-output/}"
azure:
api_version: "${AZURE_API_VERSION:-2024-10-21}"
# Environment-specific overrides
environments:
development:
api:
timeout: 60
max_retries: 5
test_settings:
timeouts:
simple: 60
complex: 120
production:
api:
timeout: 15
max_retries: 2
test_settings:
timeouts:
simple: 20
complex: 40
# Virtual key testing configuration
# When enabled, cross-provider tests will run twice: with and without the x-bf-vk header
virtual_key:
enabled: true
value: "sk-bf-test-key"
# Logging configuration
logging:
level: "INFO"
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
file: "tests.log"

View File

@@ -0,0 +1,12 @@
{
"type": "service_account",
"project_id": "dummy-bifrost-project",
"private_key_id": "dummy-key-id-12345",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCY+aj4fvYTj4l9\nYcgnEg7f9Y2zcck8bvYrhIY/m0NJpfUV2rOAbvgHJXUgobmUcgf6E9b76AWVN/Wm\nk6dxE+PWj1/DwkaYk4uDHpWFOn6HkF7ypLeGMamSnU+OfKFoUrRW8NfoMgh+uGVt\nwMh82qBztaTJKjN2BlxBepgR0iZKG81ySkyhaUL1Jh99E3AcNULkkp+VHTD51lw6\n4H0B197tY18GUZ+iPK3Laj9HBVOAjxqsCs4cMsWZ16R+dfZr8ZcDC0zodhcVNSsX\nA7uKZ5tKChJQEzHhk4o8ywnrsyd4E9FHKHsbs+Ye7K5qrTOxpKpDiy6DOPlknZ0/\nzLjk+SARAgMBAAECggEAAepUIktYZnmvblI//Sj8rHdJRoJGOqxNcnaW+4b+euUW\nQ1CspV1+U51amCBvza6kZ+0gaKEhi3lAAhfYQFx5YGtHTbHtKwjTL6oDrKKTncx9\nz/oJYeV6vVTOGGCjZQx2f30DwJZE0XG/1Qpl6L9SSBv14HlwY/6US75snRsWvCAc\nYlWJCozpn5ycSZDbqQBSVF3ueeHoH4ahL5Iw2NELAk87HLBGbtfvfwWpnQqlJtIi\ndQWqYYBHrqk5ThQNcJl1o3oBY7MMVE6/jWZbr+aIXtgfQlG3j+Z6PD6/7g3z65Yy\nxiOrypqpEm8UhmmxoF7UjSci+32NLj7SfdgXM2QLgwKBgQDP3iOCWBmzK72tWeF9\nkNKTXn/6niJHgaPhVTVYsAEoQMcWBtGSUF7QRWZt4qu8APRTwhThFpLVLSZOk50I\ndf9xFhWqubsif/ox1Fbd3SGswwoWhHvGHQ/JH/75akpMKTXkkLDomNNL49kNwW1E\nmb1EJPOeyuOxhw8gP7v8qJ8cAwKBgQC8ZaLgYUmMsc+IDpREN22fXNmoBE0OgrIK\nBLa9rQRAbzdmKcNxLpGAEsyiuPOrgD/9U2G9hM6kztCN53Ho86rphFiHHgN2NJfH\n/Jz/jTtM3UPKv0QCHuLTZknLFeYE3A0jNYFpRi/hjy2n0E4Gtp/0Y0ZULseMSvM5\naN7CWGS5WwKBgQDPCWb+vTcjwO5UCdDQ2v0RsS7w9K4Z4KLUnaTbp7oPWK2yX6o+\n+/PjpywFSJ5aS+0Ou6FGK9ClqSmdW+MteTGqdh+wgvtDuon9NYwrwMN4qm6SzPPm\n+C0v2sF/tIE56FX4SLEbipPx44fd7okhqarcg51uzJAK0wWazkAzv9Nx9wKBgEUk\n9EtvyWO22tkvqKEEytoDZOrycSmTNC7THhKtTnMrnmSDjXSbx9D+lVZflSbrkhCy\nqpu5A3KfaRG70SXTUHYWGbu1e0XF9bLzdtegCRSj3L6rxhUVKuC1mP3NUreT38p9\nV7rAhNA/EV2W6RwzqK80RFqfNKO72lrGr4MamBUjAoGBAJzy/47STnaW23aPutJF\nU23Kp5QDSkZzCniDBNIbuxlgZ5x2m4wK0FPRwWBcuvisG3G9VXohEfxJ0/IG8t6/\nOH1tVXYeR9pWtGIWEZuzFHL38ji4/BL3i94gW26GntJrr1ut94KHN1ynqkYRP/gK\ngRU91/0vXG+SOTubYUh5G5w3\n-----END PRIVATE KEY-----\n",
"client_email": "dummy-bifrost@dummy-bifrost-project.iam.gserviceaccount.com",
"client_id": "123456789012345678901",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dummy-bifrost%40dummy-bifrost-project.iam.gserviceaccount.com"
}

View File

@@ -0,0 +1,126 @@
[project]
name = "bifrost-integration-tests"
version = "0.1.0"
description = "Production-ready end-to-end test suite for testing AI integrations through Bifrost proxy"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
# Core testing framework
"pytest>=7.0.0",
"pytest-asyncio>=0.21.0",
# Environment and configuration
"python-dotenv>=1.0.0",
"PyYAML>=6.0",
# Image processing
"Pillow>=9.0.0",
# HTTP requests for debugging
"requests>=2.28.0",
# Type hints
"typing-extensions>=4.0.0",
# Test reporting
"pytest-html>=3.1.0",
"pytest-cov>=4.0.0",
# AI/ML SDK dependencies
"openai>=1.30.0",
"anthropic>=0.25.0",
"litellm==1.80.5",
"langchain-openai==0.1.0",
"langchain-core==0.3.81",
"langchain-anthropic==0.1.0",
"langchain-google-genai==4.1.1",
"langchain-mistralai==0.1.0",
"langgraph>=0.1.0",
"mistralai>=0.4.0",
"google-genai>=1.50.0",
"pydantic-ai>=0.1.0",
"boto3>=1.34.0",
# Testing utilities
"websocket-client>=1.6.0",
"httpx>=0.25.0",
"pytest-timeout>=2.1.0",
"pytest-mock>=3.11.0",
"pytest-rerunfailures>=11.0",
"langchain-google-vertexai>=3.1.0",
"langchain-tests>=1.0.2",
"langchain>=1.1.0",
"langchain-community>=0.4.1",
"langchain-aws>=1.1.0",
"pytest-xdist>=3.8.0",
"pyasn1>=0.6.2",
]
[project.optional-dependencies]
dev = [
"black>=23.0.0", # Code formatting
"flake8>=6.0.0", # Linting
"mypy>=1.5.0", # Type checking
]
[tool.pytest.ini_options]
# Test discovery
testpaths = ["."]
python_files = "test_*.py"
python_classes = "Test*"
python_functions = "test_*"
# Output formatting
addopts = [
"-v",
"-s", # Show print statements (no output capture)
"--tb=short",
"--strict-markers",
"--disable-warnings",
"--color=yes",
]
# Logging configuration
log_cli = true
log_cli_level = "ERROR"
log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s"
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
# Timeout settings (5 minutes per test)
timeout = 300
# Markers for test categorization
markers = [
"integration: marks tests as integration tests",
"slow: marks tests as slow running",
"e2e: marks tests as end-to-end tests",
"tool_calling: marks tests as tool calling tests",
"flaky: marks tests as flaky with automatic retries (reruns=3, reruns_delay=2)",
]
# Minimum version
minversion = "7.0"
[tool.black]
line-length = 100
target-version = ['py38', 'py39', 'py310', 'py311']
include = '\.pyi?$'
[tool.mypy]
python_version = "3.11"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = false
ignore_missing_imports = true
[tool.coverage.run]
source = ["tests"]
omit = ["*/tests/*", "*/venv/*", "*/.venv/*"]
[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"raise AssertionError",
"raise NotImplementedError",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]
[tool.uv]
exclude-newer = "2026-04-08"

View File

@@ -0,0 +1,343 @@
#!/usr/bin/env python3
"""
Bifrost Integration End-to-End Test Runner
This script runs all integration end-to-end tests for Bifrost.
It can run tests individually or all together, providing comprehensive
reporting and flexible execution options.
Usage:
python run_all_tests.py # Run all tests
python run_all_tests.py --integration openai # Run specific integration
python run_all_tests.py --list # List available integrations
python run_all_tests.py --parallel # Run tests in parallel
python run_all_tests.py --verbose # Verbose output
"""
import argparse
import subprocess
import sys
import time
import os
from pathlib import Path
from typing import List, Dict, Optional
import concurrent.futures
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
class BifrostTestRunner:
    """Runs the per-integration pytest files and prints a combined summary.

    Each entry in ``self.integrations`` names a test file (relative to this
    script's directory), a human-readable description, and the environment
    variables that must be set for the integration to run. Outcomes are stored
    in ``self.results`` keyed by integration name.
    """

    # Wall-clock limit, in seconds, for one integration's pytest subprocess.
    TEST_TIMEOUT_SECONDS = 300

    def __init__(self):
        self.test_dir = Path(__file__).parent
        self.integrations = {
            "openai": {
                "file": "tests/integrations/test_openai.py",
                "description": "OpenAI Python SDK integration tests",
                "env_vars": ["OPENAI_API_KEY"],
            },
            "anthropic": {
                "file": "tests/integrations/test_anthropic.py",
                "description": "Anthropic Python SDK integration tests",
                "env_vars": ["ANTHROPIC_API_KEY"],
            },
            "litellm": {
                "file": "tests/integrations/test_litellm.py",
                "description": "LiteLLM integration tests",
                "env_vars": ["OPENAI_API_KEY"],  # LiteLLM can use OpenAI key
            },
            "langchain": {
                "file": "tests/integrations/test_langchain.py",
                "description": "LangChain integration tests",
                "env_vars": [
                    "OPENAI_API_KEY",
                    "ANTHROPIC_API_KEY",
                ],  # LangChain uses multiple providers
            },
            "google": {
                "file": "tests/integrations/test_google.py",
                "description": "Google GenAI integration tests",
                "env_vars": ["GOOGLE_API_KEY"],
            },
            "bedrock": {
                "file": "tests/integrations/test_bedrock.py",
                "description": "Bedrock integration tests",
                "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
            },
        }
        self.results = {}

    def check_environment(self, integration: str, quiet: bool = False) -> bool:
        """Return True when every required environment variable is set and non-empty.

        Args:
            integration: Key into ``self.integrations`` (KeyError if unknown).
            quiet: When True, do not print the "Skipping ..." notice. Used by
                ``list_integrations`` so that listing does not look like a test run.
        """
        config = self.integrations[integration]
        missing_vars = [var for var in config["env_vars"] if not os.getenv(var)]
        if not missing_vars:
            return True
        if not quiet:
            print(
                f"⚠ Skipping {integration}: Missing environment variables: {', '.join(missing_vars)}"
            )
        return False

    def run_integration_test(self, integration: str, verbose: bool = False) -> Dict:
        """Run pytest on one integration's test file and describe the outcome.

        Returns a dict that always has ``success``. On a completed run it also
        has ``return_code``, ``stdout``, ``stderr`` and ``elapsed_time``; on
        any problem it has ``error`` instead, plus ``skipped: True`` when the
        required environment variables are missing.
        """
        if integration not in self.integrations:
            return {"success": False, "error": f"Unknown integration: {integration}"}

        config = self.integrations[integration]
        test_file = self.test_dir / config["file"]
        if not test_file.exists():
            return {"success": False, "error": f"Test file not found: {test_file}"}

        # Check environment variables
        if not self.check_environment(integration):
            return {
                "success": False,
                "error": "Missing required environment variables",
                "skipped": True,
            }

        print(f"\n{'='*60}")
        print(f"Running {integration.upper()} Integration Tests")
        print(f"{'='*60}")
        print(f"Description: {config['description']}")
        print(f"Test file: {config['file']}")

        # Use the current interpreter so pytest runs in the same environment.
        cmd = [sys.executable, "-m", "pytest", str(test_file)]
        # Verbose mode streams pytest output live; quiet mode captures it.
        cmd.extend(["-v", "-s"] if verbose else ["-q"])

        timeout = self.TEST_TIMEOUT_SECONDS
        start_time = time.time()
        try:
            result = subprocess.run(
                cmd,
                cwd=self.test_dir,
                text=True,
                capture_output=not verbose,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            return {
                "success": False,
                "error": f"Test timed out ({timeout // 60} minutes)",
                "elapsed_time": timeout,
            }
        except Exception as e:
            return {
                "success": False,
                "error": str(e),
                "elapsed_time": time.time() - start_time,
            }

        return {
            "success": result.returncode == 0,
            "return_code": result.returncode,
            # stdout/stderr are None when output was streamed instead of captured.
            "stdout": result.stdout or "",
            "stderr": result.stderr or "",
            "elapsed_time": time.time() - start_time,
        }

    def run_all_tests(self, parallel: bool = False, verbose: bool = False) -> None:
        """Run every configured integration, then print the summary.

        The summary step exits the process with status 1 if anything failed.
        """
        print("Bifrost Integration End-to-End Test Suite")
        print("=" * 50)
        print(f"Running tests for {len(self.integrations)} integrations")
        print(f"Parallel execution: {'Enabled' if parallel else 'Disabled'}")
        print(f"Verbose output: {'Enabled' if verbose else 'Disabled'}")

        # Only reported for the operator; the URL is not probed here.
        bifrost_url = os.getenv("BIFROST_BASE_URL", "http://localhost:8080")
        print(f"Bifrost URL: {bifrost_url}")

        start_time = time.time()
        if parallel:
            self._run_parallel(verbose)
        else:
            self._run_sequential(verbose)
        total_time = time.time() - start_time
        self._print_summary(total_time)

    def _run_sequential(self, verbose: bool) -> None:
        """Run the integrations one after another in declaration order."""
        for integration in self.integrations:
            self.results[integration] = self.run_integration_test(integration, verbose)

    def _run_parallel(self, verbose: bool) -> None:
        """Run the integrations on a pool of three worker threads."""
        print("\nRunning tests in parallel...")
        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            # Submit all tests
            future_to_integration = {
                executor.submit(
                    self.run_integration_test, integration, verbose
                ): integration
                for integration in self.integrations
            }
            # Collect results as they finish; completion order is not submission order.
            for future in concurrent.futures.as_completed(future_to_integration):
                integration = future_to_integration[future]
                try:
                    self.results[integration] = future.result()
                except Exception as e:
                    self.results[integration] = {"success": False, "error": str(e)}

    def _print_summary(self, total_time: float) -> None:
        """Print one line per integration plus totals; exit(1) if any failed."""
        print(f"\n{'='*60}")
        print("TEST SUMMARY")
        print(f"{'='*60}")

        passed = 0
        failed = 0
        skipped = 0
        for integration, result in self.results.items():
            if result.get("skipped"):
                skipped += 1
                print(
                    f"{integration:12} {'SKIPPED':8} - {result.get('error', 'Unknown error')}"
                )
            elif result["success"]:
                passed += 1
                elapsed = result.get("elapsed_time", 0)
                print(f"{integration:12} {'PASSED':8} - {elapsed:.2f}s")
            else:
                failed += 1
                # A run that completed but failed carries no "error" key, so
                # fall back to pytest's exit code instead of "Unknown error".
                error_msg = result.get("error") or (
                    f"pytest exited with code {result.get('return_code', 'unknown')}"
                )
                print(f"{integration:12} {'FAILED':8} - {error_msg}")
                # Print stderr if available
                if result.get("stderr"):
                    print(f" Error output: {result['stderr'][:200]}...")

        print(f"\n{'='*60}")
        print(
            f"Total: {len(self.integrations)} | Passed: {passed} | Failed: {failed} | Skipped: {skipped}"
        )
        print(f"Total time: {total_time:.2f} seconds")
        print(f"{'='*60}")

        # Exit with appropriate code
        if failed > 0:
            sys.exit(1)
        else:
            print("All tests completed successfully!")

    def list_integrations(self) -> None:
        """Print each integration with a ready/not-ready marker and its env vars."""
        print("Available Integrations:")
        print("=" * 30)
        for integration, config in self.integrations.items():
            # quiet=True: listing must not print "Skipping ..." notices.
            env_status = "✓" if self.check_environment(integration, quiet=True) else "✗"
            print(f"{env_status} {integration:12} - {config['description']}")
            print(f" Required env vars: {', '.join(config['env_vars'])}")
            print()
def main():
    """Command-line entry point.

    ``--list`` prints the integrations and returns. ``--integration NAME``
    runs a single integration and exits with status 1 if the name is unknown
    or the run did not succeed. With neither flag, every integration is run.
    """
    parser = argparse.ArgumentParser(
        description="Run Bifrost integration end-to-end tests",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python run_all_tests.py # Run all tests
python run_all_tests.py --integration openai # Run OpenAI tests only
python run_all_tests.py --parallel --verbose # Run all tests in parallel with verbose output
python run_all_tests.py --list # List available integrations
""",
    )
    parser.add_argument("--integration", "-i", help="Run tests for specific integration only")
    # The three switches share the same shape, so register them from a table.
    switches = (
        ("--list", "-l", "List available integrations and their status"),
        ("--parallel", "-p", "Run tests in parallel (faster but less readable output)"),
        ("--verbose", "-v", "Enable verbose output (shows test output in real-time)"),
    )
    for long_flag, short_flag, help_text in switches:
        parser.add_argument(long_flag, short_flag, action="store_true", help=help_text)
    args = parser.parse_args()

    runner = BifrostTestRunner()

    if args.list:
        runner.list_integrations()
        return

    if not args.integration:
        runner.run_all_tests(args.parallel, args.verbose)
        return

    if args.integration not in runner.integrations:
        print(f"Error: Unknown integration '{args.integration}'")
        print(f"Available integrations: {', '.join(runner.integrations.keys())}")
        sys.exit(1)

    outcome = runner.run_integration_test(args.integration, args.verbose)
    if outcome["success"]:
        print(f"\n{args.integration} tests passed!")
        return

    error_msg = outcome.get("error", "Unknown error")
    print(f"\n{args.integration} tests failed: {error_msg}")
    # Show stdout/stderr if available
    for stream_key, heading in (
        ("stdout", "\n--- Test Output ---"),
        ("stderr", "\n--- Error Output ---"),
    ):
        if outcome.get(stream_key):
            print(heading)
            print(outcome[stream_key])
    sys.exit(1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,272 @@
#!/usr/bin/env python3
"""
Integration-specific test runner for Bifrost integration tests.
This script runs tests for each integration independently using their native SDKs.
No more complex gateway conversions - just direct testing!
"""
import os
import sys
import argparse
import subprocess
from pathlib import Path
from typing import List, Optional
def check_api_keys():
    """Split the known integrations by whether their credential variable is set.

    Returns:
        ``(available, missing)``: two lists of integration names, each in
        declaration order. A variable that is unset or empty counts as missing.
    """
    env_var_for = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "google": "GOOGLE_API_KEY",
        "litellm": "LITELLM_API_KEY",
        "bedrock": "AWS_ACCESS_KEY_ID",
    }
    available, missing = [], []
    for name, env_var in env_var_for.items():
        bucket = available if os.getenv(env_var) else missing
        bucket.append(name)
    return available, missing
def run_integration_tests(
    integrations: List[str], test_pattern: Optional[str] = None, verbose: bool = False
):
    """Run pytest once per integration and collect the outcomes.

    Args:
        integrations: Integration names; each maps to
            ``tests/integrations/test_<name>.py`` next to this script.
        test_pattern: Optional pytest ``-k`` expression.
        verbose: Pass ``-v`` to pytest and echo its output on success too.

    Returns:
        Dict keyed by integration name. A completed run yields
        ``{"returncode", "stdout", "stderr"}`` (stderr is merged into stdout,
        so ``stderr`` is always ``""``); a run that could not be started
        yields ``{"error": <message>}``.
    """
    results = {}
    # Resolve test files relative to this script, not the caller's cwd.
    script_dir = Path(__file__).parent

    for integration in integrations:
        print(f"\n{'='*60}")
        print(f"🧪 TESTING {integration.upper()} INTEGRATION")
        print(f"{'='*60}")

        test_file = script_dir / "tests" / "integrations" / f"test_{integration}.py"
        # Check if test file exists
        if not test_file.exists():
            print(f"❌ Test file not found: {test_file}")
            results[integration] = {"error": f"Test file not found: {test_file}"}
            continue

        # sys.executable keeps pytest in the interpreter/virtualenv running
        # this script; a bare "python" may resolve to a different install.
        cmd = [sys.executable, "-m", "pytest", str(test_file)]
        if test_pattern:
            cmd.extend(["-k", test_pattern])
        cmd.append("-v" if verbose else "-q")

        # Run the tests
        try:
            completed = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
        except Exception as e:
            print(f"❌ Error running {integration} tests: {e}")
            results[integration] = {"error": str(e)}
            continue

        results[integration] = {
            "returncode": completed.returncode,
            "stdout": completed.stdout,
            "stderr": "",  # stderr is captured in stdout
        }
        if completed.returncode == 0:
            print(f"{integration.upper()} tests PASSED")
            if verbose:
                print(completed.stdout)
        else:
            print(f"{integration.upper()} tests FAILED")
            # Always print output on failure to show what went wrong
            if completed.stdout:
                print(completed.stdout)

    return results
def print_summary(
    results: dict, available_integrations: List[str], missing_integrations: List[str]
):
    """Print the closing report: key status, per-integration outcome, totals.

    An entry in ``results`` counts as passed only when it has no ``"error"``
    key and its ``"returncode"`` is 0; everything else is listed as failed.
    """
    divider = "=" * 80
    print(f"\n{divider}")
    print("🎯 FINAL SUMMARY")
    print(f"{divider}")

    # API Key Status
    print(f"\n🔑 API Key Status:")
    for name in available_integrations:
        print(f"{name.upper()}: Available")
    for name in missing_integrations:
        print(f"{name.upper()}: Missing API key")

    # Test Results
    print(f"\n📊 Test Results:")
    passed_integrations = []
    failed_integrations = []
    for name, outcome in results.items():
        label = name.upper()
        if "error" in outcome:
            print(f" 💥 {label}: Error - {outcome['error']}")
            failed_integrations.append(name)
            continue
        if outcome["returncode"] != 0:
            print(f"{label}: Some tests failed")
            failed_integrations.append(name)
            continue
        print(f"{label}: All tests passed")
        passed_integrations.append(name)

    # Overall Status
    total_tested = len(results)
    total_passed = len(passed_integrations)
    if total_tested > 0:
        rate_line = f" Success rate: {(total_passed/total_tested)*100:.1f}%"
    else:
        rate_line = " Success rate: N/A"

    print(f"\n🏆 Overall Results:")
    print(f" Integrations tested: {total_tested}")
    print(f" Integrations passed: {total_passed}")
    print(rate_line)

    if failed_integrations:
        print(f"\n⚠️ Failed integrations: {', '.join(failed_integrations)}")
        print(" Check the detailed output above for specific test failures.")
def main():
    """Command-line entry point for the per-integration test runner.

    ``--check-keys`` and ``--show-models`` are informational and return
    without running tests. Otherwise the requested integrations (default:
    every integration whose credential variable is set) are run, a summary is
    printed, and the process exits with the number of failed integrations
    (0 when all passed).
    """
    # Integration -> environment variable that gates it. Mirrors the variables
    # check_api_keys() reads so the CLI choices and the missing-key hints match.
    required_env_vars = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "google": "GOOGLE_API_KEY",
        "litellm": "LITELLM_API_KEY",
        "bedrock": "AWS_ACCESS_KEY_ID",
    }
    all_integrations = list(required_env_vars)

    parser = argparse.ArgumentParser(
        description="Run integration-specific integration tests"
    )
    parser.add_argument(
        "--integrations",
        nargs="+",
        choices=all_integrations + ["all"],
        default=["all"],
        help="Integrations to test (default: all available)",
    )
    parser.add_argument(
        "--test", help="Run specific test pattern (e.g., 'test_01_simple_chat')"
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument(
        "--check-keys", action="store_true", help="Only check API key availability"
    )
    parser.add_argument(
        "--show-models",
        action="store_true",
        help="Show model configuration for all integrations",
    )
    args = parser.parse_args()

    # Check API keys
    available_integrations, missing_integrations = check_api_keys()

    if args.check_keys:
        print("🔑 API Key Status:")
        for integration in available_integrations:
            print(f"{integration.upper()}: Available")
        for integration in missing_integrations:
            print(f"{integration.upper()}: Missing")
        return

    if args.show_models:
        # Import and show model configuration using absolute path
        script_dir = Path(__file__).parent
        models_path = script_dir / "tests" / "utils" / "models.py"
        if not models_path.exists():
            print(f"❌ Models file not found: {models_path}")
            sys.exit(1)
        # Put the script directory on sys.path so `tests.utils.models` resolves.
        models_parent_dir = str(script_dir)
        if models_parent_dir not in sys.path:
            sys.path.insert(0, models_parent_dir)
        try:
            from tests.utils.models import print_model_summary

            print_model_summary()
        except ImportError as e:
            print(f"❌ Could not import print_model_summary: {e}")
            print(f"Tried to import from: {models_path}")
            sys.exit(1)
        return

    # Determine which integrations to test
    if "all" in args.integrations:
        integrations_to_test = available_integrations
        requested_integrations = all_integrations
    else:
        integrations_to_test = [
            p for p in args.integrations if p in available_integrations
        ]
        requested_integrations = args.integrations

    if not integrations_to_test:
        print("❌ No integrations available for testing. Please set API keys.")
        print("\nRequired environment variables for requested integrations:")
        for integration in requested_integrations:
            # Fall back to the <NAME>_API_KEY convention for unmapped names.
            api_key_name = required_env_vars.get(
                integration, f"{integration.upper()}_API_KEY"
            )
            print(f" - {api_key_name}")
        sys.exit(1)

    # Calculate which requested integrations are missing API keys
    requested_missing_integrations = [
        integration
        for integration in requested_integrations
        if integration in missing_integrations
    ]

    # Show what we're about to test
    print("🚀 Starting integration tests...")
    print(f"📋 Testing integrations: {', '.join(integrations_to_test)}")
    if requested_missing_integrations:
        print(
            f"⏭️ Skipping integrations (no API key): {', '.join(requested_missing_integrations)}"
        )

    # Run tests
    results = run_integration_tests(integrations_to_test, args.test, args.verbose)

    # Print summary
    print_summary(results, available_integrations, requested_missing_integrations)

    # Exit status is the number of integrations that failed or errored.
    failed_count = sum(
        1 for r in results.values() if r.get("returncode", 1) != 0 or "error" in r
    )
    sys.exit(failed_count)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,8 @@
"""
Bifrost Integration Tests
Production-ready test suite for testing various AI integrations through Bifrost proxy.
Supports multiple integrations with uniform test interface.
"""
__version__ = "1.0.0"

View File

@@ -0,0 +1,188 @@
"""
Pytest configuration for integration-specific tests.
"""
import pytest
import os
import logging
def pytest_configure(config):
    """Set up ERROR-level logging and register the suite's custom markers."""
    # Configure logging
    logging.basicConfig(
        level=logging.ERROR,
        format='%(asctime)s [%(levelname)8s] %(name)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Marker declarations, registered in this order.
    marker_declarations = (
        "openai: mark test as requiring OpenAI API key",
        "anthropic: mark test as requiring Anthropic API key",
        "google: mark test as requiring Google API key",
        "litellm: mark test as requiring LiteLLM setup",
        "azure: Azure OpenAI integration tests",
        "flaky: mark test as flaky with automatic retries (reruns=3, reruns_delay=2)",
    )
    for declaration in marker_declarations:
        config.addinivalue_line("markers", declaration)
def pytest_collection_modifyitems(config, items):
    """Tag every collected test as flaky and with its integration marker.

    The integration marker is chosen by the first ``test_<name>`` substring
    found in the node id; tests from other files get only the flaky marker.
    """
    retry_marker = pytest.mark.flaky(reruns=3, reruns_delay=2)
    # Checked in this order; the first match wins.
    marker_names = ("openai", "anthropic", "google", "litellm", "azure")

    for test_item in items:
        # Add flaky marker to all tests
        test_item.add_marker(retry_marker)

        matched = next(
            (name for name in marker_names if f"test_{name}" in test_item.nodeid),
            None,
        )
        if matched is not None:
            test_item.add_marker(getattr(pytest.mark, matched))
@pytest.fixture(scope="session")
def api_keys():
    """Map each integration name to its API key from the environment (or None)."""
    env_var_for = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "google": "GOOGLE_API_KEY",
        "litellm": "LITELLM_API_KEY",
        "azure": "AZURE_API_KEY",
    }
    return {name: os.getenv(env_var) for name, env_var in env_var_for.items()}
@pytest.fixture(scope="session")
def available_integrations(api_keys):
    """List the integrations whose entry in ``api_keys`` is truthy, in fixed order."""
    ordered_names = ("openai", "anthropic", "google", "litellm", "azure")
    return [name for name in ordered_names if api_keys[name]]
@pytest.fixture
def test_summary():
    """Provide a fresh dict with empty ``passed``/``failed``/``skipped`` lists."""
    return {outcome: [] for outcome in ("passed", "failed", "skipped")}
def pytest_runtest_makereport(item, call):
    """Record each test's outcome in ``item.session.test_results``.

    A test is recorded once: from its "call" phase, or from its "setup" phase
    when it was skipped there (in which case no call phase runs). A skip
    raised with ``pytest.skip()`` carries exception info, so it must be told
    apart from a real failure; it goes to the "skipped" bucket with a
    ``reason``, while other exceptions go to "failed" with an ``error``.
    Nothing is recorded if the session has no ``test_results`` attribute.
    """
    excinfo = call.excinfo
    # ExceptionInfo.typename is the exception class name. Comparing names
    # avoids depending on where pytest defines its skip exception.
    was_skipped = excinfo is not None and getattr(excinfo, "typename", "") in (
        "Skipped",
        "SkipTest",
    )

    # Only the "call" phase is recorded, to avoid double counting, except for
    # skips decided during setup, which never reach the call phase.
    if call.when != "call" and not (call.when == "setup" and was_skipped):
        return

    # Derive the integration from the test file name; first match wins.
    integration = None
    for name in ("openai", "anthropic", "google", "litellm", "azure"):
        if f"test_{name}" in item.nodeid:
            integration = name
            break

    result_info = {
        "integration": integration,
        "test": item.name,
        "nodeid": item.nodeid,
    }

    if not hasattr(item.session, "test_results"):
        return
    buckets = item.session.test_results

    if excinfo is None:
        buckets["passed"].append(result_info)
    elif was_skipped:
        result_info["reason"] = str(excinfo.value)
        buckets["skipped"].append(result_info)
    else:
        result_info["error"] = str(excinfo.value)
        buckets["failed"].append(result_info)
def pytest_sessionstart(session):
    """Attach empty ``passed``/``failed``/``skipped`` result lists to the session."""
    session.test_results = {
        outcome: [] for outcome in ("passed", "failed", "skipped")
    }
def pytest_sessionfinish(session, exitstatus):
    """Print per-integration pass/fail/skip counts and the failed-test details.

    Results whose ``integration`` is ``None`` (tests from files that match no
    known integration) are grouped under "unknown" rather than dropped.
    Does nothing if the session carries no ``test_results`` attribute.
    """
    results = getattr(session, "test_results", None)
    if results is None:
        # Same guard the makereport hook uses: nothing was collected here.
        return

    print("\n" + "=" * 80)
    print("INTEGRATION TEST SUMMARY")
    print("=" * 80)

    # Group results by integration. The "integration" key is always present
    # but may be None, so `or` is needed; a .get() default would never apply.
    integration_results = {}
    for outcome in ("passed", "failed", "skipped"):
        for result in results[outcome]:
            integration = result.get("integration") or "unknown"
            counts = integration_results.setdefault(
                integration, {"passed": 0, "failed": 0, "skipped": 0}
            )
            counts[outcome] += 1

    # Print summary by integration
    for integration, counts in integration_results.items():
        total = counts["passed"] + counts["failed"] + counts["skipped"]
        print(f"\n{integration.upper()} Integration:")
        print(f" ✅ Passed: {counts['passed']}")
        print(f" ❌ Failed: {counts['failed']}")
        print(f" ⏭️ Skipped: {counts['skipped']}")
        print(f" 📊 Total: {total}")
        # Skipped tests are excluded from the rate; show it whenever at least
        # one test actually ran, so an all-failed integration reports 0.0%.
        executed = counts["passed"] + counts["failed"]
        if executed > 0:
            success_rate = (counts["passed"] / executed) * 100
            print(f" 🎯 Success Rate: {success_rate:.1f}%")

    # Print failed tests details
    if results["failed"]:
        print(f"\n❌ FAILED TESTS ({len(results['failed'])}):")
        for result in results["failed"]:
            print(f"{result.get('integration') or 'unknown'}: {result['test']}")
            if "error" in result:
                print(f" Error: {result['error']}")

    print("\n" + "=" * 80)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,911 @@
"""
LiteLLM Integration Tests
🤖 MODELS USED:
- Chat: gpt-3.5-turbo (OpenAI via LiteLLM)
- Vision: gpt-4o (OpenAI via LiteLLM)
- Tools: gpt-3.5-turbo (OpenAI via LiteLLM)
- Speech: tts-1 (OpenAI via LiteLLM)
- Transcription: whisper-1 (OpenAI via LiteLLM)
- Embeddings: text-embedding-3-small (OpenAI via LiteLLM)
- Alternatives: claude-3-haiku-20240307, gemini-pro, mistral-7b-instruct, gpt-4, command-r-plus
Tests all 19 core scenarios using LiteLLM SDK directly:
1. Simple chat
2. Multi turn conversation
3. Tool calls
4. Multiple tool calls
5. End2End tool calling
6. Automatic function calling
7. Image (url)
8. Image (base64)
9. Multiple images
10. Complete end2end test with conversation history, tool calls, tool results and images
11. Integration specific tests
12. Error handling
13. Streaming
14. Google Gemini integration
15. Mistral integration
16. OpenAI embeddings via LiteLLM
17. OpenAI speech synthesis via LiteLLM
18. OpenAI transcription via LiteLLM
19. Multi-provider comparison
"""
import pytest
import json
import litellm
from typing import List, Dict, Any
from .utils.common import (
Config,
SIMPLE_CHAT_MESSAGES,
MULTI_TURN_MESSAGES,
SINGLE_TOOL_CALL_MESSAGES,
MULTIPLE_TOOL_CALL_MESSAGES,
IMAGE_URL_MESSAGES,
IMAGE_BASE64_MESSAGES,
MULTIPLE_IMAGES_MESSAGES,
COMPLEX_E2E_MESSAGES,
INVALID_ROLE_MESSAGES,
STREAMING_CHAT_MESSAGES,
STREAMING_TOOL_CALL_MESSAGES,
WEATHER_TOOL,
CALCULATOR_TOOL,
mock_tool_response,
assert_valid_chat_response,
assert_has_tool_calls,
assert_valid_image_response,
assert_valid_error_response,
assert_error_propagation,
assert_valid_streaming_response,
collect_streaming_content,
extract_tool_calls,
get_api_key,
skip_if_no_api_key,
COMPARISON_KEYWORDS,
WEATHER_KEYWORDS,
LOCATION_KEYWORDS,
# Audio and embeddings test data
EMBEDDINGS_SINGLE_TEXT,
EMBEDDINGS_MULTIPLE_TEXTS,
EMBEDDINGS_SIMILAR_TEXTS,
SPEECH_TEST_INPUT,
generate_test_audio,
assert_valid_speech_response,
assert_valid_transcription_response,
assert_valid_embedding_response,
assert_valid_embeddings_batch_response,
calculate_cosine_similarity,
collect_streaming_transcription_content,
get_provider_voice,
get_provider_voices,
# Token counting test data
INPUT_TOKENS_SIMPLE_TEXT,
INPUT_TOKENS_LONG_TEXT,
INPUT_TOKENS_WITH_SYSTEM,
)
from .utils.config_loader import get_model
from .utils.parametrize import (
get_cross_provider_params_for_scenario,
format_provider_model,
)
# Providers left out of the cross-provider parametrization for LiteLLM tests:
# - bedrock, cohere: don't work well through the LiteLLM proxy
# - gemini: LiteLLM routes it through Vertex AI-specific endpoints that
#   Bifrost's LiteLLM integration doesn't support
LITELLM_EXCLUDED_PROVIDERS = ["bedrock", "cohere", "gemini"]
@pytest.fixture
def test_config():
    """Provide a newly constructed ``Config`` to each test that requests it."""
    config = Config()
    return config
@pytest.fixture(autouse=True)
def setup_litellm(monkeypatch):
    """Point LiteLLM at Bifrost and install dummy provider credentials.

    Runs automatically before every test in this module. Environment variables
    and the Google credential refresh are changed through ``monkeypatch``, so
    they are restored after each test and do not leak dummy keys into the rest
    of the session. The ``litellm`` module attributes are assigned directly
    and therefore persist.
    """
    import datetime
    from pathlib import Path

    from .utils.config_loader import get_integration_url, get_config

    # LiteLLM will load these dummy credentials, but all actual requests go
    # through Bifrost.
    dummy_creds_path = Path(__file__).parent.parent / "dummy-gcp-credentials.json"

    # Dummy credentials since Bifrost handles actual authentication. For
    # Google, every API key variable LiteLLM might look at is set.
    dummy_env = {
        "OPENAI_API_KEY": "dummy-openai-key-bifrost-handles-auth",
        "ANTHROPIC_API_KEY": "dummy-anthropic-key-bifrost-handles-auth",
        "MISTRAL_API_KEY": "dummy-mistral-key-bifrost-handles-auth",
        "GOOGLE_API_KEY": "dummy-google-api-key-bifrost-handles-auth",
        "GEMINI_API_KEY": "dummy-gemini-api-key-bifrost-handles-auth",
        "VERTEX_PROJECT": "dummy-vertex-project",
        "VERTEX_LOCATION": "us-central1",
        # Prevents Vertex AI from trying to authenticate with real credentials.
        "GOOGLE_APPLICATION_CREDENTIALS": str(dummy_creds_path),
    }
    for name, value in dummy_env.items():
        monkeypatch.setenv(name, value)

    # litellm._turn_on_debug()

    # Mock credential refresh to prevent actual Google API calls.
    # Since Bifrost handles auth, LiteLLM does not need to authenticate.
    def mock_refresh(self, request):
        """Mock refresh that sets a dummy token - Bifrost handles real auth"""
        self.token = "dummy-access-token-bifrost-handles-auth"
        # Naive UTC timestamp, same value utcnow() produced, without calling
        # the deprecated datetime.utcnow().
        now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        self.expiry = now_utc + datetime.timedelta(hours=1)

    try:
        from google.oauth2 import service_account

        monkeypatch.setattr(service_account.Credentials, "refresh", mock_refresh)
    except ImportError:
        pass  # google-auth not installed

    # Get Bifrost URL for LiteLLM
    base_url = get_integration_url("litellm")
    config = get_config()
    integration_settings = config.get_integration_settings("litellm")
    api_config = config.get_api_config()

    # Configure LiteLLM globally
    if base_url:
        litellm.api_base = base_url

    # Set timeout and other settings
    litellm.request_timeout = api_config.get("timeout", 30)

    # Apply integration-specific settings (only when set to a truthy value)
    if integration_settings.get("drop_params"):
        litellm.drop_params = integration_settings["drop_params"]
    if integration_settings.get("debug"):
        litellm.set_verbose = integration_settings["debug"]
def convert_to_litellm_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Wrap every common-format tool spec in the OpenAI-style function envelope.

    Args:
        tools: Tool specifications in the shared test format.

    Returns:
        One ``{"type": "function", "function": <spec>}`` dict per input spec,
        in the same order.
    """
    wrapped: List[Dict[str, Any]] = []
    for spec in tools:
        wrapped.append({"type": "function", "function": spec})
    return wrapped
class TestLiteLLMIntegration:
"""Test suite for LiteLLM integration covering all 11 core scenarios"""
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "simple_chat", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_01_simple_chat(self, test_config, provider, model):
    """Test Case 1: Simple chat interaction.

    Sends SIMPLE_CHAT_MESSAGES to the parametrized model and checks that a
    non-empty message content comes back.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    response = litellm.completion(
        model=model,
        messages=SIMPLE_CHAT_MESSAGES,
        max_tokens=100,
    )
    assert_valid_chat_response(response)
    assert response.choices[0].message.content is not None
    assert len(response.choices[0].message.content) > 0
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "multi_turn_conversation", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_02_multi_turn_conversation(self, test_config, provider, model):
    """Test Case 2: Multi-turn conversation.

    Sends MULTI_TURN_MESSAGES and checks that the reply contains at least one
    population-related keyword.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    response = litellm.completion(
        model=model,
        messages=MULTI_TURN_MESSAGES,
        max_tokens=150,
    )
    assert_valid_chat_response(response)
    content = response.choices[0].message.content.lower()
    # Should mention population or numbers since we asked about Paris population
    assert any(word in content for word in ["population", "million", "people", "inhabitants"])
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "tool_calls", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_03_single_tool_call(self, test_config, provider, model):
    """Test Case 3: Single tool call.

    Offers only the weather tool and expects exactly one call to
    ``get_weather`` that carries a ``location`` argument.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    tools = convert_to_litellm_tools([WEATHER_TOOL])
    response = litellm.completion(
        model=model,
        messages=SINGLE_TOOL_CALL_MESSAGES,
        tools=tools,
        max_tokens=100,
    )
    assert_has_tool_calls(response, expected_count=1)
    tool_calls = extract_tool_calls(response)
    assert tool_calls[0]["name"] == "get_weather"
    assert "location" in tool_calls[0]["arguments"]
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "multiple_tool_calls", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_04_multiple_tool_calls(self, test_config, provider, model):
    """Test Case 4: Multiple tool calls in one response.

    Offers the weather and calculator tools and expects two tool calls, one
    for ``get_weather`` and one for ``calculate``.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    tools = convert_to_litellm_tools([WEATHER_TOOL, CALCULATOR_TOOL])
    response = litellm.completion(
        model=model,
        messages=MULTIPLE_TOOL_CALL_MESSAGES,
        tools=tools,
        max_tokens=200,
    )
    assert_has_tool_calls(response, expected_count=2)
    tool_calls = extract_tool_calls(response)
    tool_names = [tc["name"] for tc in tool_calls]
    assert "get_weather" in tool_names
    assert "calculate" in tool_names
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "end2end_tool_calling", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_05_end2end_tool_calling(self, test_config, provider, model):
    """Test Case 5: Complete tool calling flow with responses.

    Asks a weather question, feeds a mocked tool result back into the
    conversation, and checks that the final answer contains a weather or
    location keyword. Both completions use the parametrized ``model``.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    messages = [{"role": "user", "content": "What's the weather in Boston?"}]
    tools = convert_to_litellm_tools([WEATHER_TOOL])
    response = litellm.completion(
        model=model,
        messages=messages,
        tools=tools,
        max_tokens=100,
    )
    assert_has_tool_calls(response, expected_count=1)

    # Add assistant's tool call to conversation
    assistant_message = response.choices[0].message
    messages.append(assistant_message)

    # Add tool response
    tool_calls = extract_litellm_tool_calls(response)
    tool_response = mock_tool_response(tool_calls[0]["name"], tool_calls[0]["arguments"])
    messages.append(
        {
            "role": "tool",
            "tool_call_id": assistant_message.tool_calls[0].id,
            "content": tool_response,
        }
    )

    # Get final response from the same model that issued the tool call, so
    # the whole flow is exercised against the provider under test.
    final_response = litellm.completion(model=model, messages=messages, max_tokens=150)
    assert_valid_chat_response(final_response)
    content = final_response.choices[0].message.content.lower()
    weather_location_keywords = WEATHER_KEYWORDS + LOCATION_KEYWORDS
    assert any(word in content for word in weather_location_keywords)
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "automatic_function_calling", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_06_automatic_function_calling(self, test_config, provider, model):
    """Test Case 6: Automatic function calling.

    With ``tool_choice="auto"`` and only the calculator tool on offer, the
    model is expected to issue exactly one ``calculate`` call.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    tools = convert_to_litellm_tools([CALCULATOR_TOOL])
    response = litellm.completion(
        model=model,
        messages=[{"role": "user", "content": "Calculate 25 * 4 for me"}],
        tools=tools,
        tool_choice="auto",
        max_tokens=100,
    )
    # Should automatically choose to use the calculator
    assert_has_tool_calls(response, expected_count=1)
    tool_calls = extract_litellm_tool_calls(response)
    assert tool_calls[0]["name"] == "calculate"
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "image_url", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_07_image_url(self, test_config, provider, model):
    """Test Case 7: Image analysis from URL.

    Sends IMAGE_URL_MESSAGES and validates the reply with
    ``assert_valid_image_response``.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    response = litellm.completion(
        model=model,
        messages=IMAGE_URL_MESSAGES,
        max_tokens=200,
    )
    assert_valid_image_response(response)
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "image_base64", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_08_image_base64(self, test_config, provider, model):
    """Test Case 8: Image analysis from base64.

    Sends IMAGE_BASE64_MESSAGES and validates the reply with
    ``assert_valid_image_response``.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    response = litellm.completion(
        model=model,
        messages=IMAGE_BASE64_MESSAGES,
        max_tokens=200,
    )
    assert_valid_image_response(response)
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "multiple_images", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_09_multiple_images(self, test_config, provider, model):
    """Test Case 9: Multiple image analysis.

    Sends MULTIPLE_IMAGES_MESSAGES and checks that the reply contains at
    least one of COMPARISON_KEYWORDS.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    response = litellm.completion(
        model=model,
        messages=MULTIPLE_IMAGES_MESSAGES,
        max_tokens=300,
    )
    assert_valid_image_response(response)
    content = response.choices[0].message.content.lower()
    # Should mention comparison or differences
    assert any(
        word in content for word in COMPARISON_KEYWORDS
    ), f"Response should contain comparison keywords. Got content: {content}"
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "complex_e2end", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
@pytest.mark.skip(reason="Known flaky test")
def test_10_complex_end2end(self, test_config, provider, model):
    """Test Case 10: Complex end-to-end with conversation, images, and tools.

    The first completion may describe the image, call the weather tool, or
    both. When tool calls are present, mocked tool results are appended and a
    final completion is requested and validated.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    # Shallow copy: only the list itself is extended below.
    messages = COMPLEX_E2E_MESSAGES.copy()
    tools = convert_to_litellm_tools([WEATHER_TOOL])

    # First, analyze the image
    response1 = litellm.completion(
        model=model,
        messages=messages,
        tools=tools,
        max_tokens=300,
    )
    first_message = response1.choices[0].message

    # Should either describe image or call weather tool (or both)
    assert first_message.content is not None or first_message.tool_calls is not None

    # Add response to conversation
    messages.append(first_message)

    # If there were tool calls, handle them
    if first_message.tool_calls:
        for tool_call in first_message.tool_calls:
            tool_name = tool_call.function.name
            tool_args = json.loads(tool_call.function.arguments)
            tool_response = mock_tool_response(tool_name, tool_args)
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_response,
                }
            )

        # Get final response after tool calls
        final_response = litellm.completion(model=model, messages=messages, max_tokens=200)
        assert_valid_chat_response(final_response)
@pytest.mark.skip(reason="known flaky test")
def test_11_integration_specific_features(self, test_config):
    """Test Case 11: LiteLLM-specific features.

    Covers three things: a plain chat against several hard-coded models, a
    forced ``calculate`` tool choice, and a completion with sampling
    parameters. A model that cannot be reached skips the test; a failed
    assertion fails it.
    """
    # Test 1: Multiple integrations through LiteLLM
    # Note: Gemini is excluded as LiteLLM routes it through Vertex AI-specific endpoints
    integrations_to_test = [
        "gpt-3.5-turbo",  # OpenAI
        "claude-3-haiku-20240307",  # Anthropic
        "mistral/mistral-7b-instruct",  # Mistral
    ]
    for model in integrations_to_test:
        try:
            response = litellm.completion(
                model=model,
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                max_tokens=50,
            )
            assert_valid_chat_response(response)
        except AssertionError:
            # An invalid response is a real failure, not an availability issue.
            raise
        except Exception as e:
            # Some integrations might not be available, skip gracefully
            pytest.skip(f"Integration {model} not available: {e}")

    # Test 2: Function calling with specific tool choice
    tools = convert_to_litellm_tools([CALCULATOR_TOOL, WEATHER_TOOL])
    response2 = litellm.completion(
        model=get_model("litellm", "chat"),
        messages=[{"role": "user", "content": "What's 15 + 27?"}],
        tools=tools,
        tool_choice={"type": "function", "function": {"name": "calculate"}},
        max_tokens=100,
    )
    assert_has_tool_calls(response2, expected_count=1)
    tool_calls = extract_litellm_tool_calls(response2)
    assert tool_calls[0]["name"] == "calculate"

    # Test 3: Temperature and other parameters
    response3 = litellm.completion(
        model=get_model("litellm", "chat"),
        messages=[{"role": "user", "content": "Tell me a creative story in one sentence."}],
        temperature=0.9,
        top_p=0.9,
        max_tokens=100,
    )
    assert_valid_chat_response(response3)
def test_12_error_handling_invalid_roles(self, test_config):
    """Test Case 12: Error handling for invalid roles.

    A completion built from INVALID_ROLE_MESSAGES must raise; the raised
    error is then passed to the shared error assertions.
    """
    with pytest.raises(Exception) as caught:
        litellm.completion(
            max_tokens=100,
            messages=INVALID_ROLE_MESSAGES,
            model=get_model("litellm", "chat"),
        )

    # Inspect the captured exception for role-related detail and propagation.
    raised = caught.value
    assert_valid_error_response(raised, "tester")
    assert_error_propagation(raised, "litellm")
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "streaming", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_13_streaming(self, test_config, provider, model):
    """Test Case 13: Streaming chat completion.

    Runs one plain streaming completion (content expected, no tool calls)
    and one streaming completion with the weather tool (tool calls expected).
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    # Test basic streaming
    stream = litellm.completion(
        model=model,
        messages=STREAMING_CHAT_MESSAGES,
        max_tokens=200,
        stream=True,
    )
    content, chunk_count, tool_calls_detected = collect_streaming_content(
        stream, "openai", timeout=120  # LiteLLM uses OpenAI format
    )

    # Validate streaming results
    assert chunk_count > 0, "Should receive at least one chunk"
    assert len(content) > 10, "Should receive substantial content"
    assert not tool_calls_detected, "Basic streaming shouldn't have tool calls"

    # Test streaming with tool calls
    stream_with_tools = litellm.completion(
        model=model,
        messages=STREAMING_TOOL_CALL_MESSAGES,
        max_tokens=150,
        tools=convert_to_litellm_tools([WEATHER_TOOL]),
        stream=True,
    )
    content_tools, chunk_count_tools, tool_calls_detected_tools = collect_streaming_content(
        stream_with_tools, "openai", timeout=120  # LiteLLM uses OpenAI format
    )

    # Validate tool streaming results
    assert chunk_count_tools > 0, "Should receive at least one chunk with tools"
    assert tool_calls_detected_tools, "Should detect tool calls in streaming response"
@pytest.mark.skip(reason="known flaky test")
def test_14_gemini_integration(self, test_config):
    """Test Case 14: Google Gemini integration through LiteLLM.

    Runs a plain chat and a calculator-tool request against
    ``gemini-2.0-flash-001``. Errors other than failed assertions skip the
    test; failed assertions fail it.
    """
    try:
        # Test basic chat with Gemini
        response = litellm.completion(
            model="gemini-2.0-flash-001",
            messages=[
                {
                    "role": "user",
                    "content": "What is machine learning? Answer in one sentence.",
                }
            ],
            max_tokens=100,
        )
        assert_valid_chat_response(response)
        content = response.choices[0].message.content.lower()
        assert any(
            word in content for word in ["machine", "learning", "data", "algorithm"]
        ), f"Response should mention ML concepts. Got: {content}"

        # Test with tool calling if supported
        tools = convert_to_litellm_tools([CALCULATOR_TOOL])
        response_tools = litellm.completion(
            model="gemini-2.0-flash-001",
            messages=[{"role": "user", "content": "Calculate 42 * 17"}],
            tools=tools,
            max_tokens=100,
        )

        # Gemini should either use tools or provide calculation
        if response_tools.choices[0].message.tool_calls:
            assert_has_tool_calls(response_tools, expected_count=1)
        else:
            # Should at least provide the calculation result
            content = response_tools.choices[0].message.content
            assert "714" in content or "42" in content, "Should provide calculation result"
    except AssertionError:
        # A failed check is a real failure; do not report it as a skip.
        raise
    except Exception as e:
        pytest.skip(f"Gemini integration not available: {e}")
@pytest.mark.skip(reason="known flaky test")
def test_15_mistral_integration(self, test_config):
    """Test Case 15: Mistral integration through LiteLLM.

    Runs an explanation prompt and a higher-temperature prompt against
    ``mistral/mistral-7b-instruct``. Errors other than failed assertions skip
    the test; failed assertions fail it.
    """
    try:
        # Test basic chat with Mistral
        response = litellm.completion(
            model="mistral/mistral-7b-instruct",
            messages=[
                {
                    "role": "user",
                    "content": "Explain recursion in programming briefly.",
                }
            ],
            max_tokens=150,
        )
        assert_valid_chat_response(response)
        content = response.choices[0].message.content.lower()
        assert any(
            word in content for word in ["recursion", "function", "itself", "call"]
        ), f"Response should explain recursion. Got: {content}"

        # Test with different temperature
        response_creative = litellm.completion(
            model="mistral/mistral-7b-instruct",
            messages=[{"role": "user", "content": "Write a haiku about code."}],
            temperature=0.8,
            max_tokens=100,
        )
        assert_valid_chat_response(response_creative)
    except AssertionError:
        # A failed check is a real failure; do not report it as a skip.
        raise
    except Exception as e:
        pytest.skip(f"Mistral integration not available: {e}")
@pytest.mark.skip(reason="known flaky test")
def test_16_openai_embeddings_via_litellm(self, test_config):
    """Test Case 16: OpenAI embeddings through LiteLLM.

    Checks a single embedding, a batch of embeddings, and the cosine
    similarity of the first two EMBEDDINGS_SIMILAR_TEXTS vectors. Errors
    other than failed assertions skip the test; failed assertions fail it.
    """
    try:
        # Fall back to a default model name when none is configured.
        embedding_model = get_model("litellm", "embeddings") or "text-embedding-3-small"

        # Test single text embedding
        response = litellm.embedding(
            model=embedding_model,
            input=EMBEDDINGS_SINGLE_TEXT,
        )
        assert_valid_embedding_response(response, expected_dimensions=1536)

        # Test batch embeddings
        batch_response = litellm.embedding(
            model=embedding_model,
            input=EMBEDDINGS_MULTIPLE_TEXTS,
        )
        assert_valid_embeddings_batch_response(
            batch_response, len(EMBEDDINGS_MULTIPLE_TEXTS), expected_dimensions=1536
        )

        # Test similarity analysis
        similar_response = litellm.embedding(
            model=embedding_model,
            input=EMBEDDINGS_SIMILAR_TEXTS,
        )
        # The response and its items may be dicts or attribute-style objects.
        items = (
            similar_response["data"]
            if isinstance(similar_response, dict)
            else similar_response.data
        )
        embeddings = [
            item["embedding"] if isinstance(item, dict) else item.embedding for item in items
        ]

        # Calculate similarity between similar texts
        similarity = calculate_cosine_similarity(embeddings[0], embeddings[1])
        assert (
            similarity > 0.7
        ), f"Similar texts should have high similarity, got {similarity:.4f}"
    except AssertionError:
        # A failed check is a real failure; do not report it as a skip.
        raise
    except Exception as e:
        pytest.skip(f"OpenAI embeddings through LiteLLM not available: {e}")
def test_17_openai_speech_via_litellm(self, test_config):
    """Test Case 17: OpenAI speech synthesis through LiteLLM.

    Synthesizes speech with the primary and secondary voices, validates both
    payloads, and checks that they differ. Errors other than failed
    assertions skip the test; failed assertions fail it.
    """

    def _audio_payload(speech_response):
        # LiteLLM might return different response formats: use ``.content``
        # when present, otherwise treat the response itself as the payload.
        if hasattr(speech_response, "content"):
            return speech_response.content
        return speech_response

    try:
        # Fall back to a default model name when none is configured.
        speech_model = get_model("litellm", "speech") or "tts-1"

        # Test basic speech synthesis
        response = litellm.speech(
            model=speech_model,
            voice=get_provider_voice("openai", "primary"),
            input=SPEECH_TEST_INPUT,
        )
        audio_content = _audio_payload(response)
        assert_valid_speech_response(audio_content)

        # Test with different voice
        response2 = litellm.speech(
            model=speech_model,
            voice=get_provider_voice("openai", "secondary"),
            input="Short test message for voice comparison.",
            response_format="mp3",
        )
        audio_content2 = _audio_payload(response2)
        assert_valid_speech_response(audio_content2, expected_audio_size_min=500)

        # Different voices should produce different audio
        assert (
            audio_content != audio_content2
        ), "Different voices should produce different audio"
    except AssertionError:
        # A failed check is a real failure; do not report it as a skip.
        raise
    except Exception as e:
        pytest.skip(f"OpenAI speech through LiteLLM not available: {e}")
def test_18_openai_transcription_via_litellm(self, test_config):
    """Test Case 18: OpenAI transcription through LiteLLM.

    Transcribes generated test audio twice: once with defaults and once with
    an explicit language and temperature. Errors other than failed
    assertions skip the test; failed assertions fail it.
    """
    try:
        # Generate test audio for transcription
        test_audio = generate_test_audio()
        # Fall back to a default model name when none is configured.
        transcription_model = get_model("litellm", "transcription") or "whisper-1"

        # Test basic transcription
        response = litellm.transcription(
            model=transcription_model,
            file=("test_audio.wav", test_audio, "audio/wav"),
        )
        assert_valid_transcription_response(response)

        # Test with additional parameters
        response2 = litellm.transcription(
            model=transcription_model,
            file=("test_audio.wav", test_audio, "audio/wav"),
            language="en",
            temperature=0.0,
        )
        assert_valid_transcription_response(response2)
    except AssertionError:
        # A failed check is a real failure; do not report it as a skip.
        raise
    except Exception as e:
        pytest.skip(f"OpenAI transcription through LiteLLM not available: {e}")
def test_19_multi_provider_comparison(self, test_config):
    """Test Case 19: Compare responses across different providers through LiteLLM.

    Asks each hard-coded model the same question. Models that raise are
    reported and skipped over; at least one must answer, and every answer
    must mention Tokyo or Japan. A response that fails validation fails the
    test instead of being treated as an unavailable model.
    """
    test_prompt = "What is the capital of Japan? Answer in one word."
    models_to_test = [
        "gpt-3.5-turbo",  # OpenAI
        "claude-3-haiku-20240307",  # Anthropic
        "gemini-2.0-flash-001",  # Google
    ]
    responses = {}
    for model in models_to_test:
        try:
            response = litellm.completion(
                model=model,
                messages=[{"role": "user", "content": test_prompt}],
                max_tokens=50,
            )
            assert_valid_chat_response(response)
            responses[model] = response.choices[0].message.content.lower()
        except AssertionError:
            # An invalid response is a real failure, not an availability issue.
            raise
        except Exception as e:
            print(f"Model {model} not available: {e}")
            continue

    # Verify that we got at least one response
    assert len(responses) > 0, "Should get at least one successful response"

    # All responses should mention Tokyo or Japan
    for model, content in responses.items():
        assert any(
            word in content for word in ["tokyo", "japan"]
        ), f"Model {model} should mention Tokyo. Got: {content}"
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_20_token_counter_simple_text(self, test_config, provider, model):
    """Test Case 20: Count tokens from simple text using LiteLLM token_counter.

    The count for INPUT_TOKENS_SIMPLE_TEXT must be an int between 3 and 20.
    Errors other than failed assertions skip the test; failed assertions
    fail it.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    try:
        # Count tokens using text parameter
        token_count = litellm.token_counter(
            model=model,
            text=INPUT_TOKENS_SIMPLE_TEXT,
        )

        # Validate token count
        assert isinstance(token_count, int), "Token count should be an integer"
        assert token_count > 0, "Token count should be positive"
        # Simple text should have a reasonable token count (between 3-20 tokens)
        assert 3 <= token_count <= 20, (
            f"Simple text should have 3-20 tokens, got {token_count}"
        )
    except AssertionError:
        # A failed check is a real failure; do not report it as a skip.
        raise
    except Exception as e:
        pytest.skip(f"Token counting not available for {provider}/{model}: {e}")
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_21_token_counter_with_messages(self, test_config, provider, model):
    """Test Case 21: Count tokens from messages with system message using LiteLLM token_counter.

    The count for INPUT_TOKENS_WITH_SYSTEM must be an int greater than 2.
    Errors other than failed assertions skip the test; failed assertions
    fail it.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    try:
        # Count tokens using messages parameter
        token_count = litellm.token_counter(
            model=model,
            messages=INPUT_TOKENS_WITH_SYSTEM,
        )

        # Validate token count
        assert isinstance(token_count, int), "Token count should be an integer"
        assert token_count > 0, "Token count should be positive"
        # With system message should have more tokens than simple text
        assert token_count > 2, (
            f"With system message should have >2 tokens, got {token_count}"
        )
    except AssertionError:
        # A failed check is a real failure; do not report it as a skip.
        raise
    except Exception as e:
        pytest.skip(f"Token counting not available for {provider}/{model}: {e}")
@pytest.mark.parametrize(
    "provider, model",
    get_cross_provider_params_for_scenario(
        "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
    ),
)
def test_22_token_counter_long_text(self, test_config, provider, model):
    """Test Case 22: Count tokens from long text using LiteLLM token_counter.

    The count for INPUT_TOKENS_LONG_TEXT must be an int greater than 100.
    Errors other than failed assertions skip the test; failed assertions
    fail it.
    """
    # Placeholder parameters mean nothing is configured for this scenario.
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    try:
        # Count tokens using text parameter with long text
        token_count = litellm.token_counter(
            model=model,
            text=INPUT_TOKENS_LONG_TEXT,
        )

        # Validate token count
        assert isinstance(token_count, int), "Token count should be an integer"
        assert token_count > 100, (
            f"Long text should have >100 tokens, got {token_count}"
        )
    except AssertionError:
        # A failed check is a real failure; do not report it as a skip.
        raise
    except Exception as e:
        pytest.skip(f"Token counting not available for {provider}/{model}: {e}")
# Additional helper functions specific to LiteLLM
def extract_litellm_tool_calls(response: Any) -> List[Dict[str, Any]]:
    """Collect the tool calls of the first choice as plain name/arguments dicts.

    Args:
        response: Response object in the OpenAI-compatible shape
            (``choices[0].message.tool_calls``). Any missing or empty level
            yields an empty result.

    Returns:
        One ``{"name": ..., "arguments": ...}`` dict per usable tool call.
        String arguments are decoded as JSON; other values are passed through.
        Calls whose arguments cannot be read or decoded are reported with a
        printed warning and left out.
    """
    extracted: List[Dict[str, Any]] = []

    choices = getattr(response, "choices", None)
    if not choices:
        return extracted

    message = getattr(choices[0], "message", None)
    raw_calls = getattr(message, "tool_calls", None)
    if not raw_calls:
        return extracted

    for call in raw_calls:
        fn = getattr(call, "function", None)
        # Entries without a named function are ignored silently.
        if fn is None or not hasattr(fn, "name"):
            continue
        try:
            raw_args = fn.arguments
            parsed = json.loads(raw_args) if isinstance(raw_args, str) else raw_args
            extracted.append({"name": fn.name, "arguments": parsed})
        except (json.JSONDecodeError, AttributeError) as e:
            print(f"Warning: Failed to parse LiteLLM tool call arguments: {e}")

    return extracted

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,781 @@
"""
Pydantic AI Integration Tests - Cross-Provider Support
🌉 CROSS-PROVIDER TESTING:
This test suite uses Pydantic AI to test against multiple AI providers through Bifrost.
Tests automatically run against all available providers with proper capability filtering.
🤖 PYDANTIC AI COMPONENTS TESTED:
- Agent: Core agent class for running LLM interactions
- Models: OpenAI (OpenAIChatModel), Anthropic (AnthropicModel), Google (GoogleModel), Cohere (CohereModel)
- Providers: OpenAIProvider, AnthropicProvider, GoogleProvider, CohereProvider
- Tools: Function tools with @agent.tool decorator
- Structured Output: Pydantic BaseModel result types
- Streaming: Real-time response streaming
- Async Operations: agent.run() async patterns
⚠️ PROVIDER LIMITATIONS:
- Bedrock: Not supported in PydanticAI tests - tested separately in test_bedrock.py
Tests Pydantic AI standard interface compliance and Bifrost integration:
1. Basic Agent chat - Cross-provider
2. Agent with system prompt (instructions) - Cross-provider
3. Multi-turn conversation with message history - Cross-provider
4. Tool calling with @agent.tool decorator - Cross-provider
5. End-to-end tool calling with multi-turn flow - Cross-provider
6. Structured output with Pydantic models - Cross-provider
7. Streaming responses - Cross-provider
8. Async operations
9. Error handling
10. Tool with context - Cross-provider
11. Multiple tools - Cross-provider
12. Result validation
13. Usage tracking
14. Message history inspection
15. Dynamic instructions
"""
import pytest
import asyncio
import os
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from pydantic import BaseModel, Field
from pydantic_ai import Agent, RunContext, Tool
# Pydantic AI model imports
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.openai import OpenAIProvider
# Optional provider imports
try:
from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.providers.anthropic import AnthropicProvider
ANTHROPIC_AVAILABLE = True
except ImportError:
ANTHROPIC_AVAILABLE = False
AnthropicModel = None
AnthropicProvider = None
try:
from pydantic_ai.models.google import GoogleModel
from pydantic_ai.providers.google import GoogleProvider
GOOGLE_AVAILABLE = True
except ImportError:
GOOGLE_AVAILABLE = False
GoogleModel = None
GoogleProvider = None
try:
from cohere import AsyncClientV2 as CohereAsyncClient
from pydantic_ai.models.cohere import CohereModel
from pydantic_ai.providers.cohere import CohereProvider
COHERE_AVAILABLE = True
except ImportError:
COHERE_AVAILABLE = False
CohereAsyncClient = None
CohereModel = None
CohereProvider = None
from .utils.common import (
Config,
SIMPLE_CHAT_MESSAGES,
MULTI_TURN_MESSAGES,
WEATHER_TOOL,
CALCULATOR_TOOL,
EMBEDDINGS_SINGLE_TEXT,
EMBEDDINGS_MULTIPLE_TEXTS,
mock_tool_response,
assert_valid_chat_response,
get_api_key,
skip_if_no_api_key,
WEATHER_KEYWORDS,
LOCATION_KEYWORDS,
)
from .utils.config_loader import get_model, get_integration_url, get_config
from .utils.parametrize import (
get_cross_provider_params_for_scenario,
format_provider_model,
)
@pytest.fixture
def test_config():
    """Fixture that builds and returns a ``Config`` instance."""
    config = Config()
    return config
@pytest.fixture(autouse=True)
def setup_pydanticai():
    """Setup Pydantic AI with Bifrost configuration and dummy credentials.

    Sets placeholder API keys in the process environment for the duration of
    each test, then restores whatever values were there before, so the
    placeholders do not leak past the test.
    """
    # Set dummy credentials since Bifrost handles actual authentication
    dummy_env = {
        "OPENAI_API_KEY": "dummy-openai-key-bifrost-handles-auth",
        "ANTHROPIC_API_KEY": "dummy-anthropic-key-bifrost-handles-auth",
        "GOOGLE_API_KEY": "dummy-google-api-key-bifrost-handles-auth",
        "GEMINI_API_KEY": "dummy-gemini-api-key-bifrost-handles-auth",
        "CO_API_KEY": "dummy-cohere-key-bifrost-handles-auth",
    }
    # Remember prior values (None means "was not set") for restoration.
    previous = {name: os.environ.get(name) for name in dummy_env}
    os.environ.update(dummy_env)
    try:
        yield
    finally:
        for name, old_value in previous.items():
            if old_value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = old_value
def get_openai_model(model_name: str | None = None) -> OpenAIChatModel:
    """Build an ``OpenAIChatModel`` whose provider targets Bifrost's ``/v1`` path.

    Args:
        model_name: Model identifier. When None, the "chat" model configured
            for the "pydanticai" integration is used.

    Returns:
        The model bound to an ``OpenAIProvider`` with a dummy API key.
    """
    bifrost_url = get_integration_url("pydanticai")
    resolved_name = get_model("pydanticai", "chat") if model_name is None else model_name
    return OpenAIChatModel(
        resolved_name,
        provider=OpenAIProvider(
            base_url=f"{bifrost_url}/v1",
            api_key="dummy-openai-key-bifrost-handles-auth",
        ),
    )
def get_anthropic_model(model_name: str = "claude-3-haiku-20240307") -> Optional[Any]:
    """Build an ``AnthropicModel`` pointed at Bifrost, or None without the SDK.

    Args:
        model_name: Anthropic model identifier.

    Returns:
        The configured model, or None when the optional Anthropic imports
        failed (``ANTHROPIC_AVAILABLE`` is False).
    """
    if ANTHROPIC_AVAILABLE:
        # The base URL is passed without a "/v1" suffix: per the original
        # note, the Anthropic SDK appends it internally (unlike OpenAI's).
        bifrost_provider = AnthropicProvider(
            base_url=get_integration_url("pydanticai"),
            api_key="dummy-anthropic-key-bifrost-handles-auth",
        )
        return AnthropicModel(model_name, provider=bifrost_provider)
    return None
def get_google_model(model_name: str = "gemini-2.0-flash") -> Optional[Any]:
    """Build a ``GoogleModel`` pointed at Bifrost, or None without the SDK.

    Args:
        model_name: Google model identifier.

    Returns:
        The configured model, or None when the optional Google imports
        failed (``GOOGLE_AVAILABLE`` is False).
    """
    if GOOGLE_AVAILABLE:
        # Route the Google provider through the Bifrost endpoint.
        bifrost_provider = GoogleProvider(
            api_key="dummy-google-api-key-bifrost-handles-auth",
            base_url=get_integration_url("pydanticai"),
        )
        return GoogleModel(model_name, provider=bifrost_provider)
    return None
def get_cohere_model(model_name: str = "command-r7b-12-2024") -> Optional[Any]:
    """Build a ``CohereModel`` pointed at Bifrost, or None without the SDK.

    Args:
        model_name: Cohere model identifier.

    Returns:
        The configured model, or None when the optional Cohere imports
        failed (``COHERE_AVAILABLE`` is False).
    """
    if COHERE_AVAILABLE:
        # A custom async client carries the Bifrost base URL; the provider
        # is then built around that client.
        bifrost_client = CohereAsyncClient(
            api_key="dummy-cohere-key-bifrost-handles-auth",
            base_url=get_integration_url("pydanticai"),
        )
        return CohereModel(model_name, provider=CohereProvider(cohere_client=bifrost_client))
    return None
def get_pydanticai_model_for_provider(provider: str, model: str) -> Any:
    """Return a Pydantic AI model object for the named provider.

    The provider name is matched case-insensitively against ``openai``,
    ``anthropic``, ``gemini``/``google`` and ``cohere``; the matching
    ``get_*_model`` helper builds the model.

    Args:
        provider: Provider name (e.g., 'openai', 'anthropic', 'gemini', 'cohere')
        model: Model name (e.g., 'gpt-4o-mini', 'claude-sonnet-4-20250514')

    Returns:
        Configured Pydantic AI model object for the provider

    Raises:
        ValueError: If the provider is unsupported (including 'bedrock') or
            the SDK it needs could not be imported.
    """
    key = provider.lower()

    if key == "bedrock":
        # Bedrock is tested separately in test_bedrock.py using the native
        # Bedrock API; per the original note, going through the OpenAI SDK
        # causes validation errors from response format differences.
        raise ValueError(
            "Provider 'bedrock' is not supported in PydanticAI tests - "
            "use test_bedrock.py for Bedrock testing"
        )

    if key == "openai":
        return get_openai_model(model)

    if key == "anthropic":
        if ANTHROPIC_AVAILABLE:
            return get_anthropic_model(model)
        raise ValueError(f"Anthropic SDK not available for provider '{provider}'")

    if key in ("gemini", "google"):
        if GOOGLE_AVAILABLE:
            return get_google_model(model)
        raise ValueError(f"Google GenAI SDK not available for provider '{provider}'")

    if key == "cohere":
        if COHERE_AVAILABLE:
            return get_cohere_model(model)
        raise ValueError(f"Cohere SDK not available for provider '{provider}'")

    raise ValueError(f"Unsupported provider: {provider}. Supported: openai, anthropic, gemini, cohere")
# Structured output models for testing
class CityInfo(BaseModel):
    """Structured-output schema describing a city and its country."""

    # Both fields are required strings.
    city: str = Field(..., description="Name of the city")
    country: str = Field(..., description="Country where the city is located")
class WeatherResponse(BaseModel):
    """Structured-output schema for a weather report."""

    # All fields are required strings.
    location: str = Field(..., description="Location for the weather")
    temperature: str = Field(..., description="Current temperature")
    conditions: str = Field(..., description="Weather conditions description")
class CalculationResult(BaseModel):
    """Structured-output schema pairing an expression with its numeric value."""

    # Both fields are required.
    expression: str = Field(..., description="The mathematical expression")
    result: float = Field(..., description="The calculated result")
class TestPydanticAIIntegration:
"""Comprehensive Pydantic AI integration tests through Bifrost"""
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("simple_chat"))
def test_01_basic_agent_chat(self, test_config, provider, model):
    """Test Case 1: Basic Agent chat functionality - runs across all available providers.

    Skips when no provider is configured or the provider's model cannot be
    built. Errors raised while running the agent fail the test.
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    # Only model construction signals "provider not available"; keeping the
    # agent run outside the try stops a ValueError raised during the run from
    # being reported as a skip.
    try:
        pydantic_model = get_pydanticai_model_for_provider(provider, model)
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")

    agent = Agent(
        pydantic_model,
        instructions="Be concise, reply with one sentence.",
    )
    result = agent.run_sync("Hello! How are you today?")

    assert result is not None
    assert result.output is not None
    assert len(str(result.output)) > 0
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("simple_chat"))
def test_02_agent_with_system_prompt(self, test_config, provider, model):
    """Test Case 2: Agent with custom system prompt (instructions) - runs across all available providers.

    Skips when no provider is configured or the provider's model cannot be
    built. The answer to the capital-of-France question must contain "paris".
    """
    if provider == "_no_providers_" or model == "_no_model_":
        pytest.skip("No providers configured for this scenario")

    # Only model construction signals "provider not available"; keeping the
    # agent run outside the try stops a ValueError raised during the run from
    # being reported as a skip.
    try:
        pydantic_model = get_pydanticai_model_for_provider(provider, model)
    except ValueError as e:
        pytest.skip(f"Provider {provider} not available: {e}")

    agent = Agent(
        pydantic_model,
        instructions=(
            "You are a helpful geography expert. "
            "Always mention the continent when discussing cities."
        ),
    )
    result = agent.run_sync("What is the capital of France?")

    assert result is not None
    assert result.output is not None
    content = str(result.output).lower()
    assert "paris" in content
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("multi_turn_conversation"))
def test_03_multi_turn_conversation(self, test_config, provider, model):
"""Test Case 3: Multi-turn conversation with message history - runs across all available providers"""
if provider == "_no_providers_" or model == "_no_model_":
pytest.skip("No providers configured for this scenario")
try:
pydantic_model = get_pydanticai_model_for_provider(provider, model)
agent = Agent(
pydantic_model,
instructions="You are a helpful assistant. Remember context from previous messages.",
)
# First turn
result1 = agent.run_sync("My name is Alice.")
# Second turn - should remember the name
result2 = agent.run_sync(
"What is my name?",
message_history=result1.all_messages(),
)
assert result2 is not None
assert result2.output is not None
content = str(result2.output).lower()
assert "alice" in content
except ValueError as e:
pytest.skip(f"Provider {provider} not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("tool_calls"))
def test_04_tool_calling(self, test_config, provider, model):
"""Test Case 4: Tool calling with @agent.tool decorator - runs across all available providers"""
if provider == "_no_providers_" or model == "_no_model_":
pytest.skip("No providers configured for this scenario")
try:
pydantic_model = get_pydanticai_model_for_provider(provider, model)
# Define tools as functions
def get_weather(location: str) -> str:
"""Get the current weather for a location."""
return f"The weather in {location} is 72°F and sunny."
def calculate(expression: str) -> str:
"""Perform a mathematical calculation."""
try:
# Safe evaluation for simple expressions
result = eval(expression.replace("x", "*").replace("×", "*"))
return f"The result of {expression} is {result}"
except Exception:
return f"Could not calculate {expression}"
agent = Agent(
pydantic_model,
tools=[get_weather, calculate],
instructions="You are a helpful assistant that can check weather and do calculations.",
)
result = agent.run_sync("What's the weather like in Boston?")
assert result is not None
assert result.output is not None
content = str(result.output).lower()
# Should either mention weather info or Boston
weather_location_keywords = WEATHER_KEYWORDS + LOCATION_KEYWORDS
assert any(
word in content for word in weather_location_keywords
), f"Response should mention weather or location. Got: {content}"
except ValueError as e:
pytest.skip(f"Provider {provider} not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("end2end_tool_calling"))
def test_05_end2end_tool_calling(self, test_config, provider, model):
"""Test Case 5: Complete end-to-end tool calling flow with multi-turn conversation - runs across all available providers"""
if provider == "_no_providers_" or model == "_no_model_":
pytest.skip("No providers configured for this scenario")
try:
pydantic_model = get_pydanticai_model_for_provider(provider, model)
# Define a tool that we'll manually execute
def get_weather(location: str) -> str:
"""Get the current weather for a location."""
return f"The weather in {location} is 72°F and sunny."
agent = Agent(
pydantic_model,
tools=[get_weather],
instructions="You are a helpful assistant that can check weather.",
)
# Step 1: Initial request - should trigger tool call
result1 = agent.run_sync("What's the weather in Boston in fahrenheit?")
assert result1 is not None
assert result1.output is not None
# Pydantic AI automatically executes tools, so result1.output should contain
# the final response with weather information.
# Verify the response contains weather information
content = str(result1.output).lower()
weather_location_keywords = WEATHER_KEYWORDS + LOCATION_KEYWORDS
assert any(
word in content for word in weather_location_keywords
), f"Response should mention weather or location. Got: {content}"
except ValueError as e:
pytest.skip(f"Provider {provider} not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("pydantic_structured_output"))
def test_06_structured_output(self, test_config, provider, model):
"""Test Case 5: Structured output with Pydantic models - runs on providers with reliable PydanticAI structured output support"""
if provider == "_no_providers_" or model == "_no_model_":
pytest.skip("No providers configured for this scenario")
try:
pydantic_model = get_pydanticai_model_for_provider(provider, model)
agent = Agent(
pydantic_model,
output_type=CityInfo,
instructions="Extract city information from the user's question.",
)
result = agent.run_sync("Tell me about Paris, the capital of France.")
assert result is not None
assert result.output is not None
assert isinstance(result.output, CityInfo)
assert result.output.city.lower() == "paris"
assert "france" in result.output.country.lower()
except ValueError as e:
pytest.skip(f"Provider {provider} not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("pydanticai_streaming"))
def test_07_streaming_responses(self, test_config, provider, model):
"""Test Case 7: Streaming response functionality - runs on providers with PydanticAI streaming support"""
if provider == "_no_providers_" or model == "_no_model_":
pytest.skip("No providers configured for this scenario")
try:
pydantic_model = get_pydanticai_model_for_provider(provider, model)
agent = Agent(
pydantic_model,
instructions="You are a storyteller. Tell short, engaging stories.",
)
# Use async streaming with proper event loop handling
async def run_streaming():
chunks = []
async with agent.run_stream("Tell me a very short story about a robot.") as response:
async for chunk in response.stream_text():
chunks.append(chunk)
return "".join(chunks), len(chunks)
# Use asyncio.new_event_loop() to avoid conflicts with existing event loops
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
full_content, chunk_count = loop.run_until_complete(run_streaming())
finally:
loop.close()
assert chunk_count > 0, "Should receive streaming chunks"
assert len(full_content) > 0, "Should have content from streaming"
assert any(
word in full_content.lower() for word in ["robot", "story", "once"]
), f"Response should be a story about robots. Got: {full_content[:200]}"
except ValueError as e:
pytest.skip(f"Provider {provider} not available: {e}")
def test_08_async_operations(self, test_config):
"""Test Case 8: Async operation support"""
async def async_test():
try:
model = get_openai_model()
agent = Agent(
model,
instructions="Be concise.",
)
result = await agent.run("Hello from async!")
assert result is not None
assert result.output is not None
assert len(str(result.output)) > 0
return True
except Exception as e:
pytest.skip(f"Async operations through Pydantic AI not available: {e}")
return False
result = asyncio.run(async_test())
if result is not False:
assert result is True
def test_09_error_handling(self, test_config):
"""Test Case 9: Error handling for invalid requests"""
try:
# Test with invalid model name
base_url = get_integration_url("pydanticai")
provider = OpenAIProvider(
base_url=f"{base_url}/v1",
api_key="dummy-key"
)
model = OpenAIChatModel("invalid-model-name-should-fail", provider=provider)
agent = Agent(model)
with pytest.raises(Exception) as exc_info:
agent.run_sync("This should fail gracefully.")
# Should get a meaningful error
error_message = str(exc_info.value).lower()
assert any(
word in error_message
for word in ["model", "error", "invalid", "not found", "does not exist"]
)
except Exception as e:
pytest.skip(f"Error handling test through Pydantic AI not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("tool_calls"))
def test_10_tool_with_context(self, test_config, provider, model):
"""Test Case 10: Tool with RunContext for dependency injection - runs across all available providers"""
if provider == "_no_providers_" or model == "_no_model_":
pytest.skip("No providers configured for this scenario")
try:
pydantic_model = get_pydanticai_model_for_provider(provider, model)
@dataclass
class UserDeps:
user_name: str
user_id: int
def get_user_info(ctx: RunContext[UserDeps]) -> str:
"""Get information about the current user."""
return f"User: {ctx.deps.user_name} (ID: {ctx.deps.user_id})"
agent = Agent(
pydantic_model,
deps_type=UserDeps,
tools=[Tool(get_user_info, takes_ctx=True)],
instructions="You can look up user information when asked.",
)
deps = UserDeps(user_name="Alice", user_id=123)
result = agent.run_sync("What is my user information?", deps=deps)
assert result is not None
assert result.output is not None
content = str(result.output).lower()
# Should mention Alice or user info
assert "alice" in content or "user" in content
except ValueError as e:
pytest.skip(f"Provider {provider} not available: {e}")
@pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("multiple_tool_calls"))
def test_11_multiple_tools(self, test_config, provider, model):
"""Test Case 11: Multiple tools in single agent - runs across all available providers"""
if provider == "_no_providers_" or model == "_no_model_":
pytest.skip("No providers configured for this scenario")
try:
pydantic_model = get_pydanticai_model_for_provider(provider, model)
def get_weather(location: str) -> str:
"""Get weather for a location."""
return f"Weather in {location}: 72°F, sunny"
def get_time(timezone: str) -> str:
"""Get current time in a timezone."""
return f"Current time in {timezone}: 2:30 PM"
def translate(text: str, target_language: str) -> str:
"""Translate text to another language."""
return f"'{text}' in {target_language}: [translated]"
agent = Agent(
pydantic_model,
tools=[get_weather, get_time, translate],
instructions="You can check weather, time, and translate text.",
)
result = agent.run_sync("What's the weather in New York?")
assert result is not None
assert result.output is not None
except ValueError as e:
pytest.skip(f"Provider {provider} not available: {e}")
def test_12_agent_with_result_validators(self, test_config):
"""Test Case 12: Agent with result type validation"""
try:
model = get_openai_model()
class NumberResponse(BaseModel):
"""A response containing a number"""
value: int = Field(ge=0, le=100, description="A number between 0 and 100")
explanation: str = Field(description="Explanation of the number")
agent = Agent(
model,
output_type=NumberResponse,
instructions="When asked for a number, provide a value between 0 and 100.",
)
result = agent.run_sync("Give me a random number for a dice roll (1-6).")
assert result is not None
assert result.output is not None
assert isinstance(result.output, NumberResponse)
assert 0 <= result.output.value <= 100
except Exception as e:
pytest.skip(f"Result validation through Pydantic AI not available: {e}")
def test_13_usage_tracking(self, test_config):
"""Test Case 13: Usage tracking and token counting"""
try:
model = get_openai_model()
agent = Agent(
model,
instructions="Be concise.",
)
result = agent.run_sync("Say hello.")
assert result is not None
# Check usage information
usage = result.usage()
assert usage is not None
# Usage should have token counts
if hasattr(usage, 'total_tokens'):
assert usage.total_tokens > 0
elif hasattr(usage, 'input_tokens'):
assert usage.input_tokens > 0
except Exception as e:
pytest.skip(f"Usage tracking through Pydantic AI not available: {e}")
def test_14_message_history_inspection(self, test_config):
"""Test Case 14: Inspect message history after run"""
try:
model = get_openai_model()
agent = Agent(
model,
instructions="Be helpful.",
)
result = agent.run_sync("What is 2 + 2?")
# Inspect all messages
messages = result.all_messages()
assert messages is not None
assert len(messages) >= 2 # At least request and response
# Should have user message and assistant response
message_kinds = [msg.kind for msg in messages]
assert "request" in message_kinds
assert "response" in message_kinds
except Exception as e:
pytest.skip(f"Message history inspection through Pydantic AI not available: {e}")
def test_15_dynamic_instructions(self, test_config):
"""Test Case 15: Dynamic instructions based on context"""
try:
model = get_openai_model()
@dataclass
class LanguageDeps:
language: str
agent = Agent(
model,
deps_type=LanguageDeps,
)
@agent.instructions
def dynamic_instructions(ctx: RunContext[LanguageDeps]) -> str:
return f"Always respond in {ctx.deps.language}. Be concise."
deps = LanguageDeps(language="English")
result = agent.run_sync("Say hello.", deps=deps)
assert result is not None
assert result.output is not None
# Response should be in English
content = str(result.output).lower()
assert any(word in content for word in ["hello", "hi", "greetings"])
except Exception as e:
pytest.skip(f"Dynamic instructions through Pydantic AI not available: {e}")
# Additional test class for edge cases
class TestPydanticAIEdgeCases:
    """Edge case tests for Pydantic AI integration

    Each test skips when the backend raises, but re-raises AssertionError first
    so that a failed assertion is reported as a failure rather than a skip.
    """

    def test_empty_response_handling(self, test_config):
        """Test handling of potentially empty responses"""
        try:
            model = get_openai_model()
            agent = Agent(
                model,
                instructions="If asked to say nothing, respond with a single space.",
            )
            result = agent.run_sync("Say as little as possible.")
            # Should still get a valid result object
            assert result is not None
        except AssertionError:
            raise
        except Exception as e:
            pytest.skip(f"Empty response handling test not available: {e}")

    def test_special_characters_in_prompt(self, test_config):
        """Test handling of special characters in prompts"""
        try:
            model = get_openai_model()
            agent = Agent(
                model,
                instructions="Echo back special characters correctly.",
            )
            special_prompt = "Handle these: 你好 🎉 <tag> & \"quotes\" 'apostrophe'"
            result = agent.run_sync(special_prompt)
            assert result is not None
            assert result.output is not None
        except AssertionError:
            raise
        except Exception as e:
            pytest.skip(f"Special characters test not available: {e}")

    def test_long_conversation_context(self, test_config):
        """Test handling of longer conversation context"""
        try:
            model = get_openai_model()
            agent = Agent(
                model,
                instructions="You are a helpful assistant.",
            )
            # Build up conversation history, feeding each run's messages into the next
            history = None
            for i in range(3):
                result = agent.run_sync(
                    f"Remember number {i + 1}.",
                    message_history=history,
                )
                history = result.all_messages()
            # Final query should work with accumulated history
            final_result = agent.run_sync(
                "What numbers did I ask you to remember?",
                message_history=history,
            )
            assert final_result is not None
            assert final_result.output is not None
        except AssertionError:
            raise
        except Exception as e:
            pytest.skip(f"Long conversation context test not available: {e}")

View File

@@ -0,0 +1 @@
# Utils package for shared test utilities

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,522 @@
"""
Configuration loader for Bifrost integration tests.
This module loads configuration from config.yml and provides utilities
for constructing integration URLs through the Bifrost gateway.
"""
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
import yaml
# Lookup table: integration name -> name of the provider whose configuration
# is used when resolving models for that integration.
INTEGRATION_TO_PROVIDER_MAP = dict(
    openai="openai",
    anthropic="anthropic",
    google="gemini",  # Google integration is served by the Gemini provider
    litellm="openai",  # LiteLLM defaults to OpenAI
    langchain="openai",  # LangChain defaults to OpenAI
    pydanticai="openai",  # Pydantic AI defaults to OpenAI
    bedrock="bedrock",
    azure="azure",
)
@dataclass
class BifrostConfig:
    """Connection settings for the Bifrost gateway."""

    # Root URL of the gateway
    base_url: str
    # Integration name -> endpoint path segment appended to ``base_url``
    endpoints: Dict[str, str]
@dataclass
class IntegrationModels:
    """Model names configured for one integration."""

    # Model used for chat
    chat: str
    # Model used for vision
    vision: str
    # Model used for tools
    tools: str
    # Alternative models
    alternatives: list
@dataclass
class TestConfig:
    """Complete test configuration

    Aggregates the Bifrost gateway settings, per-integration models and the
    remaining configuration sections in one typed container.
    """

    # The name starts with "Test", so pytest would otherwise try to collect this
    # dataclass as a test class and warn about its generated __init__.
    # Un-annotated on purpose: it is a plain class attribute, not a dataclass field.
    __test__ = False

    bifrost: BifrostConfig
    api: Dict[str, Any]
    models: Dict[str, IntegrationModels]
    model_capabilities: Dict[str, Dict[str, Any]]
    test_settings: Dict[str, Any]
    integration_settings: Dict[str, Any]
    environments: Dict[str, Any]
    logging: Dict[str, Any]
class ConfigLoader:
    """Configuration loader for Bifrost integration tests

    Parses a YAML configuration file, expands ``${VAR}`` and ``${VAR:-default}``
    placeholders from the process environment, and exposes lookup helpers over
    the resulting mapping.
    """

    def __init__(self, config_path: Optional[str] = None):
        """Initialize configuration loader

        Args:
            config_path: Path to config.yml file. If None, looks for config.yml in project root.

        Raises:
            FileNotFoundError: If the configuration file does not exist.
        """
        if config_path is None:
            # Project root is taken to be two directories above this module's directory
            project_root = Path(__file__).parent.parent.parent
            config_path = project_root / "config.yml"
        self.config_path = Path(config_path)
        self._config = None
        self._load_config()

    def _load_config(self):
        """Load configuration from YAML file

        Raises:
            FileNotFoundError: If ``self.config_path`` does not exist.
        """
        if not self.config_path.exists():
            raise FileNotFoundError(f"Configuration file not found: {self.config_path}")
        # Explicit encoding so non-ASCII content does not depend on the platform locale
        with open(self.config_path, "r", encoding="utf-8") as f:
            raw_config = yaml.safe_load(f)
        # An empty YAML document parses to None; treat it as an empty mapping so the
        # accessors (and validate_config) report missing sections instead of a TypeError
        if raw_config is None:
            raw_config = {}
        # Expand environment variables
        self._config = self._expand_env_vars(raw_config)

    def _expand_env_vars(self, obj):
        """Recursively expand environment variables in configuration

        Dicts and lists are rebuilt with every nested value expanded; strings have
        each ``${...}`` placeholder substituted; any other value is returned as is.

        Supported placeholder forms:
            ${VAR}           value of VAR, or "" when VAR is unset
            ${VAR:-default}  value of VAR, or ``default`` when VAR is unset or
                             empty (same rule as the shell's ``:-`` expansion)
        """
        if isinstance(obj, dict):
            return {k: self._expand_env_vars(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [self._expand_env_vars(item) for item in obj]
        if not isinstance(obj, str):
            return obj

        import re

        def replace_var(match):
            var_expr = match.group(1)
            if ":-" in var_expr:
                var_name, default_value = var_expr.split(":-", 1)
                # `or` makes a set-but-empty variable fall back to the default too
                return os.getenv(var_name) or default_value
            return os.getenv(var_expr, "")

        return re.sub(r"\$\{([^}]+)\}", replace_var, obj)

    def get_integration_url(self, integration: str) -> str:
        """Get the complete URL for an integration

        Args:
            integration: Integration name (openai, anthropic, google, litellm)

        Returns:
            Complete URL for the integration

        Raises:
            ValueError: If no endpoint is configured for the integration.

        Examples:
            get_integration_url("openai") -> "http://localhost:8080/openai"
        """
        bifrost_config = self._config["bifrost"]
        base_url = bifrost_config["base_url"]
        endpoint = bifrost_config["endpoints"].get(integration, "")
        if not endpoint:
            raise ValueError(f"No endpoint configured for integration: {integration}")
        # Strip a trailing slash so the joined URL never contains "//"
        return f"{base_url.rstrip('/')}/{endpoint}"

    def get_bifrost_config(self) -> "BifrostConfig":
        """Get Bifrost configuration"""
        bifrost_data = self._config["bifrost"]
        return BifrostConfig(
            base_url=bifrost_data["base_url"], endpoints=bifrost_data["endpoints"]
        )

    def get_model(self, integration: str, model_type: str = "chat") -> str:
        """Get model name for an integration and type

        Maps integration names to provider configurations.

        Args:
            integration: Integration name (openai, anthropic, google, litellm, langchain)
            model_type: Model type (chat, vision, tools, etc.)

        Returns:
            Model name for the integration and type

        Raises:
            ValueError: If the integration is not in INTEGRATION_TO_PROVIDER_MAP.
        """
        # Map integration to provider
        provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
        if not provider:
            raise ValueError(
                f"Unknown integration: {integration}. "
                f"Valid integrations: {list(INTEGRATION_TO_PROVIDER_MAP.keys())}"
            )
        # Get model from provider configuration
        return self.get_provider_model(provider, model_type)

    def get_model_alternatives(self, integration: str) -> list:
        """Get alternative models for an integration

        Returns an empty list for an unknown integration or when its provider has
        no configuration.
        """
        provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
        if not provider:
            return []
        providers = self._config.get("providers", {})
        if provider not in providers:
            return []
        return providers[provider].get("alternatives", [])

    def get_model_capabilities(self, model: str) -> Dict[str, Any]:
        """Get capabilities for a specific model

        Falls back to chat-only defaults when the model (or the whole
        ``model_capabilities`` section) is not configured.
        """
        return self._config.get("model_capabilities", {}).get(
            model,
            {
                "chat": True,
                "tools": False,
                "vision": False,
                "max_tokens": 4096,
                "context_window": 4096,
            },
        )

    def supports_capability(self, model: str, capability: str) -> bool:
        """Check if a model supports a specific capability"""
        caps = self.get_model_capabilities(model)
        return caps.get(capability, False)

    def get_api_config(self) -> Dict[str, Any]:
        """Get API configuration (timeout, retries, etc.)"""
        return self._config["api"]

    def get_test_settings(self) -> Dict[str, Any]:
        """Get test configuration settings"""
        return self._config["test_settings"]

    def get_integration_settings(self, integration: str) -> Dict[str, Any]:
        """Get integration-specific settings (empty dict when none are configured)"""
        return self._config.get("integration_settings", {}).get(integration, {})

    def get_environment_config(self, environment: Optional[str] = None) -> Dict[str, Any]:
        """Get environment-specific configuration

        Args:
            environment: Environment name (development, production, etc.)
                If None, uses TEST_ENV environment variable or 'development'

        Returns:
            Settings for the environment, or an empty dict when none are configured
        """
        if environment is None:
            environment = os.getenv("TEST_ENV", "development")
        return self._config.get("environments", {}).get(environment, {})

    def get_logging_config(self) -> Dict[str, Any]:
        """Get logging configuration (empty dict when the section is absent)"""
        return self._config.get("logging", {})

    def list_integrations(self) -> list:
        """List all configured integrations"""
        return list(INTEGRATION_TO_PROVIDER_MAP.keys())

    def list_models(self, integration: Optional[str] = None) -> Dict[str, Any]:
        """List all models for an integration or all integrations

        Args:
            integration: Integration name; when omitted every integration whose
                provider is configured is returned.

        Returns:
            Mapping of integration name to its provider's configuration. With an
            explicit integration the mapping has exactly that one key.

        Raises:
            ValueError: If the integration is unknown or its provider has no configuration.
        """
        providers = self._config.get("providers", {})
        if integration:
            # Map integration to provider
            provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
            if not provider:
                raise ValueError(f"Unknown integration: {integration}")
            if provider not in providers:
                raise ValueError(f"No provider configuration for: {provider}")
            return {integration: providers[provider]}
        # Return all providers mapped to their integration names
        return {
            name: providers[provider]
            for name, provider in INTEGRATION_TO_PROVIDER_MAP.items()
            if provider in providers
        }

    def validate_config(self) -> bool:
        """Validate configuration completeness

        Returns:
            True when every check passes.

        Raises:
            ValueError: On a missing required section, an incomplete Bifrost
                section, or an integration whose provider is not configured.
        """
        required_sections = ["bifrost", "providers", "api", "test_settings"]
        for section in required_sections:
            if section not in self._config:
                raise ValueError(f"Missing required configuration section: {section}")
        # Validate Bifrost configuration
        bifrost = self._config["bifrost"]
        if "base_url" not in bifrost or "endpoints" not in bifrost:
            raise ValueError("Bifrost configuration missing base_url or endpoints")
        # Validate that all integrations map to valid providers
        for integration, provider in INTEGRATION_TO_PROVIDER_MAP.items():
            if provider not in self._config["providers"]:
                raise ValueError(
                    f"Integration '{integration}' maps to provider '{provider}' "
                    f"which is not configured in providers section"
                )
        return True

    def print_config_summary(self):
        """Print a summary of the configuration"""
        print("🔧 BIFROST INTEGRATION TEST CONFIGURATION")
        print("=" * 80)
        # Bifrost configuration
        bifrost = self.get_bifrost_config()
        print("\n🌉 BIFROST GATEWAY:")
        print(f" Base URL: {bifrost.base_url}")
        print(" Endpoints:")
        for integration, endpoint in bifrost.endpoints.items():
            full_url = f"{bifrost.base_url.rstrip('/')}/{endpoint}"
            print(f" {integration}: {full_url}")
        # Model configurations
        print("\n🤖 MODEL CONFIGURATIONS (via providers):")
        providers = self._config.get("providers", {})
        for integration, provider in INTEGRATION_TO_PROVIDER_MAP.items():
            if provider in providers:
                models = providers[provider]
                # Separator added: the two names were previously printed run together
                print(f" {integration.upper()} → {provider}:")
                print(f" Chat: {models.get('chat', 'N/A')}")
                print(f" Vision: {models.get('vision', 'N/A')}")
                print(f" Tools: {models.get('tools', 'N/A')}")
                alternatives = models.get('alternatives', [])
                print(f" Alternatives: {len(alternatives)} models")
        # API settings
        api_config = self.get_api_config()
        print("\n⚙️ API SETTINGS:")
        print(f" Timeout: {api_config['timeout']}s")
        print(f" Max Retries: {api_config['max_retries']}")
        print(f" Retry Delay: {api_config['retry_delay']}s")
        print(f"\n✅ Configuration loaded successfully from: {self.config_path}")

    def get_provider_model(self, provider: str, capability: str = "chat") -> str:
        """Get model name for a provider and capability

        Args:
            provider: Provider name (e.g., 'openai', 'anthropic', 'gemini')
            capability: Capability type (default: 'chat')

        Returns:
            Model name configured for the provider and capability, or "" when the
            providers section, the provider, or the capability is missing
        """
        providers = self._config.get("providers", {})
        if provider not in providers:
            return ""
        return providers[provider].get(capability, "")

    def get_provider_api_key_env(self, provider: str) -> str:
        """Get the environment variable name for a provider's API key

        Args:
            provider: Provider name

        Returns:
            Environment variable name, or "" when none is configured
        """
        if "provider_api_keys" not in self._config:
            return ""
        return self._config["provider_api_keys"].get(provider, "")

    def is_provider_available(self, provider: str) -> bool:
        """Check if a provider is available (has API key in environment)

        Args:
            provider: Provider name

        Returns:
            True if provider's API key is set in environment and not blank
        """
        env_var = self.get_provider_api_key_env(provider)
        if not env_var:
            return False
        api_key = os.getenv(env_var)
        return api_key is not None and api_key.strip() != ""

    def get_available_providers(self) -> List[str]:
        """Get list of providers that are available (have API keys configured)

        Returns:
            List of available provider names, in configuration order
        """
        if "providers" not in self._config:
            return []
        return [
            provider
            for provider in self._config["providers"].keys()
            if self.is_provider_available(provider)
        ]

    def provider_supports_scenario(self, provider: str, scenario: str) -> bool:
        """Check if a provider supports a specific test scenario

        Args:
            provider: Provider name
            scenario: Scenario name

        Returns:
            True if provider supports the scenario
        """
        if "provider_scenarios" not in self._config:
            return False
        if provider not in self._config["provider_scenarios"]:
            return False
        scenarios = self._config["provider_scenarios"][provider]
        return scenarios.get(scenario, False)

    def get_providers_for_scenario(self, scenario: str) -> List[str]:
        """Get list of available providers that support a specific scenario

        Args:
            scenario: Scenario name

        Returns:
            List of provider names that support the scenario
        """
        return [
            provider
            for provider in self.get_available_providers()
            if self.provider_supports_scenario(provider, scenario)
        ]

    def get_scenario_capability(self, scenario: str) -> str:
        """Get the capability type for a scenario

        Args:
            scenario: Scenario name

        Returns:
            Capability type (e.g., 'chat', 'vision', 'tools'); 'chat' when unmapped
        """
        if "scenario_capabilities" not in self._config:
            return "chat"  # Default
        return self._config["scenario_capabilities"].get(scenario, "chat")

    def get_virtual_key(self) -> str:
        """Get the virtual key value for testing

        Returns:
            Virtual key string or empty string if not configured
        """
        if "virtual_key" not in self._config:
            return ""
        vk_config = self._config["virtual_key"]
        if not vk_config.get("enabled", False):
            return ""
        # A present-but-null `value:` entry parses to None; normalise it to ""
        return vk_config.get("value", "") or ""

    def is_virtual_key_configured(self) -> bool:
        """Check if virtual key testing is enabled and configured

        Returns:
            True if virtual key is available for testing
        """
        vk = self.get_virtual_key()
        return vk is not None and vk.strip() != ""
# Lazily created, process-wide ConfigLoader shared by the helpers below
_config_loader = None


def get_config() -> ConfigLoader:
    """Return the shared ConfigLoader, creating it on first use."""
    global _config_loader
    if _config_loader is not None:
        return _config_loader
    _config_loader = ConfigLoader()
    return _config_loader
def get_integration_url(integration: str) -> str:
    """Shortcut for ``get_config().get_integration_url(integration)``."""
    loader = get_config()
    return loader.get_integration_url(integration)
def get_model(integration: str, model_type: str = "chat") -> str:
    """Shortcut for ``get_config().get_model(integration, model_type)``."""
    loader = get_config()
    return loader.get_model(integration, model_type)
def get_model_capabilities(model: str) -> Dict[str, Any]:
    """Shortcut for ``get_config().get_model_capabilities(model)``."""
    loader = get_config()
    return loader.get_model_capabilities(model)
def supports_capability(model: str, capability: str) -> bool:
    """Shortcut for ``get_config().supports_capability(model, capability)``."""
    loader = get_config()
    return loader.supports_capability(model, capability)
def get_provider_model(provider: str, capability: str = "chat") -> str:
    """Shortcut for ``get_config().get_provider_model(provider, capability)``."""
    loader = get_config()
    return loader.get_provider_model(provider, capability)
def is_provider_available(provider: str) -> bool:
    """Shortcut for ``get_config().is_provider_available(provider)``."""
    loader = get_config()
    return loader.is_provider_available(provider)
def get_available_providers() -> List[str]:
    """Shortcut for ``get_config().get_available_providers()``."""
    loader = get_config()
    return loader.get_available_providers()
def provider_supports_scenario(provider: str, scenario: str) -> bool:
    """Shortcut for ``get_config().provider_supports_scenario(provider, scenario)``."""
    loader = get_config()
    return loader.provider_supports_scenario(provider, scenario)
def get_providers_for_scenario(scenario: str) -> List[str]:
    """Shortcut for ``get_config().get_providers_for_scenario(scenario)``."""
    loader = get_config()
    return loader.get_providers_for_scenario(scenario)
def get_virtual_key() -> str:
    """Shortcut for ``get_config().get_virtual_key()``."""
    loader = get_config()
    return loader.get_virtual_key()
def is_virtual_key_configured() -> bool:
    """Shortcut for ``get_config().is_virtual_key_configured()``."""
    loader = get_config()
    return loader.is_virtual_key_configured()
if __name__ == "__main__":
    # Script mode: validate the configuration, then print its summary
    loader = get_config()
    loader.validate_config()
    loader.print_config_summary()

View File

@@ -0,0 +1,66 @@
"""
Model configurations for each integration.
This file now acts as a compatibility layer and convenience wrapper
around the new configuration system in config.yml and config_loader.py.
All model data is now centralized in config.yml for easier maintenance.
"""
from typing import Dict, List
from dataclasses import dataclass
from .config_loader import get_config
@dataclass
class IntegrationModels:
    """Model names configured for one integration."""

    # Primary chat model
    chat: str
    # Vision/multimodal model
    vision: str
    # Function calling model
    tools: str
    # Alternative models for testing
    alternatives: List[str]
def get_integration_models() -> Dict[str, IntegrationModels]:
    """Get all integration model configurations from config.yml

    Returns:
        Mapping of integration name to its IntegrationModels. Integrations whose
        provider has no configuration are omitted; a missing model entry becomes
        "" and missing alternatives become [].
    """
    config = get_config()
    integration_models = {}
    # list_models() with no argument yields {integration: provider_config} for every
    # configured integration. The per-integration form wraps its result under the
    # integration name, so indexing it directly with "chat" raised KeyError.
    for integration, models_config in config.list_models().items():
        integration_models[integration] = IntegrationModels(
            chat=models_config.get("chat", ""),
            vision=models_config.get("vision", ""),
            tools=models_config.get("tools", ""),
            alternatives=models_config.get("alternatives", []),
        )
    return integration_models


# Backward compatibility - load from config
INTEGRATION_MODELS = get_integration_models()
def get_alternatives(integration: str) -> List[str]:
    """Return the alternative models configured for an integration."""
    return get_config().get_model_alternatives(integration)
def list_all_models() -> Dict[str, Dict[str, str]]:
    """Return the model configuration of every integration, keyed by integration name."""
    return get_config().list_models()
# Human-readable overview, kept for documentation purposes
def print_model_summary():
    """Print the configuration summary produced by the shared config loader."""
    get_config().print_config_summary()
if __name__ == "__main__":
    # Script mode: print the model summary
    print_model_summary()

View File

@@ -0,0 +1,141 @@
"""
Parametrization utilities for cross-provider testing.
This module provides pytest parametrization for testing across multiple AI providers
with automatic scenario-based filtering.
"""
from typing import List, Tuple, Union
from .config_loader import get_config
def get_cross_provider_params_for_scenario(
    scenario: str,
    include_providers: List[str] | None = None,
    exclude_providers: List[str] | None = None,
) -> List[Tuple[str, str]]:
    """
    Get cross-provider (provider, model) parameters for pytest parametrization.

    Args:
        scenario: Test scenario name
        include_providers: Optional list of providers to include
        exclude_providers: Optional list of providers to exclude

    Returns:
        List of (provider, model) tuples sorted by provider name. When nothing
        qualifies, a single ("_no_providers_", "_no_model_") sentinel tuple is
        returned so that parametrization still receives one case; tests are
        expected to skip on it.
    """
    config = get_config()
    # Get providers that support this scenario
    providers = config.get_providers_for_scenario(scenario)
    # Apply include filter
    if include_providers:
        providers = [p for p in providers if p in include_providers]
    # Apply exclude filter
    if exclude_providers:
        providers = [p for p in providers if p not in exclude_providers]
    # The scenario → capability mapping does not depend on the provider, so it is
    # resolved once instead of once per provider
    capability = config.get_scenario_capability(scenario)
    params = []
    for provider in sorted(providers):  # Sort for consistent test ordering
        model = config.get_provider_model(provider, capability)
        # Only add if provider has a model for this scenario's capability
        if model:
            params.append((provider, model))
    # If no providers available, return a dummy tuple to avoid pytest errors
    return params or [("_no_providers_", "_no_model_")]
def get_cross_provider_params_with_vk_for_scenario(
    scenario: str,
    include_providers: List[str] | None = None,
    exclude_providers: List[str] | None = None,
) -> List[Tuple[str, str, bool]]:
    """
    Build (provider, model, vk_enabled) parameters for pytest parametrization.

    Every provider/model pair from get_cross_provider_params_for_scenario() is
    emitted with vk_enabled=False; when a virtual key (VK) is configured the
    same pair is emitted a second time with vk_enabled=True.

    Args:
        scenario: Test scenario name
        include_providers: Optional list of providers to include
        exclude_providers: Optional list of providers to exclude

    Returns:
        List of (provider, model, vk_enabled) tuples. If the base list is the
        "no providers" placeholder, a single
        ("_no_providers_", "_no_model_", False) tuple is returned.

    Example:
        With a VK configured and two providers available:
            [
                ("openai", "gpt-4o", False),
                ("openai", "gpt-4o", True),
                ("anthropic", "claude-3", False),
                ("anthropic", "claude-3", True),
            ]
    """
    config = get_config()
    pairs = get_cross_provider_params_for_scenario(
        scenario, include_providers, exclude_providers
    )

    # Pass the placeholder through untouched, with the VK flag off
    if pairs == [("_no_providers_", "_no_model_")]:
        return [("_no_providers_", "_no_model_", False)]

    # The plain variant always runs; the VK variant only when one is configured
    vk_flags = (False, True) if config.is_virtual_key_configured() else (False,)
    return [
        (provider, model, flag)
        for provider, model in pairs
        for flag in vk_flags
    ]
def format_vk_test_id(provider: str, model: str, vk_enabled: bool) -> str:
    """
    Format the test ID for a virtual-key (VK) parametrized test case.

    Args:
        provider: Provider name
        model: Model name
        vk_enabled: Whether the VK variant of the test is being run

    Returns:
        "<provider>-<model>-with_vk" when vk_enabled is true, otherwise
        "<provider>-<model>-no_vk"

    Example:
        >>> format_vk_test_id("openai", "gpt-4o", True)
        'openai-gpt-4o-with_vk'
        >>> format_vk_test_id("openai", "gpt-4o", False)
        'openai-gpt-4o-no_vk'
    """
    vk_suffix = "with_vk" if vk_enabled else "no_vk"
    return f"{provider}-{model}-{vk_suffix}"
def format_provider_model(provider: str, model: str) -> str:
    """
    Join a provider and a model into the "provider/model" form.

    Args:
        provider: Provider name
        model: Model name

    Returns:
        The string "<provider>/<model>"

    Example:
        >>> format_provider_model("openai", "gpt-4o")
        'openai/gpt-4o'
    """
    return f"{provider}/{model}"

5583
tests/integrations/python/uv.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,340 @@
# Bifrost TypeScript Integration Tests
TypeScript/JavaScript integration test suite for testing AI providers through the Bifrost proxy. The suite uses Vitest and provides comprehensive coverage across multiple AI SDKs.
## Quick Start
```bash
# 1. Install dependencies
cd bifrost/tests/integrations/typescript
npm install
# 2. Set environment variables
export BIFROST_BASE_URL="http://localhost:8080"
export OPENAI_API_KEY="your-key"
export ANTHROPIC_API_KEY="your-key"
export GEMINI_API_KEY="your-key"
# 3. Run tests
npm test # All tests
npm test -- tests/test-openai.test.ts # Specific SDK
npm test -- -t "Simple Chat" # By pattern
```
## Architecture Overview
The TypeScript integration tests use the same centralized configuration as the Python tests, routing all AI requests through Bifrost:
```text
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│ Test Client │───▶│ Bifrost Gateway │───▶│ AI Provider │
│ (TypeScript) │ │ localhost:8080 │ │ (OpenAI, etc.) │
└─────────────────┘ └─────────────────┘ └─────────────────┘
```
## Supported SDKs
| SDK | Package | Features |
|-----|---------|----------|
| **OpenAI** | `openai` | Chat, Streaming, Tools, Vision, Speech, Embeddings |
| **Anthropic** | `@anthropic-ai/sdk` | Chat, Streaming, Tools, Vision, Thinking |
| **Google GenAI** | `@google/generative-ai` | Chat, Streaming, Tools, Vision, Embeddings |
| **LangChain.js** | `@langchain/*` | Chat, Streaming, Tools, Structured Output |
## Test Scenarios
Each SDK test file covers these scenarios where supported:
### Core Chat
1. **Simple Chat** - Basic single-message conversations
2. **Multi-turn Conversation** - Context retention across messages
3. **Streaming Chat** - Real-time streaming responses
### Tool Calling
4. **Single Tool Call** - Basic function calling
5. **Multiple Tool Calls** - Multiple tools in single request
6. **End-to-End Tool Calling** - Complete workflow with results
### Vision
7. **Image URL** - Image analysis from URLs
8. **Image Base64** - Image analysis from base64 data
9. **Multiple Images** - Multi-image comparison
### Advanced Features
10. **Speech Synthesis** - Text-to-speech (OpenAI)
11. **Transcription** - Speech-to-text (OpenAI)
12. **Embeddings** - Text-to-vector conversion
13. **Structured Output** - Schema-based responses
14. **Thinking/Reasoning** - Extended reasoning modes
## Directory Structure
```text
typescript/
├── package.json # Dependencies and scripts
├── tsconfig.json # TypeScript configuration
├── vitest.config.ts # Vitest test configuration
├── config.yml # Shared config (symlink to ../python/config.yml)
├── README.md # This file
├── src/
│ └── utils/
│ ├── config-loader.ts # Configuration loading
│ ├── common.ts # Test data and assertions
│ ├── parametrize.ts # Cross-provider utilities
│ └── index.ts # Barrel export
└── tests/
├── setup.ts # Global test setup
├── test-openai.test.ts # OpenAI SDK tests
├── test-anthropic.test.ts # Anthropic SDK tests
├── test-google.test.ts # Google GenAI tests
└── test-langchain.test.ts # LangChain.js tests
```
## Configuration
### Shared Configuration
The TypeScript tests share their configuration with the Python tests: `config.yml` in this directory is a symlink to `../python/config.yml`, so both suites always read the same settings:
```bash
# Both test suites use the same configuration format
tests/integrations/typescript/config.yml # TypeScript tests
tests/integrations/python/config.yml # Python tests
```
This ensures consistent:
- Provider model configurations
- Scenario capability mappings
- API settings (timeouts, retries)
- Virtual key settings
### Environment Variables
**Required:**
```bash
export BIFROST_BASE_URL="http://localhost:8080"
```
**Provider API Keys (at least one required):**
```bash
export OPENAI_API_KEY="sk-..."
export ANTHROPIC_API_KEY="sk-ant-..."
export GEMINI_API_KEY="AIza..."
```
**Optional:**
```bash
export AWS_ACCESS_KEY_ID="..." # For Bedrock
export AWS_SECRET_ACCESS_KEY="..."
export COHERE_API_KEY="..."
```
## Running Tests
### Using npm scripts
```bash
# Run all tests
npm test
# Run tests with verbose output
npm test -- --reporter=verbose
# Run tests in watch mode
npm run test:watch
# Run with coverage
npm run test:coverage
# Run with UI
npm run test:ui
```
### Filtering tests
```bash
# Run specific test file
npm test -- tests/test-openai.test.ts
# Run tests matching pattern
npm test -- -t "Simple Chat"
npm test -- -t "Tool"
npm test -- -t "Streaming"
# Run tests for specific provider
npm test -- tests/test-anthropic.test.ts -t "Streaming"
```
### Using Makefile
From the repository root:
```bash
# Run TypeScript integration tests
make test-integrations LANG=ts
# Run specific SDK tests
make test-integrations LANG=ts INTEGRATION=openai
# Run with pattern
make test-integrations LANG=ts PATTERN="tool"
# Verbose output
make test-integrations LANG=ts VERBOSE=1
```
## Cross-Provider Testing
The OpenAI test file supports cross-provider testing through Bifrost's model name routing. By formatting the model name as `provider/model`, Bifrost routes the request to the appropriate provider:
```typescript
import { formatProviderModel } from '../src/utils'
const client = new OpenAI({
baseURL: 'http://localhost:8080/openai',
apiKey: 'your-api-key',
})
// Route to Anthropic using the model name format
const response = await client.chat.completions.create({
model: formatProviderModel('anthropic', 'claude-sonnet-4-20250514'),
// Results in: "anthropic/claude-sonnet-4-20250514"
messages: [{ role: 'user', content: 'Hello' }],
})
// Route to Bedrock
const bedrockResponse = await client.chat.completions.create({
model: formatProviderModel('bedrock', 'global.anthropic.claude-sonnet-4-20250514-v1:0'),
// Results in: "bedrock/global.anthropic.claude-sonnet-4-20250514-v1:0"
messages: [{ role: 'user', content: 'Hello' }],
})
```
This allows testing any provider using the OpenAI SDK format while Bifrost handles the routing based on the model name prefix.
## Writing New Tests
### Basic Test Structure
```typescript
import { describe, it, expect } from 'vitest'
import OpenAI from 'openai'
import { getIntegrationUrl, getProviderModel } from '../src/utils'
describe('My Feature Tests', () => {
it('should do something', async () => {
const client = new OpenAI({
baseURL: getIntegrationUrl('openai'),
apiKey: process.env.OPENAI_API_KEY,
})
const response = await client.chat.completions.create({
model: getProviderModel('openai', 'chat'),
messages: [{ role: 'user', content: 'Hello' }],
})
expect(response.choices[0].message.content).toBeDefined()
})
})
```
### Using Test Utilities
```typescript
import {
SIMPLE_CHAT_MESSAGES,
WEATHER_TOOL,
assertValidChatResponse,
assertHasToolCalls,
convertToOpenAITools,
} from '../src/utils'
// Use predefined test messages
const response = await client.chat.completions.create({
model,
messages: SIMPLE_CHAT_MESSAGES,
})
// Use assertion helpers
assertValidChatResponse(response)
assertHasToolCalls(response, 1)
// Use tool conversion utilities
const tools = convertToOpenAITools([WEATHER_TOOL])
```
### Cross-Provider Parametrization
```typescript
import { getCrossProviderParamsWithVkForScenario } from '../src/utils'
describe('Cross-Provider Tests', () => {
const testCases = getCrossProviderParamsWithVkForScenario('simple_chat')
it.each(testCases)(
'should work - $provider (VK: $vkEnabled)',
async ({ provider, model, vkEnabled }) => {
// Test implementation
}
)
})
```
## Troubleshooting
### Common Issues
**1. Connection Refused**
```text
Error: connect ECONNREFUSED 127.0.0.1:8080
```
Solution: Ensure Bifrost is running on the expected port.
**2. API Key Not Set**
```text
Error: OPENAI_API_KEY environment variable not set
```
Solution: Set the required environment variables.
**3. Timeout Errors**
```text
Error: Timeout of 300000ms exceeded
```
Solution: Check network connectivity and Bifrost logs.
### Debug Mode
```bash
# Run with debug output
DEBUG=* npm test -- tests/test-openai.test.ts
# Check Bifrost logs
tail -f /tmp/bifrost-test.log
```
## Integration with Python Tests
The TypeScript and Python test suites share:
- **Configuration** (`config.yml`) - Same provider/model settings
- **Test Scenarios** - Same test categories and assertions
- **Makefile Integration** - Unified `test-integrations` command
To run both:
```bash
# Python tests
make test-integrations-py
# TypeScript tests
make test-integrations-ts
# Both
make test-integrations-py && make test-integrations-ts
```
## Contributing
1. Follow the existing test structure
2. Use the shared utilities from `src/utils/`
3. Add tests for all applicable scenarios
4. Ensure tests pass locally before submitting
5. Update this README if adding new SDKs or features

View File

@@ -0,0 +1,225 @@
{
"$schema": "https://www.getbifrost.ai/schema",
"providers": {
"openai": {
"keys": [
{
"name": "OpenAI API Key",
"value": "env.OPENAI_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"anthropic": {
"keys": [
{
"name": "Anthropic API Key",
"value": "env.ANTHROPIC_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"gemini": {
"keys": [
{
"name": "Gemini API Key",
"value": "env.GEMINI_API_KEY",
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"vertex": {
"keys": [
{
"name": "Vertex API Key",
"vertex_key_config": {
"project_id": "env.GOOGLE_PROJECT_ID",
"region": "env.GOOGLE_LOCATION"
},
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"mistral": {
"keys": [
{
"name": "Mistral API Key",
"value": "env.MISTRAL_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"cohere": {
"keys": [
{
"name": "Cohere API Key",
"value": "env.COHERE_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"groq": {
"keys": [
{
"name": "Groq API Key",
"value": "env.GROQ_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"perplexity": {
"keys": [
{
"name": "Perplexity API Key",
"value": "env.PERPLEXITY_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"cerebras": {
"keys": [
{
"name": "Cerebras API Key",
"value": "env.CEREBRAS_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"openrouter": {
"keys": [
{
"name": "OpenRouter API Key",
"value": "env.OPENROUTER_API_KEY",
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"azure": {
"keys": [
{
"name": "Azure OpenAI API Key",
"value": "env.AZURE_OPENAI_API_KEY",
"azure_key_config": {
"endpoint": "env.AZURE_OPENAI_ENDPOINT",
"api_version": "env.AZURE_OPENAI_API_VERSION"
},
"weight": 1,
"models": ["*"]
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
},
"bedrock": {
"keys": [
{
"name": "Bedrock API Key",
"bedrock_key_config": {
"access_key": "env.AWS_ACCESS_KEY_ID",
"secret_key": "env.AWS_SECRET_ACCESS_KEY",
"region": "env.AWS_REGION",
"arn": "env.AWS_ARN"
},
"weight": 1,
"models": ["*"],
"use_for_batch_api": true
}
],
"network_config": {
"default_request_timeout_in_seconds": 300
}
}
},
"config_store": {
"enabled": true,
"type": "sqlite",
"config": {
"path": "../../tests/integrations/typescript/config.db"
}
},
"logs_store": {
"enabled": true,
"type": "sqlite",
"config": {
"path": "../../tests/integrations/typescript/logs.db"
}
},
"governance": {
"virtual_keys": [
{
"id": "vk-test",
"value": "sk-bf-test-key",
"is_active": true,
"provider_configs": [
{ "provider": "openai", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "anthropic", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "gemini", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "vertex", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "mistral", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "cohere", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "groq", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "perplexity", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "cerebras", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "openrouter", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "azure", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
{ "provider": "bedrock", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 }
]
}
]
},
"client": {
"drop_excess_requests": false,
"initial_pool_size": 300,
"allowed_origins": [
"*"
],
"enable_logging": true,
"enforce_auth_on_inference": false,
"allow_direct_keys": false,
"max_request_body_size_mb": 100
}
}

View File

@@ -0,0 +1 @@
../python/config.yml

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,42 @@
{
"name": "bifrost-integration-tests-typescript",
"version": "0.1.0",
"description": "TypeScript integration tests for Bifrost AI gateway",
"type": "module",
"scripts": {
"test": "vitest run",
"test:watch": "vitest",
"test:coverage": "vitest run --coverage",
"test:ui": "vitest --ui",
"typecheck": "tsc --noEmit",
"lint": "eslint src tests --ext .ts"
},
"devDependencies": {
"@types/node": "^22.10.0",
"@typescript-eslint/eslint-plugin": "^8.0.0",
"@typescript-eslint/parser": "^8.0.0",
"@vitest/coverage-v8": "^2.1.0",
"@vitest/ui": "^2.1.0",
"dotenv": "^16.4.0",
"eslint": "^9.0.0",
"typescript": "^5.7.0",
"vitest": "^2.1.0"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.71.2",
"@aws-sdk/client-bedrock": "^3.966.0",
"@aws-sdk/client-bedrock-runtime": "^3.965.0",
"@google/generative-ai": "^0.24.1",
"@langchain/anthropic": "^1.3.26",
"@langchain/core": "^1.1.39",
"@langchain/google-genai": "^2.1.26",
"@langchain/openai": "^1.4.4",
"langsmith": "^0.5.19",
"openai": "^6.15.0",
"yaml": "^2.6.0",
"zod": "^3.24.0"
},
"engines": {
"node": ">=25.0.0"
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,477 @@
/**
* Configuration loader for Bifrost integration tests.
*
* This module loads configuration from config.yml and provides utilities
* for constructing integration URLs through the Bifrost gateway.
*/
import { readFileSync, existsSync } from 'fs'
import { resolve, dirname } from 'path'
import { fileURLToPath } from 'url'
import { parse as parseYaml } from 'yaml'
// Get __dirname equivalent for ES modules
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)
// Integration to provider mapping
// Maps integration names to their underlying provider configurations.
// The keys are the only integration names getModel()/listModels() accept, and
// validateConfig() requires every value to exist under `providers` in config.yml.
export const INTEGRATION_TO_PROVIDER_MAP: Record<string, string> = {
openai: 'openai',
anthropic: 'anthropic',
google: 'gemini', // Google integration uses Gemini provider
litellm: 'openai', // LiteLLM defaults to OpenAI
langchain: 'openai', // LangChain defaults to OpenAI
pydanticai: 'openai', // Pydantic AI defaults to OpenAI
bedrock: 'bedrock', // Bedrock defaults to Amazon provider
azure: 'azure',
}
/** The `bifrost` section of the config: where the gateway lives and which path serves each integration. */
export interface BifrostConfig {
// Gateway base URL; getIntegrationUrl() strips trailing slashes before appending an endpoint
base_url: string
// Endpoint path keyed by integration name
endpoints: Record<string, string>
}
/** The `api` section of the config (request timeout and retry settings). */
export interface ApiConfig {
// printConfigSummary() prints this with an "s" suffix — presumably seconds; confirm against config.yml
timeout: number
max_retries: number
// Also printed with an "s" suffix — presumably seconds; confirm against config.yml
retry_delay: number
}
/** The `test_settings` section of the config, returned as-is by getTestSettings(). */
export interface TestSettings {
// A null entry is allowed by the type; NOTE(review): its meaning is not visible here — confirm with config.yml
max_tokens: Record<string, number | null>
timeouts: Record<string, number>
retries: {
max_attempts: number
delay: number
}
}
/**
 * Scenario-support flags for one provider, keyed by scenario name.
 * providerSupportsScenario() treats only a literal `true` as supported.
 */
export interface ProviderScenarios {
[scenario: string]: boolean
}
/**
 * Shape that the parsed config.yml is cast to after environment-variable
 * expansion. The cast is unchecked; validateConfig() only verifies that
 * `bifrost`, `providers`, `api` and `test_settings` are present.
 */
export interface RawConfig {
bifrost: BifrostConfig
api: ApiConfig
// provider name → (capability name → model name); the `alternatives` key holds a list of model names
providers: Record<string, Record<string, string | string[]>>
// provider name → NAME of the environment variable holding its API key (not the key itself)
provider_api_keys: Record<string, string>
provider_scenarios: Record<string, ProviderScenarios>
// scenario name → capability name; lookups fall back to 'chat' when a scenario is not listed
scenario_capabilities: Record<string, string>
// model name → capability flags/limits
model_capabilities: Record<string, Record<string, unknown>>
test_settings: TestSettings
integration_settings: Record<string, Record<string, unknown>>
// environment name → settings; the name is chosen via the TEST_ENV environment variable or 'development'
environments: Record<string, Record<string, unknown>>
logging: Record<string, unknown>
// getVirtualKey() returns `value` only while `enabled` is truthy
virtual_key?: {
enabled: boolean
value: string
}
}
/**
 * Loads the integration-test configuration from a YAML file and exposes
 * accessors over it.
 *
 * String values may contain `${VAR}` or `${VAR:-default}` placeholders; they
 * are expanded from `process.env` once, when the file is loaded.
 */
class ConfigLoader {
  private config: RawConfig | null = null
  private configPath: string

  /**
   * @param configPath - Path of the YAML file. When omitted, `config.yml` two
   *   directories above this module is used.
   * @throws Error if the file is missing, cannot be parsed, or is not a top-level object.
   */
  constructor(configPath?: string) {
    // Default: config.yml in project root (symlinked from python)
    this.configPath = configPath ? configPath : resolve(__dirname, '../../config.yml')
    this.loadConfig()
  }

  /** Reads, parses and env-expands the YAML file into `this.config`. */
  private loadConfig(): void {
    if (!existsSync(this.configPath)) {
      throw new Error(`Configuration file not found: ${this.configPath}`)
    }
    const rawContent = readFileSync(this.configPath, 'utf-8')
    let rawConfig: unknown
    try {
      rawConfig = parseYaml(rawContent)
    } catch (e) {
      throw new Error(`Failed to parse YAML config at ${this.configPath}: ${String(e)}`)
    }
    if (rawConfig == null || typeof rawConfig !== 'object') {
      throw new Error(`Invalid YAML config at ${this.configPath}: expected a top-level object`)
    }
    // Expand environment variables; the cast is unchecked, see validateConfig()
    this.config = this.expandEnvVars(rawConfig) as RawConfig
  }

  /** Returns the loaded config, or throws when nothing has been loaded. */
  private requireConfig(): RawConfig {
    if (!this.config) throw new Error('Config not loaded')
    return this.config
  }

  /** Joins a base URL and an endpoint path with exactly one slash between them. */
  private static joinUrl(baseUrl: string, endpoint: string): string {
    const base = baseUrl.replace(/\/+$/, '')
    const ep = String(endpoint).replace(/^\/+/, '')
    return `${base}/${ep}`
  }

  /**
   * Recursively expands `${VAR}` and `${VAR:-default}` in every string found in
   * `obj`. Arrays and plain objects are copied; other values pass through.
   *
   * An unset OR empty variable yields the default (or `''` when no default is
   * given), like the shell's `:-` operator.
   */
  private expandEnvVars(obj: unknown): unknown {
    if (Array.isArray(obj)) {
      return obj.map((item) => this.expandEnvVars(item))
    }
    if (typeof obj === 'object' && obj !== null) {
      const result: Record<string, unknown> = {}
      for (const [key, value] of Object.entries(obj)) {
        result[key] = this.expandEnvVars(value)
      }
      return result
    }
    if (typeof obj === 'string') {
      return obj.replace(/\$\{([^}]+)\}/g, (_, varExpr: string) => {
        // Split on the FIRST ':-' only, so a default that itself contains
        // ':-' is kept whole instead of being truncated.
        const sep = varExpr.indexOf(':-')
        if (sep === -1) {
          return process.env[varExpr] || ''
        }
        const varName = varExpr.slice(0, sep)
        const defaultValue = varExpr.slice(sep + 2)
        return process.env[varName] || defaultValue
      })
    }
    return obj
  }

  /**
   * Builds the gateway URL for an integration.
   *
   * @throws Error if no endpoint is configured for `integration`.
   */
  getIntegrationUrl(integration: string): string {
    const { base_url: baseUrl, endpoints } = this.requireConfig().bifrost
    const endpoint = endpoints[integration]
    if (!endpoint) {
      throw new Error(`No endpoint configured for integration: ${integration}`)
    }
    // Normalize URL to avoid double slashes
    return ConfigLoader.joinUrl(baseUrl, endpoint)
  }

  /** Returns the `bifrost` section of the config. */
  getBifrostConfig(): BifrostConfig {
    return this.requireConfig().bifrost
  }

  /**
   * Returns the model configured for an integration's underlying provider.
   *
   * @returns The model name, or `''` when the provider has none for `modelType`.
   * @throws Error if `integration` is not a key of INTEGRATION_TO_PROVIDER_MAP.
   */
  getModel(integration: string, modelType: string = 'chat'): string {
    const provider = INTEGRATION_TO_PROVIDER_MAP[integration]
    if (!provider) {
      throw new Error(
        `Unknown integration: ${integration}. Valid integrations: ${Object.keys(INTEGRATION_TO_PROVIDER_MAP).join(', ')}`
      )
    }
    return this.getProviderModel(provider, modelType)
  }

  /** Returns the `alternatives` list of the integration's provider, or `[]` when unavailable. */
  getModelAlternatives(integration: string): string[] {
    const provider = INTEGRATION_TO_PROVIDER_MAP[integration]
    if (!provider || !this.config?.providers?.[provider]) {
      return []
    }
    const alternatives = this.config.providers[provider].alternatives
    return Array.isArray(alternatives) ? alternatives : []
  }

  /**
   * Returns the capability record for a model. Unknown models, or a config
   * without a `model_capabilities` section, get a chat-only default.
   */
  getModelCapabilities(model: string): Record<string, unknown> {
    // `model_capabilities` is not among the sections validateConfig() requires,
    // so tolerate its absence instead of throwing a TypeError.
    return (
      this.requireConfig().model_capabilities?.[model] || {
        chat: true,
        tools: false,
        vision: false,
        max_tokens: 4096,
        context_window: 4096,
      }
    )
  }

  /** True only when the model's capability entry is exactly `true`. */
  supportsCapability(model: string, capability: string): boolean {
    const caps = this.getModelCapabilities(model)
    return caps[capability] === true
  }

  /** Returns the `api` section of the config. */
  getApiConfig(): ApiConfig {
    return this.requireConfig().api
  }

  /** Returns the `test_settings` section of the config. */
  getTestSettings(): TestSettings {
    return this.requireConfig().test_settings
  }

  /** Returns the settings for one integration, or `{}` when none are configured. */
  getIntegrationSettings(integration: string): Record<string, unknown> {
    return this.requireConfig().integration_settings?.[integration] || {}
  }

  /**
   * Returns the settings of an environment, or `{}` when none are configured.
   * The environment defaults to `process.env.TEST_ENV`, then `'development'`.
   */
  getEnvironmentConfig(environment?: string): Record<string, unknown> {
    const config = this.requireConfig()
    const env = environment || process.env.TEST_ENV || 'development'
    return config.environments?.[env] || {}
  }

  /** Returns the `logging` section of the config. */
  getLoggingConfig(): Record<string, unknown> {
    return this.requireConfig().logging
  }

  /** Lists every known integration name. */
  listIntegrations(): string[] {
    return Object.keys(INTEGRATION_TO_PROVIDER_MAP)
  }

  /**
   * Returns provider model tables keyed by integration name.
   *
   * With `integration`, the result has that single key and the call throws if
   * the integration is unknown or its provider is not configured. Without it,
   * every integration whose provider is configured is included.
   */
  listModels(integration?: string): Record<string, unknown> {
    const config = this.requireConfig()
    if (integration) {
      const provider = INTEGRATION_TO_PROVIDER_MAP[integration]
      if (!provider) {
        throw new Error(`Unknown integration: ${integration}`)
      }
      if (!config.providers?.[provider]) {
        throw new Error(`No provider configuration for: ${provider}`)
      }
      return { [integration]: config.providers[provider] }
    }
    const result: Record<string, unknown> = {}
    for (const [integ, provider] of Object.entries(INTEGRATION_TO_PROVIDER_MAP)) {
      if (config.providers?.[provider]) {
        result[integ] = config.providers[provider]
      }
    }
    return result
  }

  /**
   * Checks that the required sections exist and that every integration maps to
   * a configured provider.
   *
   * @returns `true` when the config is valid.
   * @throws Error describing the first problem found.
   */
  validateConfig(): boolean {
    const config = this.requireConfig()
    const requiredSections = ['bifrost', 'providers', 'api', 'test_settings']
    for (const section of requiredSections) {
      if (!(section in config)) {
        throw new Error(`Missing required configuration section: ${section}`)
      }
    }
    // Validate Bifrost configuration
    const bifrost = config.bifrost
    if (!bifrost.base_url || !bifrost.endpoints) {
      throw new Error('Bifrost configuration missing base_url or endpoints')
    }
    // Validate that all integrations map to valid providers
    for (const [integration, provider] of Object.entries(INTEGRATION_TO_PROVIDER_MAP)) {
      if (!config.providers[provider]) {
        throw new Error(
          `Integration '${integration}' maps to provider '${provider}' which is not configured in providers section`
        )
      }
    }
    return true
  }

  /** Prints the gateway endpoints, per-integration models and API settings to the console. */
  printConfigSummary(): void {
    const config = this.requireConfig()
    console.log('🔧 BIFROST INTEGRATION TEST CONFIGURATION (TypeScript)')
    console.log('='.repeat(80))
    // Bifrost configuration
    const bifrost = this.getBifrostConfig()
    console.log('\n🌉 BIFROST GATEWAY:')
    console.log(` Base URL: ${bifrost.base_url}`)
    console.log(' Endpoints:')
    for (const [integration, endpoint] of Object.entries(bifrost.endpoints)) {
      // Same normalization as getIntegrationUrl(), so the printed URL is the one tests use
      const fullUrl = ConfigLoader.joinUrl(bifrost.base_url, endpoint)
      console.log(` ${integration}: ${fullUrl}`)
    }
    // Model configurations
    console.log('\n🤖 MODEL CONFIGURATIONS (via providers):')
    for (const [integration, provider] of Object.entries(INTEGRATION_TO_PROVIDER_MAP)) {
      if (config.providers?.[provider]) {
        const models = config.providers[provider]
        console.log(` ${integration.toUpperCase()}${provider}:`)
        console.log(` Chat: ${models.chat || 'N/A'}`)
        console.log(` Vision: ${models.vision || 'N/A'}`)
        console.log(` Tools: ${models.tools || 'N/A'}`)
        const alternatives = models.alternatives
        console.log(` Alternatives: ${Array.isArray(alternatives) ? alternatives.length : 0} models`)
      }
    }
    // API settings
    const apiConfig = this.getApiConfig()
    console.log('\n⚙ API SETTINGS:')
    console.log(` Timeout: ${apiConfig.timeout}s`)
    console.log(` Max Retries: ${apiConfig.max_retries}`)
    console.log(` Retry Delay: ${apiConfig.retry_delay}s`)
    console.log(`\n✅ Configuration loaded successfully from: ${this.configPath}`)
  }

  /**
   * Returns the model a provider uses for a capability, or `''` when the
   * provider, the capability, or a string-valued entry is missing.
   */
  getProviderModel(provider: string, capability: string = 'chat'): string {
    const providerModels = this.config?.providers?.[provider]
    if (!providerModels) {
      return ''
    }
    const model = providerModels[capability]
    return typeof model === 'string' ? model : ''
  }

  /** Returns the NAME of the environment variable holding the provider's API key, or `''`. */
  getProviderApiKeyEnv(provider: string): string {
    if (!this.config?.provider_api_keys) {
      return ''
    }
    return this.config.provider_api_keys[provider] || ''
  }

  /** True when the provider's API-key environment variable is set to a non-blank value. */
  isProviderAvailable(provider: string): boolean {
    const envVar = this.getProviderApiKeyEnv(provider)
    if (!envVar) {
      return false
    }
    const apiKey = process.env[envVar]
    return apiKey !== undefined && apiKey.trim() !== ''
  }

  /** Lists the configured providers whose API key is available, in config order. */
  getAvailableProviders(): string[] {
    if (!this.config?.providers) {
      return []
    }
    return Object.keys(this.config.providers).filter((provider) => this.isProviderAvailable(provider))
  }

  /** True only when `provider_scenarios[provider][scenario]` is exactly `true`. */
  providerSupportsScenario(provider: string, scenario: string): boolean {
    if (!this.config?.provider_scenarios?.[provider]) {
      return false
    }
    return this.config.provider_scenarios[provider][scenario] === true
  }

  /** Lists the available providers that support the scenario. */
  getProvidersForScenario(scenario: string): string[] {
    return this.getAvailableProviders().filter((provider) => this.providerSupportsScenario(provider, scenario))
  }

  /** Returns the capability a scenario maps to, defaulting to `'chat'`. */
  getScenarioCapability(scenario: string): string {
    if (!this.config?.scenario_capabilities) {
      return 'chat'
    }
    return this.config.scenario_capabilities[scenario] || 'chat'
  }

  /** Returns the virtual key value, or `''` when the virtual key is disabled or absent. */
  getVirtualKey(): string {
    if (!this.config?.virtual_key?.enabled) {
      return ''
    }
    return this.config.virtual_key.value || ''
  }

  /** True when an enabled, non-blank virtual key is configured. */
  isVirtualKeyConfigured(): boolean {
    const vk = this.getVirtualKey()
    return vk.trim() !== ''
  }
}
// Module-wide ConfigLoader singleton; stays null until getConfig() is first called
let configLoader: ConfigLoader | null = null

/**
 * Returns the shared ConfigLoader, constructing it on first use.
 * Construction loads the default config file and may therefore throw.
 */
export function getConfig(): ConfigLoader {
  if (configLoader === null) {
    configLoader = new ConfigLoader()
  }
  return configLoader
}
/**
 * Gateway URL for an integration, resolved through the shared config.
 *
 * @throws Error if no endpoint is configured for `integration`.
 */
export function getIntegrationUrl(integration: string): string {
  const loader = getConfig()
  return loader.getIntegrationUrl(integration)
}
/**
 * Model configured for an integration and model type (default `'chat'`),
 * resolved through the shared config.
 */
export function getModel(integration: string, modelType: string = 'chat'): string {
  const loader = getConfig()
  return loader.getModel(integration, modelType)
}
/** Capability record for a model, resolved through the shared config. */
export function getModelCapabilities(model: string): Record<string, unknown> {
  const loader = getConfig()
  return loader.getModelCapabilities(model)
}
/** Whether the shared config marks `capability` as supported for `model`. */
export function supportsCapability(model: string, capability: string): boolean {
  const loader = getConfig()
  return loader.supportsCapability(model, capability)
}
/**
 * Model a provider uses for a capability (default `'chat'`), resolved through
 * the shared config; `''` when none is configured.
 */
export function getProviderModel(provider: string, capability: string = 'chat'): string {
  const loader = getConfig()
  return loader.getProviderModel(provider, capability)
}
/** Whether the provider's API-key environment variable is set to a non-blank value. */
export function isProviderAvailable(provider: string): boolean {
  const loader = getConfig()
  return loader.isProviderAvailable(provider)
}
/** Configured providers whose API key is available, per the shared config. */
export function getAvailableProviders(): string[] {
  const loader = getConfig()
  return loader.getAvailableProviders()
}
/** Whether the shared config flags `scenario` as supported by `provider`. */
export function providerSupportsScenario(provider: string, scenario: string): boolean {
  const loader = getConfig()
  return loader.providerSupportsScenario(provider, scenario)
}
/** Available providers that support `scenario`, per the shared config. */
export function getProvidersForScenario(scenario: string): string[] {
  const loader = getConfig()
  return loader.getProvidersForScenario(scenario)
}
/** Virtual key value from the shared config; `''` when disabled or absent. */
export function getVirtualKey(): string {
  const loader = getConfig()
  return loader.getVirtualKey()
}
/** Whether the shared config has an enabled, non-blank virtual key. */
export function isVirtualKeyConfigured(): boolean {
  const loader = getConfig()
  return loader.isVirtualKeyConfigured()
}
/** The `api` section of the shared config. */
export function getApiConfig(): ApiConfig {
  const loader = getConfig()
  return loader.getApiConfig()
}
/** The `test_settings` section of the shared config. */
export function getTestSettings(): TestSettings {
  const loader = getConfig()
  return loader.getTestSettings()
}
/** Settings for one integration from the shared config; `{}` when none are configured. */
export function getIntegrationSettings(integration: string): Record<string, unknown> {
  const loader = getConfig()
  return loader.getIntegrationSettings(integration)
}
// Export class for direct use if needed
export { ConfigLoader }

View File

@@ -0,0 +1,12 @@
/**
* Barrel export for all utility modules
*/
// Config loader
export * from './config-loader'
// Common test utilities
export * from './common'
// Parametrization utilities
export * from './parametrize'

View File

@@ -0,0 +1,202 @@
/**
* Parametrization utilities for cross-provider testing.
*
* This module provides utilities for testing across multiple AI providers
* with automatic scenario-based filtering.
*/
import { getConfig } from './config-loader'
/**
 * One parametrized test case: a provider and the model to use with it.
 * The pair `_no_providers_` / `_no_model_` is a placeholder meaning that no
 * provider was available (see shouldSkipNoProviders).
 */
export interface ProviderModelParam {
provider: string
model: string
}
/** A provider/model test case plus a flag selecting the virtual-key (VK) variant of the test. */
export interface ProviderModelVkParam extends ProviderModelParam {
vkEnabled: boolean
}
/**
 * Get cross-provider parameters for a specific scenario.
 *
 * Providers that support the scenario are narrowed by the optional filters and
 * paired with the model configured for the capability the scenario maps to.
 * Providers without such a model are left out.
 *
 * @param scenario - Test scenario name
 * @param includeProviders - Optional list of providers to include; an empty or
 *   missing list means "no include filter"
 * @param excludeProviders - Optional list of providers to exclude
 * @returns Array of `{ provider, model }` objects sorted by provider name. When
 *   nothing remains, a single `{ provider: '_no_providers_', model: '_no_model_' }`
 *   placeholder is returned so that test parametrization still gets one case.
 */
export function getCrossProviderParamsForScenario(
  scenario: string,
  includeProviders?: string[],
  excludeProviders?: string[]
): ProviderModelParam[] {
  const config = getConfig()

  // Get providers that support this scenario
  let providers = config.getProvidersForScenario(scenario)

  // Apply include filter
  if (includeProviders && includeProviders.length > 0) {
    providers = providers.filter((p) => includeProviders.includes(p))
  }

  // Apply exclude filter
  if (excludeProviders && excludeProviders.length > 0) {
    providers = providers.filter((p) => !excludeProviders.includes(p))
  }

  // scenario → capability depends only on the scenario, so resolve it once
  const capability = config.getScenarioCapability(scenario)

  const params: ProviderModelParam[] = []
  // Sort a copy: the array may belong to the config loader and must not be reordered in place
  for (const provider of [...providers].sort()) {
    const model = config.getProviderModel(provider, capability)
    // Only add if provider has a model for this scenario's capability
    if (model) {
      params.push({ provider, model })
    }
  }

  // If no providers are available, return a placeholder entry to avoid test errors;
  // the test is expected to skip itself (see shouldSkipNoProviders)
  if (params.length === 0) {
    params.push({ provider: '_no_providers_', model: '_no_model_' })
  }
  return params
}
/**
 * Build cross-provider test parameters that also carry a virtual-key (VK) flag.
 *
 * Each provider/model pair from getCrossProviderParamsForScenario() is emitted
 * with `vkEnabled: false`; when a virtual key is configured, the same pair
 * follows immediately with `vkEnabled: true`.
 *
 * @param scenario - Test scenario name
 * @param includeProviders - Optional list of providers to include
 * @param excludeProviders - Optional list of providers to exclude
 * @returns Array of `{ provider, model, vkEnabled }` objects; a single
 *   placeholder entry with `vkEnabled: false` when no provider is available
 */
export function getCrossProviderParamsWithVkForScenario(
  scenario: string,
  includeProviders?: string[],
  excludeProviders?: string[]
): ProviderModelVkParam[] {
  const config = getConfig()
  const pairs = getCrossProviderParamsForScenario(scenario, includeProviders, excludeProviders)

  // Pass the "no providers" placeholder through with the VK flag off
  const isPlaceholder = pairs.length === 1 && pairs[0].provider === '_no_providers_'
  if (isPlaceholder) {
    return [{ provider: '_no_providers_', model: '_no_model_', vkEnabled: false }]
  }

  // The plain variant always runs; the VK variant only when one is configured
  const vkFlags = config.isVirtualKeyConfigured() ? [false, true] : [false]
  return pairs.flatMap(({ provider, model }) =>
    vkFlags.map((vkEnabled) => ({ provider, model, vkEnabled }))
  )
}
/**
 * Build the test ID used for virtual-key parameterized tests.
 *
 * The ID is "<provider>-<model>-with_vk" when VK is enabled and
 * "<provider>-<model>-no_vk" otherwise.
 *
 * @param provider - Provider name
 * @param model - Model name
 * @param vkEnabled - Whether VK is enabled
 * @returns Formatted test ID string
 */
export function formatVkTestId(provider: string, model: string, vkEnabled: boolean): string {
  const segments = [provider, model, vkEnabled ? 'with_vk' : 'no_vk']
  return segments.join('-')
}
/**
 * Join a provider and a model into the "provider/model" form.
 *
 * @param provider - Provider name
 * @param model - Model name
 * @returns The string "provider/model"
 */
export function formatProviderModel(provider: string, model: string): string {
  return provider + '/' + model
}
/**
 * Tell whether a parameter entry is the '_no_providers_' placeholder,
 * i.e. whether the test receiving it should be skipped.
 *
 * @param params - A provider/model entry, with or without the VK flag
 * @returns true when the entry's provider is '_no_providers_'
 */
export function shouldSkipNoProviders(params: ProviderModelParam | ProviderModelVkParam): boolean {
  const { provider } = params
  return provider === '_no_providers_'
}
/**
 * Test cases for use with Vitest's describe.each / it.each.
 *
 * Delegates to getCrossProviderParamsForScenario with the same arguments and
 * returns its result unchanged.
 *
 * @example
 * ```typescript
 * const testCases = getTestCasesForScenario('simple_chat')
 * describe.each(testCases)('Simple Chat - $provider', ({ provider, model }) => {
 *   it('should complete a simple chat', async () => {
 *     // test implementation
 *   })
 * })
 * ```
 */
export function getTestCasesForScenario(
  scenario: string,
  includeProviders?: string[],
  excludeProviders?: string[]
): ProviderModelParam[] {
  const testCases = getCrossProviderParamsForScenario(scenario, includeProviders, excludeProviders)
  return testCases
}
/**
 * Test cases with virtual-key variants for Vitest's describe.each / it.each.
 *
 * Delegates to getCrossProviderParamsWithVkForScenario with the same arguments
 * and returns its result unchanged.
 *
 * @example
 * ```typescript
 * const testCases = getTestCasesWithVkForScenario('simple_chat')
 * describe.each(testCases)('Simple Chat - $provider (VK: $vkEnabled)', ({ provider, model, vkEnabled }) => {
 *   it('should complete a simple chat', async () => {
 *     // test implementation
 *   })
 * })
 * ```
 */
export function getTestCasesWithVkForScenario(
  scenario: string,
  includeProviders?: string[],
  excludeProviders?: string[]
): ProviderModelVkParam[] {
  const testCases = getCrossProviderParamsWithVkForScenario(scenario, includeProviders, excludeProviders)
  return testCases
}
/**
 * Compose a test name of the form "<baseName> [<provider>/<model>]".
 *
 * @param baseName - Human-readable test name
 * @param provider - Provider name
 * @param model - Model name
 * @returns The decorated test name
 */
export function createTestName(baseName: string, provider: string, model: string): string {
  const target = '[' + provider + '/' + model + ']'
  return baseName + ' ' + target
}
/**
 * Compose a test name of the form "<baseName> [<provider>/<model>]", with
 * " (with VK)" appended when the virtual key variant is being tested.
 *
 * @param baseName - Human-readable test name
 * @param provider - Provider name
 * @param model - Model name
 * @param vkEnabled - Whether VK is enabled
 * @returns The decorated test name
 */
export function createTestNameWithVk(baseName: string, provider: string, model: string, vkEnabled: boolean): string {
  const label = `${baseName} [${provider}/${model}]`
  if (vkEnabled) {
    return `${label} (with VK)`
  }
  return label
}

View File

@@ -0,0 +1,60 @@
/**
* Global test setup for Vitest
*
* This file is loaded before all tests run.
* It sets up environment variables and global configuration.
*/
import { config } from 'dotenv'
import { resolve, dirname } from 'path'
import { fileURLToPath } from 'url'
// ESM modules have no __dirname/__filename globals; rebuild them from import.meta.url
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)
// Load .env from the project root first, then from the workspace root, in that order
const envFiles = ['../.env', '../../../../.env']
for (const relativePath of envFiles) {
  config({ path: resolve(__dirname, relativePath) })
}
// Fall back to a local Bifrost instance when no base URL is provided
if (!process.env.BIFROST_BASE_URL) {
  process.env.BIFROST_BASE_URL = 'http://localhost:8080'
}
// Banner describing the environment the tests run against
const divider = '='.repeat(50)
console.log('\n🧪 Bifrost TypeScript Integration Tests')
console.log(divider)
console.log(`📍 Bifrost URL: ${process.env.BIFROST_BASE_URL}`)
console.log(`🕐 Started at: ${new Date().toISOString()}`)
// Presence (not validity) of each provider credential in the environment
const apiKeys = {
  OpenAI: Boolean(process.env.OPENAI_API_KEY),
  Anthropic: Boolean(process.env.ANTHROPIC_API_KEY),
  Google: Boolean(process.env.GEMINI_API_KEY),
  Bedrock: Boolean(process.env.AWS_ACCESS_KEY_ID),
  Cohere: Boolean(process.env.COHERE_API_KEY),
  Azure: Boolean(process.env.AZURE_API_KEY),
}
console.log('\n🔑 Available API Keys:')
Object.entries(apiKeys).forEach(([provider, available]) => {
  console.log(` ${available ? '✅' : '❌'} ${provider}`)
})
console.log(divider + '\n')
// Global test timeout (can be overridden per test)
// This is set in vitest.config.ts but documented here
// Default: 300000ms (5 minutes) for integration tests

// Exported so tests can read the resolved URL and the lowercased names of the
// providers whose credential variable was present when this file was loaded
export const testEnvironment = {
  bifrostUrl: process.env.BIFROST_BASE_URL,
  availableProviders: Object.entries(apiKeys)
    .filter((entry) => entry[1])
    .map((entry) => entry[0].toLowerCase()),
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,662 @@
/**
* Bedrock Integration Tests - Cross-Provider Support
*
* This test suite uses the AWS SDK (v3) to test against multiple AI providers through Bifrost.
* Tests automatically run against all available providers with proper capability filtering.
* All requests include the x-model-provider header to route to the appropriate provider.
*
* Test Scenarios:
* 1. Simple chat (converse)
* 2. Multi-turn conversation (converse)
* 3. Streaming chat (converse-stream)
* 4. Single tool call (converse)
* 5. Multiple tool calls (converse)
* 6. End-to-end tool calling (converse)
* 7. Image analysis (converse)
* 8. System message handling (converse)
* 9. Streaming client disconnect / abort mid-stream (converse-stream)
*/
import {
BedrockRuntimeClient,
ConverseCommand,
ConverseStreamCommand,
type ContentBlock,
type Message,
type Tool,
type ToolConfiguration,
type ToolResultContentBlock,
type ToolUseBlock,
} from '@aws-sdk/client-bedrock-runtime'
import { describe, expect, it } from 'vitest'
import {
getConfig,
getIntegrationUrl,
getProviderModel,
} from '../src/utils/config-loader'
import {
BASE64_IMAGE,
CALCULATOR_TOOL,
LOCATION_KEYWORDS,
MULTI_TURN_MESSAGES,
MULTIPLE_TOOL_CALL_MESSAGES,
SIMPLE_CHAT_MESSAGES,
WEATHER_KEYWORDS,
WEATHER_TOOL,
mockToolResponse,
type ChatMessage,
type ToolDefinition,
} from '../src/utils/common'
import {
formatProviderModel,
getCrossProviderParamsWithVkForScenario,
shouldSkipNoProviders,
type ProviderModelVkParam,
} from '../src/utils/parametrize'
// ============================================================================
// Helper Functions
// ============================================================================
/**
 * Create a BedrockRuntimeClient pointed at the 'bedrock' integration URL.
 *
 * The region comes from the 'bedrock' integration settings and falls back to
 * 'us-west-2'. Credentials are read from AWS_ACCESS_KEY_ID and
 * AWS_SECRET_ACCESS_KEY, defaulting to empty strings when unset.
 */
function getBedrockRuntimeClient(): BedrockRuntimeClient {
  const endpoint = getIntegrationUrl('bedrock')
  const settings = getConfig().getIntegrationSettings('bedrock')
  const region = (settings.region as string) || 'us-west-2'
  const credentials = {
    accessKeyId: process.env.AWS_ACCESS_KEY_ID || '',
    secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || '',
  }
  // 5 minutes; the `as never` cast silences the client config's requestHandler typing
  const requestHandler = { requestTimeout: 300000 } as never
  return new BedrockRuntimeClient({ region, endpoint, credentials, requestHandler })
}
/**
 * Convert generic chat messages into Bedrock Converse `Message` objects.
 *
 * - 'system' messages are dropped here (they are passed separately, see
 *   extractSystemMessages).
 * - String content becomes a single text block.
 * - Array content is converted item by item: text items become text blocks and
 *   `data:image...` URLs become image blocks carrying the decoded bytes. Image
 *   URLs that are not data URIs are skipped.
 * - Any role other than 'user' is sent as 'assistant'.
 *
 * @param messages - Chat messages in the shared test format
 * @returns Messages in the shape expected by ConverseCommand / ConverseStreamCommand
 */
function convertToBedrockMessages(messages: ChatMessage[]): Message[] {
  const bedrockMessages: Message[] = []
  for (const msg of messages) {
    if (msg.role === 'system') {
      continue
    }
    const content: ContentBlock[] = []
    if (Array.isArray(msg.content)) {
      for (const item of msg.content) {
        if (item.type === 'text') {
          content.push({ text: item.text })
        } else if (item.type === 'image_url' && item.image_url) {
          const url = item.image_url.url
          if (url.startsWith('data:image')) {
            // "data:image/png;base64,<payload>" → header "data:image/png;base64", payload
            const [header, data] = url.split(',')
            const mediaType = header.split(';')[0].split(':')[1]
            const subtype = mediaType.split('/')[1]
            // Bedrock only accepts png | jpeg | gif | webp, so the common
            // non-standard "image/jpg" spelling must be sent as "jpeg".
            const format = (subtype === 'jpg' ? 'jpeg' : subtype) as 'png' | 'jpeg' | 'gif' | 'webp'
            const imageBytes = Buffer.from(data, 'base64')
            content.push({
              image: {
                format,
                source: { bytes: imageBytes },
              },
            })
          }
        }
      }
    } else {
      content.push({ text: msg.content })
    }
    const role = msg.role === 'user' ? 'user' : 'assistant'
    bedrockMessages.push({ role, content })
  }
  return bedrockMessages
}
/**
 * Wrap shared tool definitions into a Bedrock ToolConfiguration.
 *
 * Each definition becomes a `toolSpec` whose `inputSchema.json` is the
 * definition's `parameters` object.
 */
function convertToBedrockTools(tools: ToolDefinition[]): ToolConfiguration {
  const specs: Tool[] = []
  for (const definition of tools) {
    specs.push({
      toolSpec: {
        name: definition.name,
        description: definition.description,
        inputSchema: { json: definition.parameters },
      },
    })
  }
  return { tools: specs }
}
/**
 * Collect the 'system' messages as `{ text }` blocks, in their original order,
 * for the `system` field of a Converse request.
 * The content of each system message is treated as a string.
 */
function extractSystemMessages(messages: ChatMessage[]): { text: string }[] {
  const systemBlocks: { text: string }[] = []
  for (const message of messages) {
    if (message.role === 'system') {
      systemBlocks.push({ text: message.content as string })
    }
  }
  return systemBlocks
}
/**
 * Pull every `toolUse` block out of a Converse response.
 *
 * Missing ids and names become empty strings and a missing input becomes an
 * empty object. Returns an empty array when the response has no message content.
 */
function extractToolCalls(response: { output?: { message?: Message } }): Array<{
  id: string
  name: string
  arguments: Record<string, unknown>
}> {
  const calls: Array<{
    id: string
    name: string
    arguments: Record<string, unknown>
  }> = []
  const blocks = response.output?.message?.content
  if (!blocks) {
    return calls
  }
  for (const block of blocks) {
    if (!('toolUse' in block) || !block.toolUse) {
      continue
    }
    const { toolUseId, name, input } = block.toolUse as ToolUseBlock
    calls.push({
      id: toolUseId || '',
      name: name || '',
      arguments: (input as Record<string, unknown>) || {},
    })
  }
  return calls
}
/**
 * Assert that a Converse response has an output message with at least one
 * content block. Checks run outermost-first so the first failure names the
 * missing level.
 */
function assertValidChatResponse(response: { output?: { message?: Message } }): void {
  expect(response).toBeDefined()
  const output = response.output
  expect(output).toBeDefined()
  const message = output?.message
  expect(message).toBeDefined()
  const blocks = message?.content
  expect(blocks).toBeDefined()
  expect(blocks?.length).toBeGreaterThan(0)
}
/**
 * Assert that a Converse response contains at least one tool call and, when
 * `expectedCount` is given, exactly that many.
 */
function assertHasToolCalls(
  response: { output?: { message?: Message } },
  expectedCount?: number
): void {
  const callCount = extractToolCalls(response).length
  expect(callCount).toBeGreaterThan(0)
  if (expectedCount === undefined) {
    return
  }
  expect(callCount).toBe(expectedCount)
}
/**
 * Return the first non-empty text block of a Converse response, or '' when the
 * response has no message content or no text block.
 */
function getTextContent(response: { output?: { message?: Message } }): string {
  const blocks = response.output?.message?.content
  if (!blocks) {
    return ''
  }
  for (const block of blocks) {
    if (!('text' in block)) {
      continue
    }
    if (block.text) {
      return block.text
    }
  }
  return ''
}
// ============================================================================
// Test Suite
// ============================================================================
describe('Bedrock SDK Integration Tests', () => {
// ============================================================================
// Simple Chat Tests
// ============================================================================
describe('Simple Chat', () => {
  const params = getCrossProviderParamsWithVkForScenario('simple_chat', ['bedrock'])
  it.each(params)(
    'should complete a simple chat - $provider (VK: $vkEnabled)',
    async (param: ProviderModelVkParam) => {
      // The placeholder entry means no provider is configured for this scenario
      if (shouldSkipNoProviders(param)) {
        console.log('Skipping: No providers available for simple_chat')
        return
      }
      // NOTE(review): vkEnabled only affects the test title here; the request itself
      // is built the same way for both variants.
      const { provider, model } = param
      const client = getBedrockRuntimeClient()
      const modelId = formatProviderModel(provider, model)
      const response = await client.send(
        new ConverseCommand({
          modelId,
          messages: convertToBedrockMessages(SIMPLE_CHAT_MESSAGES),
          inferenceConfig: { maxTokens: 100 },
        })
      )
      assertValidChatResponse(response)
      expect(getTextContent(response).length).toBeGreaterThan(0)
      console.log(`✅ Simple chat passed for ${modelId}`)
    }
  )
})
// ============================================================================
// Multi-turn Conversation Tests
// ============================================================================
describe('Multi-turn Conversation', () => {
  const params = getCrossProviderParamsWithVkForScenario('multi_turn_conversation', ['bedrock'])
  it.each(params)(
    'should handle multi-turn conversation - $provider (VK: $vkEnabled)',
    async (param: ProviderModelVkParam) => {
      if (shouldSkipNoProviders(param)) {
        console.log('Skipping: No providers available for multi_turn_conversation')
        return
      }
      const { provider, model } = param
      const client = getBedrockRuntimeClient()
      const modelId = formatProviderModel(provider, model)
      const response = await client.send(
        new ConverseCommand({
          modelId,
          messages: convertToBedrockMessages(MULTI_TURN_MESSAGES),
          inferenceConfig: { maxTokens: 150 },
        })
      )
      assertValidChatResponse(response)
      // The reply is accepted when it mentions any population-related word
      const reply = getTextContent(response).toLowerCase()
      const mentionsPopulation = ['population', 'million', 'people', 'inhabitants', 'resident'].some(
        (keyword) => reply.includes(keyword)
      )
      expect(mentionsPopulation).toBe(true)
      console.log(`✅ Multi-turn conversation passed for ${modelId}`)
    }
  )
})
// ============================================================================
// Streaming Tests
// ============================================================================
describe('Streaming Chat', () => {
  const params = getCrossProviderParamsWithVkForScenario('streaming', ['bedrock'])
  it.each(params)(
    'should stream chat response - $provider (VK: $vkEnabled)',
    async (param: ProviderModelVkParam) => {
      if (shouldSkipNoProviders(param)) {
        console.log('Skipping: No providers available for streaming')
        return
      }
      const { provider, model } = param
      const client = getBedrockRuntimeClient()
      const messages = convertToBedrockMessages([
        { role: 'user', content: 'Say hello in exactly 3 words.' },
      ])
      const modelId = formatProviderModel(provider, model)
      const response = await client.send(
        new ConverseStreamCommand({
          modelId,
          messages,
          inferenceConfig: { maxTokens: 100 },
        })
      )
      // Concatenate the text deltas; every other event type is ignored
      let combinedText = ''
      if (response.stream) {
        for await (const event of response.stream) {
          const delta = event.contentBlockDelta?.delta
          if (delta && 'text' in delta && delta.text) {
            combinedText += delta.text
          }
        }
      }
      expect(combinedText.length).toBeGreaterThan(0)
      console.log(`✅ Streaming chat passed for ${modelId}`)
    }
  )
})
// ============================================================================
// Streaming Client Disconnect Tests
// ============================================================================
describe('Streaming Chat - Client Disconnect', () => {
  const params = getCrossProviderParamsWithVkForScenario('streaming', ['bedrock'])
  it.each(params)(
    'should handle client disconnect mid-stream - $provider (VK: $vkEnabled)',
    async (param: ProviderModelVkParam) => {
      if (shouldSkipNoProviders(param)) {
        console.log('Skipping: No providers available for streaming')
        return
      }
      const { provider, model } = param
      const client = getBedrockRuntimeClient()
      const aborter = new AbortController()
      // A long answer is requested so the stream is still open when the abort fires
      const messages = convertToBedrockMessages([
        { role: 'user', content: 'Write a detailed essay about the history of computing, including at least 10 paragraphs.' },
      ])
      const modelId = formatProviderModel(provider, model)
      const response = await client.send(
        new ConverseStreamCommand({
          modelId,
          messages,
          inferenceConfig: { maxTokens: 1000 },
        }),
        { abortSignal: aborter.signal }
      )
      let eventsSeen = 0
      let receivedText = ''
      let abortObserved = false
      try {
        if (response.stream) {
          for await (const event of response.stream) {
            // Every stream event counts, not only text deltas
            eventsSeen++
            const delta = event.contentBlockDelta?.delta
            if (delta && 'text' in delta && delta.text) {
              receivedText += delta.text
            }
            // From the fifth event on, signal the abort; the loop itself is not broken,
            // so the test relies on the stream throwing once the signal is set
            if (eventsSeen >= 5) {
              aborter.abort()
            }
          }
        }
      } catch (error) {
        abortObserved = true
        expect(error).toBeDefined()
        // Accept anything that looks like an abort/cancel, by message, name or type
        const rawName = (error as { name?: string })?.name
        const lowerMessage = (error instanceof Error ? error.message : String(error)).toLowerCase()
        const lowerName = rawName?.toLowerCase() || ''
        const isAbortError =
          lowerMessage.includes('abort') ||
          lowerMessage.includes('cancel') ||
          lowerName.includes('abort') ||
          error instanceof DOMException ||
          rawName === 'AbortError'
        expect(isAbortError).toBe(true)
      }
      // Some content must have arrived before the abort, and the abort must have surfaced
      expect(eventsSeen).toBeGreaterThanOrEqual(5)
      expect(receivedText.length).toBeGreaterThan(0)
      expect(abortObserved).toBe(true)
      console.log(`✅ Streaming client disconnect passed for ${modelId} (${eventsSeen} chunks before abort)`)
    }
  )
})
// ============================================================================
// Tool Calling Tests
// ============================================================================
describe('Single Tool Call', () => {
  const params = getCrossProviderParamsWithVkForScenario('tool_calls', ['bedrock'])
  it.each(params)(
    'should make a single tool call - $provider (VK: $vkEnabled)',
    async (param: ProviderModelVkParam) => {
      if (shouldSkipNoProviders(param)) {
        console.log('Skipping: No providers available for tool_calls')
        return
      }
      const { provider, model } = param
      const client = getBedrockRuntimeClient()
      // Prefer the provider's dedicated 'tools' model, else the scenario model
      const modelId = formatProviderModel(provider, getProviderModel(provider, 'tools') || model)
      const messages = convertToBedrockMessages([
        { role: 'user', content: "What's the weather in Boston?" },
      ])
      // toolChoice `any` requires the model to call one of the supplied tools
      const toolConfig: ToolConfiguration = {
        ...convertToBedrockTools([WEATHER_TOOL]),
        toolChoice: { any: {} },
      }
      const response = await client.send(
        new ConverseCommand({
          modelId,
          messages,
          toolConfig,
          inferenceConfig: { maxTokens: 500 },
        })
      )
      assertHasToolCalls(response, 1)
      const [firstCall] = extractToolCalls(response)
      expect(firstCall.name).toBe('get_weather')
      console.log(`✅ Single tool call passed for ${modelId}`)
    }
  )
})
describe('Multiple Tool Calls', () => {
  const params = getCrossProviderParamsWithVkForScenario('multiple_tool_calls', ['bedrock'])
  it.each(params)(
    'should make multiple tool calls - $provider (VK: $vkEnabled)',
    async (param: ProviderModelVkParam) => {
      if (shouldSkipNoProviders(param)) {
        console.log('Skipping: No providers available for multiple_tool_calls')
        return
      }
      const { provider, model } = param
      const client = getBedrockRuntimeClient()
      // Prefer the provider's dedicated 'tools' model, else the scenario model
      const modelId = formatProviderModel(provider, getProviderModel(provider, 'tools') || model)
      const messages = convertToBedrockMessages(MULTIPLE_TOOL_CALL_MESSAGES)
      const toolConfig: ToolConfiguration = {
        ...convertToBedrockTools([WEATHER_TOOL, CALCULATOR_TOOL]),
        toolChoice: { any: {} },
      }
      const response = await client.send(
        new ConverseCommand({
          modelId,
          messages,
          toolConfig,
          inferenceConfig: { maxTokens: 200 },
        })
      )
      // At least one call, and at least one of the calls targets a known tool
      const toolCalls = extractToolCalls(response)
      expect(toolCalls.length).toBeGreaterThanOrEqual(1)
      const knownTools = ['get_weather', 'calculate']
      const calledKnownTool = toolCalls.some((call) => knownTools.includes(call.name))
      expect(calledKnownTool).toBe(true)
      console.log(`✅ Multiple tool calls passed for ${modelId}`)
    }
  )
})
describe('End-to-End Tool Calling', () => {
  const params = getCrossProviderParamsWithVkForScenario('end2end_tool_calling', ['bedrock'])
  it.each(params)(
    'should complete end-to-end tool calling - $provider (VK: $vkEnabled)',
    async (param: ProviderModelVkParam) => {
      if (shouldSkipNoProviders(param)) {
        console.log('Skipping: No providers available for end2end_tool_calling')
        return
      }
      const { provider, model } = param
      const client = getBedrockRuntimeClient()
      const modelId = formatProviderModel(provider, getProviderModel(provider, 'tools') || model)

      // Step 1: ask a question that should trigger the weather tool
      const initialMessages = convertToBedrockMessages([
        { role: 'user', content: "What's the weather in San Francisco?" },
      ])
      const toolConfig: ToolConfiguration = {
        ...convertToBedrockTools([WEATHER_TOOL]),
        toolChoice: { any: {} },
      }
      const firstResponse = await client.send(
        new ConverseCommand({
          modelId,
          messages: initialMessages,
          toolConfig,
          inferenceConfig: { maxTokens: 500 },
        })
      )
      assertHasToolCalls(firstResponse, 1)
      const [toolCall] = extractToolCalls(firstResponse)
      expect(toolCall.name).toBe('get_weather')

      // Step 2: replay the assistant turn, then answer its tool call with a mocked result
      const followUpMessages: Message[] = [...initialMessages]
      const assistantTurn = firstResponse.output?.message
      if (assistantTurn) {
        followUpMessages.push(assistantTurn)
      }
      const toolResultContent: ToolResultContentBlock[] = [
        { text: mockToolResponse(toolCall.name, toolCall.arguments) },
      ]
      followUpMessages.push({
        role: 'user',
        content: [
          {
            toolResult: {
              toolUseId: toolCall.id,
              content: toolResultContent,
              status: 'success',
            },
          },
        ],
      })

      // Step 3: the model should now answer in text, referring to weather or location
      const finalResponse = await client.send(
        new ConverseCommand({
          modelId,
          messages: followUpMessages,
          toolConfig,
          inferenceConfig: { maxTokens: 500 },
        })
      )
      assertValidChatResponse(finalResponse)
      const finalText = getTextContent(finalResponse).toLowerCase()
      const acceptedKeywords = [...WEATHER_KEYWORDS, ...LOCATION_KEYWORDS, 'san francisco', 'sf']
      expect(acceptedKeywords.some((keyword) => finalText.includes(keyword))).toBe(true)
      console.log(`✅ End-to-end tool calling passed for ${modelId}`)
    }
  )
})
// ============================================================================
// Image Analysis Tests
// ============================================================================
describe('Image Base64', () => {
  const params = getCrossProviderParamsWithVkForScenario('image_base64', ['bedrock'])
  it.each(params)(
    'should analyze image from Base64 - $provider (VK: $vkEnabled)',
    async (param: ProviderModelVkParam) => {
      if (shouldSkipNoProviders(param)) {
        console.log('Skipping: No providers available for image_base64')
        return
      }
      const { provider, model } = param
      const client = getBedrockRuntimeClient()
      // Prefer the provider's dedicated 'vision' model, else the scenario model
      const modelId = formatProviderModel(provider, getProviderModel(provider, 'vision') || model)
      const messages = convertToBedrockMessages([
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: 'What do you see in this image? Describe what you see.',
            },
            {
              type: 'image_url',
              image_url: { url: `data:image/png;base64,${BASE64_IMAGE}` },
            },
          ],
        },
      ])
      const response = await client.send(
        new ConverseCommand({
          modelId,
          messages,
          inferenceConfig: { maxTokens: 500 },
        })
      )
      assertValidChatResponse(response)
      const reply = getTextContent(response).toLowerCase()
      const imageKeywords = [
        'image', 'picture', 'photo', 'see', 'visual', 'show',
        'appear', 'color', 'scene', 'pixel', 'red', 'square',
      ]
      // Deliberately lenient: an image-related word OR any reply longer than 5 characters
      const mentionsImage = imageKeywords.some((keyword) => reply.includes(keyword))
      expect(mentionsImage || reply.length > 5).toBe(true)
      console.log(`✅ Image Base64 analysis passed for ${modelId}`)
    }
  )
})
// ============================================================================
// System Message Tests
// ============================================================================
describe('System Message', () => {
  const testCases = getCrossProviderParamsWithVkForScenario('simple_chat', ['bedrock'])
  it.each(testCases)(
    'should handle system message - $provider (VK: $vkEnabled)',
    async ({ provider, model, vkEnabled }: ProviderModelVkParam) => {
      if (shouldSkipNoProviders({ provider, model, vkEnabled })) {
        console.log('Skipping: No providers available for simple_chat')
        return
      }
      const client = getBedrockRuntimeClient()
      const modelId = formatProviderModel(provider, model)
      const messagesWithSystem: ChatMessage[] = [
        { role: 'system', content: 'You are a helpful assistant that always responds in exactly 5 words.' },
        { role: 'user', content: 'Hello, how are you?' },
      ]
      // Bedrock takes system prompts in a separate `system` field, so the list is split:
      // system entries go to `system`, everything else to `messages`
      const systemMessages = extractSystemMessages(messagesWithSystem)
      const bedrockMessages = convertToBedrockMessages(messagesWithSystem)
      const command = new ConverseCommand({
        modelId,
        messages: bedrockMessages,
        system: systemMessages,
        inferenceConfig: { maxTokens: 50 },
      })
      const response = await client.send(command)
      assertValidChatResponse(response)
      const textContent = getTextContent(response)
      expect(textContent.length).toBeGreaterThan(0)
      // Check if response is approximately 5 words (allow some flexibility).
      // Trim and drop empty tokens first: splitting on whitespace alone yields an
      // extra '' for leading/trailing whitespace (e.g. a trailing newline), which
      // would inflate the count.
      const wordCount = textContent
        .trim()
        .split(/\s+/)
        .filter((word) => word.length > 0).length
      expect(wordCount).toBeGreaterThanOrEqual(3)
      expect(wordCount).toBeLessThanOrEqual(10)
      console.log(`✅ System message handling passed for ${modelId}`)
    }
  )
})
})

View File

@@ -0,0 +1,748 @@
/**
* Google GenAI Integration Tests
*
* This test suite uses the Google Generative AI SDK to test Gemini models.
* Note: The @google/generative-ai SDK does not support custom base URL configuration,
* so these tests validate the SDK directly against Google's API rather than routing
* through Bifrost. To test Google models through Bifrost, use the OpenAI SDK with
* model name routing (e.g., model: "gemini/gemini-1.5-pro") or the LangChain tests.
*
* Tests cover chat, streaming, tool calling, and vision capabilities.
*
* Test Scenarios:
* 1. Simple chat
* 2. Multi-turn conversation
* 3. Streaming chat
* 4. Single tool call
* 5. Multiple tool calls
* 6. End-to-end tool calling
* 7. Image Base64
* 8. Embeddings
* 9. Count tokens
*/
import { describe, it, expect, beforeAll } from 'vitest'
import {
GoogleGenerativeAI,
GenerativeModel,
Content,
Part,
FunctionDeclaration,
Tool,
SchemaType,
} from '@google/generative-ai'
// Explicit lookup from lowercase JSON-schema type names to the SDK's SchemaType enum.
// A fixed table is used instead of deriving the member with toUpperCase(), which
// could produce values that are not valid SchemaType members.
// Names that are not listed here have no entry; convertToGoogleTools falls back to
// SchemaType.STRING for them.
const TYPE_MAP: Record<string, SchemaType> = {
string: SchemaType.STRING,
number: SchemaType.NUMBER,
integer: SchemaType.INTEGER,
boolean: SchemaType.BOOLEAN,
array: SchemaType.ARRAY,
object: SchemaType.OBJECT,
}
import {
getIntegrationUrl,
getProviderModel,
isProviderAvailable,
getConfig,
} from '../src/utils/config-loader'
import {
SIMPLE_CHAT_MESSAGES,
MULTI_TURN_MESSAGES,
STREAMING_CHAT_MESSAGES,
SINGLE_TOOL_CALL_MESSAGES,
MULTIPLE_TOOL_CALL_MESSAGES,
BASE64_IMAGE,
WEATHER_TOOL,
CALCULATOR_TOOL,
EMBEDDINGS_SINGLE_TEXT,
EMBEDDINGS_MULTIPLE_TEXTS,
getApiKey,
hasApiKey,
mockToolResponse,
type ChatMessage,
type ToolDefinition,
} from '../src/utils/common'
// ============================================================================
// Helper Functions
// ============================================================================
/**
 * Create a GoogleGenerativeAI client using the 'gemini' API key, or the
 * literal 'dummy-key' when no key is available.
 */
function getGoogleClient(): GoogleGenerativeAI {
  // Note: The @google/generative-ai SDK does not support custom base URL configuration.
  // Unlike OpenAI and Anthropic SDKs, requests cannot be routed through Bifrost directly,
  // so these tests exercise the SDK against Google's API itself.
  // To test Google models through Bifrost, use the OpenAI SDK with model name routing
  // (e.g., model: "gemini/gemini-1.5-pro") or the LangChain tests.
  let apiKey = 'dummy-key'
  if (hasApiKey('gemini')) {
    apiKey = getApiKey('gemini')
  }
  return new GoogleGenerativeAI(apiKey)
}
/**
 * Get a GenerativeModel handle for `modelName`, or for the configured
 * gemini 'chat' model when no (or an empty) name is given.
 */
function getGenerativeModel(modelName?: string): GenerativeModel {
  const genAI = getGoogleClient()
  const resolvedModel = modelName ? modelName : getProviderModel('gemini', 'chat')
  return genAI.getGenerativeModel({ model: resolvedModel })
}
/**
 * Convert shared chat messages into Google `Content` entries.
 *
 * 'assistant' maps to the 'model' role; every other role maps to 'user'.
 * String content becomes one text part. Multimodal content is converted part by
 * part: text stays text, base64 data URIs become inlineData, and any other image
 * URL is replaced by a "[Image: <url>]" text placeholder.
 */
function convertToGoogleContent(messages: ChatMessage[]): Content[] {
  const contents: Content[] = []
  for (const message of messages) {
    const role = message.role === 'assistant' ? 'model' : 'user'
    const body = message.content
    if (typeof body === 'string') {
      contents.push({ role, parts: [{ text: body }] })
      continue
    }
    // Multimodal content
    const parts: Part[] = []
    for (const piece of body) {
      if (piece.type === 'text') {
        parts.push({ text: piece.text! })
        continue
      }
      // Anything that is not text is treated as an image
      const imageUrl = piece.image_url!.url
      // Only "data:<mime>;base64,<payload>" URIs can be sent inline
      const dataUri = imageUrl.startsWith('data:')
        ? imageUrl.match(/^data:([^;]+);base64,(.+)$/)
        : null
      if (dataUri) {
        parts.push({
          inlineData: {
            mimeType: dataUri[1],
            data: dataUri[2],
          },
        })
      } else {
        // Remote URLs would have to be fetched first; send a text placeholder instead
        parts.push({ text: `[Image: ${imageUrl}]` })
      }
    }
    contents.push({ role, parts })
  }
  return contents
}
/**
 * Convert shared tool definitions into a single Google `Tool` holding one
 * function declaration per definition.
 *
 * Property types are looked up in TYPE_MAP and default to SchemaType.STRING;
 * `enum` is copied only when the property defines one; a missing `required`
 * list becomes an empty array.
 */
function convertToGoogleTools(tools: ToolDefinition[]): Tool[] {
  const functionDeclarations: FunctionDeclaration[] = []
  for (const definition of tools) {
    functionDeclarations.push({
      name: definition.name,
      description: definition.description,
      parameters: {
        type: SchemaType.OBJECT,
        properties: Object.fromEntries(
          Object.entries(definition.parameters.properties).map(([propName, propSchema]) => [
            propName,
            {
              type: TYPE_MAP[propSchema.type] || SchemaType.STRING,
              description: propSchema.description,
              ...(propSchema.enum ? { enum: propSchema.enum } : {}),
            },
          ])
        ),
        required: definition.parameters.required || [],
      },
    })
  }
  return [{ functionDeclarations }]
}
/**
 * A function call extracted from a Google GenAI response
 * (see extractGoogleToolCalls).
 */
interface GoogleToolCall {
// Name of the function the model asked to call
name: string
// The call's `args` payload, exposed as a plain key/value record
arguments: Record<string, unknown>
}
/**
 * Collect every `functionCall` part from all candidates of a response, in
 * candidate order and then part order. Missing candidates or parts yield an
 * empty result.
 */
function extractGoogleToolCalls(response: { response: { candidates?: Array<{ content?: { parts?: Part[] } }> } }): GoogleToolCall[] {
  const calls: GoogleToolCall[] = []
  for (const candidate of response.response.candidates || []) {
    for (const part of candidate.content?.parts || []) {
      if (!('functionCall' in part) || !part.functionCall) {
        continue
      }
      const { name, args } = part.functionCall
      calls.push({ name, arguments: args as Record<string, unknown> })
    }
  }
  return calls
}
/**
 * Return the response text, or '' when the `text()` accessor throws.
 */
function getResponseText(response: { response: { text: () => string } }): string {
  let text = ''
  try {
    text = response.response.text()
  } catch {
    // text() threw; keep the empty default
  }
  return text
}
// ============================================================================
// Test Suite
// ============================================================================
describe('Google GenAI SDK Integration Tests', () => {
const skipTests = !isProviderAvailable('gemini')
beforeAll(() => {
if (skipTests) {
console.log('⚠️ Skipping Google GenAI tests: GEMINI_API_KEY not set')
}
})
// ============================================================================
// Simple Chat Tests
// ============================================================================
describe('Simple Chat', () => {
  it('should complete a simple chat', async () => {
    // No gemini provider available: return early (the test is reported as passed)
    if (skipTests) return
    const model = getGenerativeModel()
    const modelName = getProviderModel('gemini', 'chat')
    const prompt = SIMPLE_CHAT_MESSAGES[0].content as string
    const result = await model.generateContent(prompt)
    expect(result).toBeDefined()
    const reply = getResponseText(result)
    expect(reply.length).toBeGreaterThan(0)
    console.log(`✅ Simple chat passed for google/${modelName}`)
  })
})
// ============================================================================
// Multi-turn Conversation Tests
// ============================================================================
describe('Multi-turn Conversation', () => {
  it('should handle multi-turn conversation', async () => {
    if (skipTests) return
    const model = getGenerativeModel()
    const modelName = getProviderModel('gemini', 'chat')
    // Everything but the last message seeds the chat history; the last one is sent live
    const history = convertToGoogleContent(MULTI_TURN_MESSAGES.slice(0, -1))
    const chat = model.startChat({ history })
    const lastMessage = MULTI_TURN_MESSAGES[MULTI_TURN_MESSAGES.length - 1]
    const result = await chat.sendMessage(lastMessage.content as string)
    expect(result).toBeDefined()
    const reply = getResponseText(result)
    expect(reply.toLowerCase()).toMatch(/paris|population|million|people/i)
    console.log(`✅ Multi-turn conversation passed for google/${modelName}`)
  })
})
// ============================================================================
// Streaming Tests
// ============================================================================
describe('Streaming Chat', () => {
  it('should stream chat response', async () => {
    if (skipTests) return
    const model = getGenerativeModel()
    const modelName = getProviderModel('gemini', 'chat')
    const prompt = STREAMING_CHAT_MESSAGES[0].content as string
    const result = await model.generateContentStream(prompt)
    // Gather the non-empty text of each chunk, then join once at the end
    const pieces: string[] = []
    for await (const chunk of result.stream) {
      const piece = chunk.text()
      if (piece) {
        pieces.push(piece)
      }
    }
    const content = pieces.join('')
    expect(content.length).toBeGreaterThan(0)
    console.log(`✅ Streaming chat passed for google/${modelName}`)
  })
})
// ============================================================================
// Tool Calling Tests
// ============================================================================
describe('Single Tool Call', () => {
  // Offers only the weather tool and expects exactly one call to it.
  it('should make a single tool call', async () => {
    if (skipTests) {
      return
    }

    const toolModel = getProviderModel('gemini', 'tools')
    const generativeModel = getGenerativeModel(toolModel)

    const request = {
      contents: convertToGoogleContent(SINGLE_TOOL_CALL_MESSAGES),
      tools: convertToGoogleTools([WEATHER_TOOL]),
    }
    const response = await generativeModel.generateContent(request)

    const calls = extractGoogleToolCalls(response)
    expect(calls.length).toBe(1)
    expect(calls[0].name).toBe('get_weather')

    console.log(`✅ Single tool call passed for google/${toolModel}`)
  })
})
describe('Multiple Tool Calls', () => {
  // Offers the weather and calculator tools and expects at least one call
  // that targets one of them.
  it('should make multiple tool calls', async () => {
    if (skipTests) {
      return
    }

    const toolModel = getProviderModel('gemini', 'tools')
    const generativeModel = getGenerativeModel(toolModel)

    const request = {
      contents: convertToGoogleContent(MULTIPLE_TOOL_CALL_MESSAGES),
      tools: convertToGoogleTools([WEATHER_TOOL, CALCULATOR_TOOL]),
    }
    const response = await generativeModel.generateContent(request)

    const calls = extractGoogleToolCalls(response)
    expect(calls.length).toBeGreaterThanOrEqual(1)

    const hitsKnownTool = calls.some((call) => call.name === 'get_weather' || call.name === 'calculate')
    expect(hitsKnownTool).toBe(true)

    console.log(`✅ Multiple tool calls passed for google/${toolModel}`)
  })
})
describe('End-to-End Tool Calling', () => {
  // Full round trip: the model requests a tool, the test answers with a
  // mocked tool result, and the model must then produce a text reply.
  it('should complete end-to-end tool calling', async () => {
    if (skipTests) {
      return
    }

    const toolModel = getProviderModel('gemini', 'tools')
    const generativeModel = getGenerativeModel(toolModel)

    // Open a chat session that has the weather tool available
    const session = generativeModel.startChat({
      tools: convertToGoogleTools([WEATHER_TOOL]),
    })

    const openingPrompt = SINGLE_TOOL_CALL_MESSAGES[0].content as string
    const firstReply = await session.sendMessage(openingPrompt)
    const requestedCalls = extractGoogleToolCalls(firstReply)
    expect(requestedCalls.length).toBeGreaterThan(0)

    // Produce a mocked result for the first requested call
    const firstCall = requestedCalls[0]
    const mockedOutput = mockToolResponse(firstCall.name, firstCall.arguments)

    // Hand the mocked result back as a functionResponse part
    const finalReply = await session.sendMessage([
      {
        functionResponse: {
          name: firstCall.name,
          response: JSON.parse(mockedOutput),
        },
      },
    ])
    expect(finalReply).toBeDefined()

    const answer = getResponseText(finalReply)
    expect(answer.length).toBeGreaterThan(0)

    console.log(`✅ End-to-end tool calling passed for google/${toolModel}`)
  })
})
// ============================================================================
// Image/Vision Tests
// ============================================================================
describe('Image Base64', () => {
  // Sends an inline base64 PNG together with a question and expects a text
  // answer longer than 10 characters.
  it('should analyze image from Base64', async () => {
    if (skipTests) {
      return
    }

    const visionModel = getProviderModel('gemini', 'vision')
    const generativeModel = getGenerativeModel(visionModel)

    const response = await generativeModel.generateContent([
      { text: 'What color is this image?' },
      { inlineData: { mimeType: 'image/png', data: BASE64_IMAGE } },
    ])
    expect(response).toBeDefined()

    const description = getResponseText(response)
    expect(description.length).toBeGreaterThan(10)

    console.log(`✅ Image Base64 analysis passed for google/${visionModel}`)
  })
})
// ============================================================================
// Embeddings Tests
// ============================================================================
describe('Embeddings - Single Text', () => {
  // Embeds one text and checks that a non-empty vector is returned.
  // Returns early (with a log line) when no embeddings model is configured.
  it('should generate single text embedding', async () => {
    if (skipTests) {
      return
    }

    const googleClient = getGoogleClient()
    const embeddingsModel = getProviderModel('gemini', 'embeddings')
    if (!embeddingsModel) {
      console.log('⚠️ Skipping embeddings test: No embeddings model configured')
      return
    }

    const embedder = googleClient.getGenerativeModel({ model: embeddingsModel })
    const response = await embedder.embedContent(EMBEDDINGS_SINGLE_TEXT)

    expect(response).toBeDefined()
    expect(response.embedding).toBeDefined()
    expect(response.embedding.values).toBeDefined()
    expect(response.embedding.values.length).toBeGreaterThan(0)

    console.log(`✅ Single text embedding passed for google/${embeddingsModel}`)
  })
})
describe('Embeddings - Batch', () => {
  // Embeds several texts in one request and expects one embedding per input.
  // Returns early (with a log line) when no embeddings model is configured.
  it('should generate batch embeddings', async () => {
    if (skipTests) {
      return
    }

    const googleClient = getGoogleClient()
    const embeddingsModel = getProviderModel('gemini', 'embeddings')
    if (!embeddingsModel) {
      console.log('⚠️ Skipping embeddings test: No embeddings model configured')
      return
    }

    const embedder = googleClient.getGenerativeModel({ model: embeddingsModel })
    const response = await embedder.batchEmbedContents({
      // One single-part user message per input text
      requests: EMBEDDINGS_MULTIPLE_TEXTS.map((text) => ({
        content: { parts: [{ text }], role: 'user' },
      })),
    })

    expect(response).toBeDefined()
    expect(response.embeddings).toBeDefined()
    expect(response.embeddings.length).toBe(EMBEDDINGS_MULTIPLE_TEXTS.length)

    console.log(`✅ Batch embeddings passed for google/${embeddingsModel}`)
  })
})
// ============================================================================
// Count Tokens Tests
// ============================================================================
describe('Count Tokens', () => {
  // Asks the default chat model to count tokens for a short sentence and
  // expects a positive total.
  it('should count tokens', async () => {
    if (skipTests) {
      return
    }

    const generativeModel = getGenerativeModel()
    const chatModelName = getProviderModel('gemini', 'chat')

    const tally = await generativeModel.countTokens('Hello, how are you today?')
    expect(tally).toBeDefined()
    expect(tally.totalTokens).toBeGreaterThan(0)

    console.log(`✅ Count tokens passed for google/${chatModelName} (${tally.totalTokens} tokens)`)
  })
})
// ============================================================================
// Thinking/Extended Reasoning Tests
// ============================================================================
describe('Thinking/Extended Reasoning', () => {
  // Sends a reasoning-style prompt to the configured thinking model and
  // expects a non-empty text answer.
  //
  // The request itself is best-effort: when no thinking model is configured,
  // or the request throws, the test logs a "skipped" line and returns.
  // Assertions run outside the try/catch so a bad response fails the test
  // instead of being reported as a skip.
  it('should support extended thinking', async () => {
    if (skipTests) return

    const thinkingModel = getProviderModel('gemini', 'thinking')
    // Skip if no thinking model available
    if (!thinkingModel) {
      console.log('⚠️ Skipping thinking test: No thinking model configured')
      return
    }

    const model = getGenerativeModel(thinkingModel)

    let result: Awaited<ReturnType<typeof model.generateContent>>
    try {
      result = await model.generateContent({
        contents: [
          {
            role: 'user',
            parts: [{ text: 'What is 15% of 80? Show your reasoning step by step.' }],
          },
        ],
        generationConfig: {
          // Google Gemini uses different config for reasoning
          maxOutputTokens: 2048,
        },
      })
    } catch (error) {
      // Only request failures are treated as a skip.
      console.log(`⚠️ Thinking test skipped: ${error instanceof Error ? error.message : 'Unknown error'}`)
      return
    }

    expect(result).toBeDefined()
    const text = getResponseText(result)
    expect(text.length).toBeGreaterThan(0)
    console.log(`✅ Thinking/Extended reasoning passed for google/${thinkingModel}`)
  })
})
// ============================================================================
// Audio Transcription Tests
// ============================================================================
describe('Audio Transcription', () => {
  // Builds a short synthetic WAV clip in memory (a 440 Hz sine tone), sends
  // it inline with a transcription prompt, and only checks that a response
  // object comes back. Request failures are logged as a skip.
  it('should transcribe audio content', async () => {
    if (skipTests) {
      return
    }

    const transcriptionModel = getProviderModel('gemini', 'transcription')
    if (!transcriptionModel) {
      console.log('⚠️ Skipping transcription test: No transcription model configured')
      return
    }
    const generativeModel = getGenerativeModel(transcriptionModel)

    // --- Synthetic clip parameters -------------------------------------
    const sampleRate = 16000
    const seconds = 0.5
    const sampleCount = Math.floor(sampleRate * seconds)
    const toneHz = 440 // A4 note

    // 44-byte header followed by 2 bytes per sample
    const headerBytes = 44
    const pcmBytes = sampleCount * 2
    const wav = new ArrayBuffer(headerBytes + pcmBytes)
    const writer = new DataView(wav)
    const ascii = new TextEncoder()

    // Writes a 4-character chunk tag at the given byte offset.
    const putTag = (offset: number, tag: string): void => {
      new Uint8Array(wav, offset, 4).set(ascii.encode(tag))
    }

    // RIFF header
    putTag(0, 'RIFF')
    writer.setUint32(4, headerBytes + pcmBytes - 8, true)
    putTag(8, 'WAVE')

    // fmt chunk (all multi-byte fields little-endian)
    putTag(12, 'fmt ')
    writer.setUint32(16, 16, true)
    writer.setUint16(20, 1, true)
    writer.setUint16(22, 1, true)
    writer.setUint32(24, sampleRate, true)
    writer.setUint32(28, sampleRate * 2, true)
    writer.setUint16(32, 2, true)
    writer.setUint16(34, 16, true)

    // data chunk
    putTag(36, 'data')
    writer.setUint32(40, pcmBytes, true)

    // Half-amplitude sine wave as signed 16-bit samples
    for (let index = 0; index < sampleCount; index++) {
      const t = index / sampleRate
      const amplitude = Math.sin(2 * Math.PI * toneHz * t) * 32767 * 0.5
      writer.setInt16(headerBytes + index * 2, Math.round(amplitude), true)
    }

    const audioBase64 = btoa(String.fromCharCode(...new Uint8Array(wav)))

    try {
      const response = await generativeModel.generateContent([
        { text: 'Please transcribe this audio.' },
        { inlineData: { mimeType: 'audio/wav', data: audioBase64 } },
      ])
      // A pure tone may not yield a meaningful transcript, so only the
      // presence of a response is checked.
      expect(response).toBeDefined()
      console.log(`✅ Audio transcription passed for google/${transcriptionModel}`)
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error'
      console.log(`⚠️ Transcription test skipped: ${reason}`)
    }
  })
})
// ============================================================================
// Speech Synthesis Tests
// ============================================================================
describe('Speech Synthesis', () => {
  // Requests audio output from the configured speech model and only checks
  // that a response object comes back. Failures are logged as a skip.
  it('should synthesize speech', async () => {
    if (skipTests) {
      return
    }

    const speechModel = getProviderModel('gemini', 'speech')
    if (!speechModel) {
      console.log('⚠️ Skipping speech synthesis test: No speech model configured')
      return
    }

    // Google Gemini TTS requires specific API usage;
    // this test verifies the model is accessible.
    try {
      const generativeModel = getGenerativeModel(speechModel)

      const userTurn = {
        role: 'user',
        parts: [{ text: 'Hello, this is a test of speech synthesis.' }],
      }
      // TTS-specific settings; cast because they are passed through as-is.
      const ttsConfig = {
        responseModalities: ['AUDIO'],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: {
              voiceName: 'Puck',
            },
          },
        },
      } as never

      const response = await generativeModel.generateContent({
        contents: [userTurn],
        generationConfig: ttsConfig,
      })
      expect(response).toBeDefined()
      console.log(`✅ Speech synthesis passed for google/${speechModel}`)
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error'
      console.log(`⚠️ Speech synthesis test skipped: ${reason}`)
    }
  })
})
// ============================================================================
// Document/PDF Input Tests
// ============================================================================
describe('Document Input - PDF', () => {
  // Sends a small inline base64 PDF together with a question and expects a
  // non-empty text answer.
  //
  // The request itself is best-effort: when no file model is configured, or
  // the request throws, the test logs a "skipped" line and returns.
  // Assertions run outside the try/catch so a bad response fails the test
  // instead of being reported as a skip.
  it('should handle PDF document input', async () => {
    if (skipTests) return

    const fileModel = getProviderModel('gemini', 'file')
    // Skip if no file model available
    if (!fileModel) {
      console.log('⚠️ Skipping document input test: No file model configured')
      return
    }

    const model = getGenerativeModel(fileModel)

    // Sample PDF base64 (minimal PDF with "Hello World")
    const pdfBase64 =
      'JVBERi0xLjcKCjEgMCBvYmogICUgZW50cnkgcG9pbnQKPDwKICAvVHlwZSAvQ2F0YWxvZwogIC' +
      '9QYWdlcyAyIDAgUgo+PgplbmRvYmoKCjIgMCBvYmoKPDwKICAvVHlwZSAvUGFnZXwKICAvTWV' +
      'kaWFCb3ggWyAwIDAgMjAwIDIwMCBdCiAgL0NvdW50IDEKICAvS2lkcyBbIDMgMCBSIF0KPj4K' +
      'ZW5kb2JqCgozIDAgb2JqCjw8CiAgL1R5cGUgL1BhZ2UKICAvUGFyZW50IDIgMCBSCiAgL1Jlc' +
      '291cmNlcyA8PAogICAgL0ZvbnQgPDwKICAgICAgL0YxIDQgMCBSCj4+CiAgPj4KICAvQ29udG' +
      'VudHMgNSAwIFIKPj4KZW5kb2JqCgo0IDAgb2JqCjw8CiAgL1R5cGUgL0ZvbnQKICAvU3VidHl' +
      'wZSAvVHlwZTEKICAvQmFzZUZvbnQgL1RpbWVzLVJvbWFuCj4+CmVuZG9iagoKNSAwIG9iago8' +
      'PAogIC9MZW5ndGggNDQKPj4Kc3RyZWFtCkJUCjcwIDUwIFRECi9GMSAxMiBUZgooSGVsbG8gV' +
      '29ybGQhKSBUagpFVAplbmRzdHJlYW0KZW5kb2JqCgp4cmVmCjAgNgowMDAwMDAwMDAwIDY1NT' +
      'M1IGYgCjAwMDAwMDAwMTAgMDAwMDAgbiAKMDAwMDAwMDA2MCAwMDAwMCBuIAowMDAwMDAwMTU' +
      '3IDAwMDAwIG4gCjAwMDAwMDAyNTUgMDAwMDAgbiAKMDAwMDAwMDM1MyAwMDAwMCBuIAp0cmFp' +
      'bGVyCjw8CiAgL1NpemUgNgogIC9Sb290IDEgMCBSCj4+CnN0YXJ0eHJlZgo0NDkKJSVFT0YK'

    let result: Awaited<ReturnType<typeof model.generateContent>>
    try {
      result = await model.generateContent([
        { text: 'What does this PDF document contain?' },
        {
          inlineData: {
            mimeType: 'application/pdf',
            data: pdfBase64,
          },
        },
      ])
    } catch (error) {
      // Only request failures are treated as a skip.
      console.log(`⚠️ Document input test skipped: ${error instanceof Error ? error.message : 'Unknown error'}`)
      return
    }

    expect(result).toBeDefined()
    const text = getResponseText(result)
    expect(text.length).toBeGreaterThan(0)
    console.log(`✅ Document input (PDF) passed for google/${fileModel}`)
  })
})
// ============================================================================
// System Instruction Tests
// ============================================================================
describe('System Instruction', () => {
  // Creates a model with a "respond in exactly 5 words" system instruction
  // and checks that the answer is non-empty and roughly that long
  // (3 to 10 whitespace-separated words).
  //
  // A request that throws is logged as a skip. Assertions run outside the
  // try/catch so a response that ignores the instruction fails the test
  // instead of being reported as a skip.
  it('should respect system instructions', async () => {
    if (skipTests) return

    const modelName = getProviderModel('gemini', 'chat')
    const client = getGoogleClient()
    const systemModel = client.getGenerativeModel({
      model: modelName,
      systemInstruction: 'You are a helpful assistant that always responds in exactly 5 words.',
    })

    let result: Awaited<ReturnType<typeof systemModel.generateContent>>
    try {
      result = await systemModel.generateContent('Hello, how are you?')
    } catch (error) {
      // Only request failures are treated as a skip.
      console.log(`⚠️ System instruction test skipped: ${error instanceof Error ? error.message : 'Unknown error'}`)
      return
    }

    expect(result).toBeDefined()
    const text = getResponseText(result)
    expect(text.length).toBeGreaterThan(0)

    // Check if response is approximately 5 words
    const wordCount = text.trim().split(/\s+/).length
    expect(wordCount).toBeGreaterThanOrEqual(3)
    expect(wordCount).toBeLessThanOrEqual(10)
    console.log(`✅ System instruction passed for google/${modelName}`)
  })
})
// ============================================================================
// Structured Output Tests
// ============================================================================
describe('Structured Output', () => {
  // Requests a JSON response (responseMimeType 'application/json') and
  // checks that the returned text is non-empty and parses as JSON.
  //
  // A request that throws is logged as a skip. Assertions and JSON.parse run
  // outside the try/catch so an empty or non-JSON response fails the test
  // instead of being reported as a skip.
  it('should generate structured output with JSON schema', async () => {
    if (skipTests) return

    const model = getGenerativeModel()
    const modelName = getProviderModel('gemini', 'chat')

    let result: Awaited<ReturnType<typeof model.generateContent>>
    try {
      result = await model.generateContent({
        contents: [
          {
            role: 'user',
            parts: [{ text: 'Give me a recipe for chocolate chip cookies as JSON with name, ingredients (array), and instructions (array).' }],
          },
        ],
        generationConfig: {
          responseMimeType: 'application/json',
        },
      })
    } catch (error) {
      // Only request failures are treated as a skip.
      console.log(`⚠️ Structured output test skipped: ${error instanceof Error ? error.message : 'Unknown error'}`)
      return
    }

    expect(result).toBeDefined()
    const text = getResponseText(result)
    expect(text.length).toBeGreaterThan(0)

    // A SyntaxError from JSON.parse fails the test: the response was not JSON.
    const parsed: unknown = JSON.parse(text)
    expect(parsed).toBeDefined()
    console.log(`✅ Structured output passed for google/${modelName}`)
  })
})
})

View File

@@ -0,0 +1,864 @@
/**
* LangChain.js Integration Tests
*
* This test suite uses LangChain.js to test multiple AI providers through Bifrost.
* Tests cover chat, streaming, tool calling, and structured output capabilities.
*
* Providers tested:
* - OpenAI (via @langchain/openai)
* - Anthropic (via @langchain/anthropic)
* - Google GenAI (via @langchain/google-genai)
*
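* Test Scenarios:
* 1. Simple chat
* 2. Multi-turn conversation
* 3. Streaming chat
* 4. Tool calling
* 5. Structured output
* 6. Streaming client disconnect (abort mid-stream; OpenAI and Anthropic)
* 7. Token usage reporting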
*/
import { describe, it, expect, beforeAll } from 'vitest'
import { ChatOpenAI } from '@langchain/openai'
import { ChatAnthropic } from '@langchain/anthropic'
import { ChatGoogleGenerativeAI } from '@langchain/google-genai'
import { HumanMessage, AIMessage, SystemMessage, BaseMessage } from '@langchain/core/messages'
import { DynamicStructuredTool } from '@langchain/core/tools'
import { z } from 'zod'
import {
getIntegrationUrl,
getProviderModel,
isProviderAvailable,
} from '../src/utils/config-loader'
import {
SIMPLE_CHAT_MESSAGES,
MULTI_TURN_MESSAGES,
STREAMING_CHAT_MESSAGES,
SINGLE_TOOL_CALL_MESSAGES,
getApiKey,
hasApiKey,
mockToolResponse,
type ChatMessage,
} from '../src/utils/common'
// ============================================================================
// Helper Functions
// ============================================================================
type LangChainModel = ChatOpenAI | ChatAnthropic | ChatGoogleGenerativeAI
/**
 * Builds a LangChain `ChatOpenAI` client pointed at the configured 'openai'
 * integration URL.
 *
 * Falls back to the placeholder key 'dummy-key' when no OpenAI API key is
 * available.
 *
 * @returns A chat model limited to 100 output tokens, with a 300000 ms
 *   timeout and up to 3 retries.
 */
function getLangChainOpenAI(): ChatOpenAI {
  const gatewayUrl = getIntegrationUrl('openai')

  let credential = 'dummy-key'
  if (hasApiKey('openai')) {
    credential = getApiKey('openai')
  }

  const chatModelName = getProviderModel('openai', 'chat')

  return new ChatOpenAI({
    modelName: chatModelName,
    openAIApiKey: credential,
    configuration: { baseURL: gatewayUrl },
    maxTokens: 100,
    timeout: 300000,
    maxRetries: 3,
  })
}
/**
 * Builds a LangChain `ChatAnthropic` client pointed at the configured
 * 'anthropic' integration URL.
 *
 * Falls back to the placeholder key 'dummy-key' when no Anthropic API key is
 * available.
 *
 * @returns A chat model limited to 100 output tokens with up to 3 retries.
 */
function getLangChainAnthropic(): ChatAnthropic {
  const gatewayUrl = getIntegrationUrl('anthropic')

  let credential = 'dummy-key'
  if (hasApiKey('anthropic')) {
    credential = getApiKey('anthropic')
  }

  const chatModelName = getProviderModel('anthropic', 'chat')

  return new ChatAnthropic({
    modelName: chatModelName,
    anthropicApiKey: credential,
    anthropicApiUrl: gatewayUrl,
    maxTokens: 100,
    maxRetries: 3,
  })
}
/**
 * Builds a LangChain `ChatGoogleGenerativeAI` client for the configured
 * Gemini chat model.
 *
 * The 'gemini' provider name is used for both the API key and the model
 * lookup. Falls back to the placeholder key 'dummy-key' when no key is
 * available.
 *
 * NOTE(review): unlike the OpenAI and Anthropic helpers, no integration URL
 * is passed here, so this client presumably talks to the SDK's default
 * endpoint rather than the gateway — confirm whether that is intended.
 *
 * @returns A chat model limited to 100 output tokens with up to 3 retries.
 */
function getLangChainGoogle(): ChatGoogleGenerativeAI {
  let credential = 'dummy-key'
  if (hasApiKey('gemini')) {
    credential = getApiKey('gemini')
  }

  const chatModelName = getProviderModel('gemini', 'chat')

  return new ChatGoogleGenerativeAI({
    modelName: chatModelName,
    apiKey: credential,
    maxOutputTokens: 100,
    maxRetries: 3,
  })
}
/**
 * Converts the shared test chat messages into LangChain message objects.
 *
 * Non-string content is serialised with `JSON.stringify`. 'system' maps to
 * `SystemMessage`, 'assistant' to `AIMessage`, and every other role
 * (including 'user') to `HumanMessage`.
 *
 * @param messages - Chat messages in the shared test format.
 * @returns One LangChain message per input message, in the same order.
 */
function convertToLangChainMessages(messages: ChatMessage[]): BaseMessage[] {
  const converted: BaseMessage[] = []

  for (const message of messages) {
    const text = typeof message.content === 'string' ? message.content : JSON.stringify(message.content)

    if (message.role === 'system') {
      converted.push(new SystemMessage(text))
    } else if (message.role === 'assistant') {
      converted.push(new AIMessage(text))
    } else {
      // 'user' and any unrecognised role
      converted.push(new HumanMessage(text))
    }
  }

  return converted
}
// Weather tool using Zod schema
/**
 * LangChain tool named 'get_weather'. Its input is a Zod object with a
 * required `location` and an optional `unit`; the handler forwards both to
 * `mockToolResponse` instead of calling a real weather service.
 */
const weatherTool = new DynamicStructuredTool({
  name: 'get_weather',
  description: 'Get the current weather for a location',
  schema: z.object({
    location: z.string().describe('The city and state, e.g. San Francisco, CA'),
    unit: z.enum(['celsius', 'fahrenheit']).optional().describe('The temperature unit'),
  }),
  func: async (input) => {
    const mockedArgs = { location: input.location, unit: input.unit }
    return mockToolResponse('get_weather', mockedArgs)
  },
})
// Calculator tool using Zod schema
/**
 * LangChain tool named 'calculate'. Its input is a Zod object with a single
 * `expression` string; the handler forwards it to `mockToolResponse` rather
 * than evaluating anything itself.
 */
const calculatorTool = new DynamicStructuredTool({
  name: 'calculate',
  description: 'Perform basic mathematical calculations',
  schema: z.object({
    expression: z.string().describe("Mathematical expression to evaluate, e.g. '2 + 2'"),
  }),
  func: async (input) => {
    const mockedArgs = { expression: input.expression }
    return mockToolResponse('calculate', mockedArgs)
  },
})
// ============================================================================
// Test Suite
// ============================================================================
describe('LangChain.js Integration Tests', () => {
// ============================================================================
// OpenAI via LangChain
// ============================================================================
describe('LangChain OpenAI', () => {
  const skipTests = !isProviderAvailable('openai')

  // LangChain message content is either a plain string or structured data;
  // structured data is serialised so length/regex checks can be applied.
  const asText = (content: unknown): string =>
    typeof content === 'string' ? content : JSON.stringify(content)

  beforeAll(() => {
    if (skipTests) {
      console.log('⚠️ Skipping LangChain OpenAI tests: OPENAI_API_KEY not set')
    }
  })

  describe('Simple Chat', () => {
    it('should complete a simple chat', async () => {
      if (skipTests) {
        return
      }
      const chatModel = getLangChainOpenAI()
      const reply = await chatModel.invoke(convertToLangChainMessages(SIMPLE_CHAT_MESSAGES))

      expect(reply).toBeDefined()
      expect(reply.content).toBeDefined()
      expect(asText(reply.content).length).toBeGreaterThan(0)
      console.log(`✅ LangChain OpenAI simple chat passed`)
    })
  })

  describe('Multi-turn Conversation', () => {
    it('should handle multi-turn conversation', async () => {
      if (skipTests) {
        return
      }
      const chatModel = getLangChainOpenAI()
      const reply = await chatModel.invoke(convertToLangChainMessages(MULTI_TURN_MESSAGES))

      expect(reply).toBeDefined()
      expect(asText(reply.content).toLowerCase()).toMatch(/paris|population|million|people/i)
      console.log(`✅ LangChain OpenAI multi-turn conversation passed`)
    })
  })

  describe('Streaming Chat', () => {
    it('should stream chat response', async () => {
      if (skipTests) {
        return
      }
      const chatModel = getLangChainOpenAI()
      const chunks = await chatModel.stream(convertToLangChainMessages(STREAMING_CHAT_MESSAGES))

      // Concatenate every chunk that carries content
      let streamedText = ''
      for await (const piece of chunks) {
        if (piece.content) {
          streamedText += asText(piece.content)
        }
      }

      expect(streamedText.length).toBeGreaterThan(0)
      console.log(`✅ LangChain OpenAI streaming chat passed`)
    })
  })

  describe('Streaming Chat - Client Disconnect', () => {
    // Starts a long streaming answer, aborts once 3 chunks have arrived, and
    // expects the stream iteration to reject with an abort-style error.
    it('should handle client disconnect mid-stream', async () => {
      if (skipTests) {
        return
      }
      const gatewayUrl = getIntegrationUrl('openai')
      const credential = hasApiKey('openai') ? getApiKey('openai') : 'dummy-key'
      const modelName = getProviderModel('openai', 'chat')

      // Larger token budget than the shared helper so the answer is long
      // enough to be interrupted
      const chatModel = new ChatOpenAI({
        modelName,
        openAIApiKey: credential,
        configuration: { baseURL: gatewayUrl },
        maxTokens: 1000,
        timeout: 300000,
      })

      const aborter = new AbortController()
      const prompt = convertToLangChainMessages([
        { role: 'user', content: 'Write a detailed essay about the history of computing, including at least 10 paragraphs.' },
      ])
      const chunks = await chatModel.stream(prompt, { signal: aborter.signal })

      let chunkCount = 0
      let streamedText = ''
      let wasAborted = false

      try {
        for await (const piece of chunks) {
          chunkCount++
          if (piece.content) {
            streamedText += asText(piece.content)
          }
          // Trigger the abort after receiving a few chunks
          if (chunkCount >= 3) {
            aborter.abort()
          }
        }
      } catch (error) {
        wasAborted = true
        expect(error).toBeDefined()

        const message = (error instanceof Error ? error.message : String(error)).toLowerCase()
        const mentionsAbort = ['abort', 'cancel'].some((keyword) => message.includes(keyword))
        const isAbortError =
          mentionsAbort ||
          error instanceof DOMException ||
          (error as { name?: string })?.name === 'AbortError'
        expect(isAbortError).toBe(true)
      }

      expect(chunkCount).toBeGreaterThanOrEqual(3)
      expect(streamedText.length).toBeGreaterThan(0)
      expect(wasAborted).toBe(true)
      console.log(`✅ LangChain OpenAI streaming client disconnect passed (${chunkCount} chunks before abort)`)
    })
  })

  describe('Tool Calling', () => {
    it('should make tool calls', async () => {
      if (skipTests) {
        return
      }
      const toolEnabledModel = getLangChainOpenAI().bindTools([weatherTool])
      const reply = await toolEnabledModel.invoke(convertToLangChainMessages(SINGLE_TOOL_CALL_MESSAGES))

      expect(reply).toBeDefined()
      expect(reply.tool_calls).toBeDefined()
      expect(reply.tool_calls!.length).toBeGreaterThan(0)
      expect(reply.tool_calls![0].name).toBe('get_weather')
      console.log(`✅ LangChain OpenAI tool calling passed`)
    })
  })

  describe('Structured Output', () => {
    it('should generate structured output', async () => {
      if (skipTests) {
        return
      }
      const chatModel = getLangChainOpenAI()

      // Shape the model is asked to fill in
      const ResponseSchema = z.object({
        answer: z.string().describe('The answer to the question'),
        confidence: z.number().min(0).max(1).describe('Confidence score'),
      })

      const reply = await chatModel.withStructuredOutput(ResponseSchema).invoke('What is 2 + 2?')

      expect(reply).toBeDefined()
      expect(reply.answer).toBeDefined()
      expect(typeof reply.confidence).toBe('number')
      console.log(`✅ LangChain OpenAI structured output passed`)
    })
  })
})
// ============================================================================
// Anthropic via LangChain
// ============================================================================
describe('LangChain Anthropic', () => {
  const skipTests = !isProviderAvailable('anthropic')

  // LangChain message content is either a plain string or structured data;
  // structured data is serialised so length/regex checks can be applied.
  const asText = (content: unknown): string =>
    typeof content === 'string' ? content : JSON.stringify(content)

  beforeAll(() => {
    if (skipTests) {
      console.log('⚠️ Skipping LangChain Anthropic tests: ANTHROPIC_API_KEY not set')
    }
  })

  describe('Simple Chat', () => {
    it('should complete a simple chat', async () => {
      if (skipTests) {
        return
      }
      const chatModel = getLangChainAnthropic()
      const reply = await chatModel.invoke(convertToLangChainMessages(SIMPLE_CHAT_MESSAGES))

      expect(reply).toBeDefined()
      expect(reply.content).toBeDefined()
      expect(asText(reply.content).length).toBeGreaterThan(0)
      console.log(`✅ LangChain Anthropic simple chat passed`)
    })
  })

  describe('Multi-turn Conversation', () => {
    it('should handle multi-turn conversation', async () => {
      if (skipTests) {
        return
      }
      const chatModel = getLangChainAnthropic()
      const reply = await chatModel.invoke(convertToLangChainMessages(MULTI_TURN_MESSAGES))

      expect(reply).toBeDefined()
      expect(asText(reply.content).toLowerCase()).toMatch(/paris|population|million|people/i)
      console.log(`✅ LangChain Anthropic multi-turn conversation passed`)
    })
  })

  describe('Streaming Chat', () => {
    it('should stream chat response', async () => {
      if (skipTests) {
        return
      }
      const chatModel = getLangChainAnthropic()
      const chunks = await chatModel.stream(convertToLangChainMessages(STREAMING_CHAT_MESSAGES))

      // Concatenate every chunk that carries content
      let streamedText = ''
      for await (const piece of chunks) {
        if (piece.content) {
          streamedText += asText(piece.content)
        }
      }

      expect(streamedText.length).toBeGreaterThan(0)
      console.log(`✅ LangChain Anthropic streaming chat passed`)
    })
  })

  describe('Streaming Chat - Client Disconnect', () => {
    // Starts a long streaming answer, aborts once 5 chunks have arrived, and
    // expects the stream iteration to reject with an abort-style error.
    it('should handle client disconnect mid-stream', async () => {
      if (skipTests) {
        return
      }
      const gatewayUrl = getIntegrationUrl('anthropic')
      const credential = hasApiKey('anthropic') ? getApiKey('anthropic') : 'dummy-key'
      const modelName = getProviderModel('anthropic', 'chat')

      // Larger token budget than the shared helper so the answer is long
      // enough to be interrupted
      const chatModel = new ChatAnthropic({
        modelName,
        anthropicApiKey: credential,
        anthropicApiUrl: gatewayUrl,
        maxTokens: 1000,
        maxRetries: 3,
      })

      const aborter = new AbortController()
      const prompt = convertToLangChainMessages([
        { role: 'user', content: 'Write a detailed essay about the history of computing, including at least 10 paragraphs.' },
      ])
      const chunks = await chatModel.stream(prompt, { signal: aborter.signal })

      let chunkCount = 0
      let streamedText = ''
      let wasAborted = false

      try {
        for await (const piece of chunks) {
          chunkCount++
          if (piece.content) {
            streamedText += asText(piece.content)
          }
          // Trigger the abort after receiving a few chunks
          if (chunkCount >= 5) {
            aborter.abort()
          }
        }
      } catch (error) {
        wasAborted = true
        expect(error).toBeDefined()

        const message = (error instanceof Error ? error.message : String(error)).toLowerCase()
        const mentionsAbort = ['abort', 'cancel'].some((keyword) => message.includes(keyword))
        const isAbortError =
          mentionsAbort ||
          error instanceof DOMException ||
          (error as { name?: string })?.name === 'AbortError'
        expect(isAbortError).toBe(true)
      }

      expect(chunkCount).toBeGreaterThanOrEqual(5)
      expect(streamedText.length).toBeGreaterThan(0)
      expect(wasAborted).toBe(true)
      console.log(`✅ LangChain Anthropic streaming client disconnect passed (${chunkCount} chunks before abort)`)
    })
  })

  describe('Tool Calling', () => {
    it('should make tool calls', async () => {
      if (skipTests) {
        return
      }
      const toolEnabledModel = getLangChainAnthropic().bindTools([weatherTool])
      const reply = await toolEnabledModel.invoke(convertToLangChainMessages(SINGLE_TOOL_CALL_MESSAGES))

      expect(reply).toBeDefined()
      expect(reply.tool_calls).toBeDefined()
      expect(reply.tool_calls!.length).toBeGreaterThan(0)
      expect(reply.tool_calls![0].name).toBe('get_weather')
      console.log(`✅ LangChain Anthropic tool calling passed`)
    })
  })
})
// ============================================================================
// Google via LangChain
// ============================================================================
describe('LangChain Google GenAI', () => {
  const skipTests = !isProviderAvailable('gemini')

  beforeAll(() => {
    if (skipTests) {
      console.log('⚠️ Skipping LangChain Google GenAI tests: GEMINI_API_KEY not set')
    }
  })

  describe('Simple Chat', () => {
    it('should complete a simple chat', async () => {
      if (skipTests) return
      const model = getLangChainGoogle()
      const messages = convertToLangChainMessages(SIMPLE_CHAT_MESSAGES)
      const response = await model.invoke(messages)
      expect(response).toBeDefined()
      expect(response.content).toBeDefined()
      // Content may be structured; serialise it so the length check applies
      const content = typeof response.content === 'string' ? response.content : JSON.stringify(response.content)
      expect(content.length).toBeGreaterThan(0)
      console.log(`✅ LangChain Google GenAI simple chat passed`)
    })
  })

  describe('Multi-turn Conversation', () => {
    it('should handle multi-turn conversation', async () => {
      if (skipTests) return
      const model = getLangChainGoogle()
      const messages = convertToLangChainMessages(MULTI_TURN_MESSAGES)
      const response = await model.invoke(messages)
      expect(response).toBeDefined()
      const content = typeof response.content === 'string' ? response.content : JSON.stringify(response.content)
      expect(content.toLowerCase()).toMatch(/paris|population|million|people/i)
      console.log(`✅ LangChain Google GenAI multi-turn conversation passed`)
    })
  })

  describe('Streaming Chat', () => {
    it('should stream chat response', async () => {
      if (skipTests) return
      const model = getLangChainGoogle()
      const messages = convertToLangChainMessages(STREAMING_CHAT_MESSAGES)
      const stream = await model.stream(messages)
      // Concatenate every chunk that carries content
      let content = ''
      for await (const chunk of stream) {
        if (chunk.content) {
          content += typeof chunk.content === 'string' ? chunk.content : JSON.stringify(chunk.content)
        }
      }
      expect(content.length).toBeGreaterThan(0)
      console.log(`✅ LangChain Google GenAI streaming chat passed`)
    })
  })

  describe('Tool Calling', () => {
    it('should make tool calls', async () => {
      if (skipTests) return
      const model = getLangChainGoogle()
      const modelWithTools = model.bindTools([weatherTool])
      const messages = convertToLangChainMessages(SINGLE_TOOL_CALL_MESSAGES)
      const response = await modelWithTools.invoke(messages)
      expect(response).toBeDefined()
      expect(response.tool_calls).toBeDefined()
      expect(response.tool_calls!.length).toBeGreaterThan(0)
      expect(response.tool_calls![0].name).toBe('get_weather')
      console.log(`✅ LangChain Google GenAI tool calling passed`)
    })
  })

  describe('Structured Output', () => {
    // Setup or request failures are logged as a skip. Assertions run outside
    // the try/catch so a malformed structured response fails the test
    // instead of being reported as a skip.
    it('should generate structured output', async () => {
      if (skipTests) return
      const model = getLangChainGoogle()
      const ResponseSchema = z.object({
        answer: z.string().describe('The answer to the question'),
        confidence: z.number().min(0).max(1).describe('Confidence score'),
      })

      let response: z.infer<typeof ResponseSchema>
      try {
        const structuredModel = model.withStructuredOutput(ResponseSchema)
        response = await structuredModel.invoke('What is 2 + 2?')
      } catch (error) {
        // Only setup/request failures are treated as a skip.
        console.log(`⚠️ LangChain Google GenAI structured output test skipped: ${error instanceof Error ? error.message : 'Unknown error'}`)
        return
      }

      expect(response).toBeDefined()
      expect(response.answer).toBeDefined()
      expect(typeof response.confidence).toBe('number')
      console.log(`✅ LangChain Google GenAI structured output passed`)
    })
  })
})
// ============================================================================
// Cross-Provider Token Counting Tests
// ============================================================================
describe('Token Counting', () => {
  // Each provider test sends the simple chat prompt and, only when usage
  // information is present on the response, checks that the reported token
  // counts are positive. A response without usage data still passes.

  describe('OpenAI Token Counting', () => {
    const skipTests = !isProviderAvailable('openai')

    it('should return token usage in response', async () => {
      if (skipTests) {
        return
      }
      const chatModel = getLangChainOpenAI()
      const reply = await chatModel.invoke(convertToLangChainMessages(SIMPLE_CHAT_MESSAGES))
      expect(reply).toBeDefined()

      // Usage is read from response_metadata under `usage` or `tokenUsage`,
      // with either snake_case or camelCase counters
      const usage = reply.response_metadata?.usage || reply.response_metadata?.tokenUsage
      if (usage) {
        expect(usage.prompt_tokens || usage.promptTokens).toBeGreaterThan(0)
        expect(usage.completion_tokens || usage.completionTokens).toBeGreaterThan(0)
      }

      console.log(`✅ LangChain OpenAI token counting passed`)
    })
  })

  describe('Anthropic Token Counting', () => {
    const skipTests = !isProviderAvailable('anthropic')

    it('should return token usage in response', async () => {
      if (skipTests) {
        return
      }
      const chatModel = getLangChainAnthropic()
      const reply = await chatModel.invoke(convertToLangChainMessages(SIMPLE_CHAT_MESSAGES))
      expect(reply).toBeDefined()

      // Usage is read from usage_metadata
      const usage = reply.usage_metadata
      if (usage) {
        expect(usage.input_tokens).toBeGreaterThan(0)
        expect(usage.output_tokens).toBeGreaterThan(0)
      }

      console.log(`✅ LangChain Anthropic token counting passed`)
    })
  })

  describe('Google GenAI Token Counting', () => {
    const skipTests = !isProviderAvailable('gemini')

    it('should return token usage in response', async () => {
      if (skipTests) {
        return
      }
      const chatModel = getLangChainGoogle()
      const reply = await chatModel.invoke(convertToLangChainMessages(SIMPLE_CHAT_MESSAGES))
      expect(reply).toBeDefined()

      // Usage is read from response_metadata.usage; only the prompt count
      // is checked
      const usage = reply.response_metadata?.usage
      if (usage) {
        expect(usage.promptTokenCount || usage.prompt_tokens).toBeGreaterThan(0)
      }

      console.log(`✅ LangChain Google GenAI token counting passed`)
    })
  })
})
// ============================================================================
// Cross-Provider Structured Output Tests
// ============================================================================
describe('Comprehensive Structured Output', () => {
  // Nested schema (array of objects, array of strings, numbers) shared by the
  // provider cases below.
  const RecipeSchema = z.object({
    name: z.string().describe('Name of the recipe'),
    ingredients: z.array(z.object({
      item: z.string().describe('Ingredient name'),
      amount: z.string().describe('Amount needed'),
    })).describe('List of ingredients'),
    steps: z.array(z.string()).describe('Cooking steps'),
    prepTime: z.number().describe('Preparation time in minutes'),
    cookTime: z.number().describe('Cooking time in minutes'),
  })

  describe('OpenAI Complex Structured Output', () => {
    const skipTests = !isProviderAvailable('openai')

    /**
     * Requests a recipe constrained to `RecipeSchema` and checks the shape of
     * every top-level field. Any error from the call fails the test.
     */
    it('should generate complex structured output', async () => {
      if (skipTests) return
      const model = getLangChainOpenAI()
      const structuredModel = model.withStructuredOutput(RecipeSchema)
      const response = await structuredModel.invoke('Give me a simple recipe for scrambled eggs')
      expect(response).toBeDefined()
      expect(response.name).toBeDefined()
      expect(Array.isArray(response.ingredients)).toBe(true)
      expect(Array.isArray(response.steps)).toBe(true)
      expect(typeof response.prepTime).toBe('number')
      expect(typeof response.cookTime).toBe('number')
      console.log(`✅ LangChain OpenAI complex structured output passed`)
    })
  })

  describe('Anthropic Complex Structured Output', () => {
    const skipTests = !isProviderAvailable('anthropic')

    /**
     * Requests a recipe constrained to `RecipeSchema` and checks the shape of
     * every top-level field.
     *
     * An error raised while building or invoking the structured runnable is
     * logged and treated as a soft skip. The assertions run after the `try`,
     * so a response with the wrong shape fails the test rather than being
     * caught and reported as "skipped".
     */
    it('should generate complex structured output', async () => {
      if (skipTests) return
      const model = getLangChainAnthropic()
      // Assigned inside the try; the catch branch returns, so it is always set below.
      let response
      try {
        const structuredModel = model.withStructuredOutput(RecipeSchema)
        response = await structuredModel.invoke('Give me a simple recipe for scrambled eggs')
      } catch (error) {
        console.log(`⚠️ LangChain Anthropic complex structured output test skipped: ${error instanceof Error ? error.message : 'Unknown error'}`)
        return
      }
      expect(response).toBeDefined()
      expect(response.name).toBeDefined()
      expect(Array.isArray(response.ingredients)).toBe(true)
      expect(Array.isArray(response.steps)).toBe(true)
      expect(typeof response.prepTime).toBe('number')
      expect(typeof response.cookTime).toBe('number')
      console.log(`✅ LangChain Anthropic complex structured output passed`)
    })
  })
})
// ============================================================================
// Extended Thinking Tests
// ============================================================================
describe('Thinking/Extended Reasoning', () => {
  // Prompt shared by every provider case in this group.
  const REASONING_PROMPT = 'What is 15% of 80? Think through this step by step.'

  /**
   * Flattens message content to a string: plain strings pass through,
   * anything else is JSON-serialized.
   */
  const contentToText = (content: unknown): string =>
    typeof content === 'string' ? content : JSON.stringify(content)

  // In each case below only the model call is wrapped in try/catch (a failed
  // call is logged and treated as a soft skip). Assertions run after the try
  // so that an empty or malformed response fails the test instead of being
  // caught and reported as "skipped".

  describe('OpenAI Thinking', () => {
    const skipTests = !isProviderAvailable('openai')

    /** Invokes the configured OpenAI thinking model and expects non-empty content. */
    it('should support extended reasoning', async () => {
      if (skipTests) return
      const thinkingModel = getProviderModel('openai', 'thinking')
      // Skip if no thinking model available
      if (!thinkingModel) {
        console.log('⚠️ Skipping OpenAI thinking test: No thinking model configured')
        return
      }
      const baseUrl = getIntegrationUrl('openai')
      const apiKey = hasApiKey('openai') ? getApiKey('openai') : 'dummy-key'
      const model = new ChatOpenAI({
        modelName: thinkingModel,
        openAIApiKey: apiKey,
        configuration: {
          baseURL: baseUrl,
        },
        maxTokens: 2000,
        timeout: 300000,
      })
      let response
      try {
        response = await model.invoke([new HumanMessage(REASONING_PROMPT)])
      } catch (error) {
        console.log(`⚠️ LangChain OpenAI thinking test skipped: ${error instanceof Error ? error.message : 'Unknown error'}`)
        return
      }
      expect(response).toBeDefined()
      expect(contentToText(response.content).length).toBeGreaterThan(0)
      console.log(`✅ LangChain OpenAI thinking passed`)
    })
  })

  describe('Anthropic Thinking', () => {
    const skipTests = !isProviderAvailable('anthropic')

    /** Invokes the configured Anthropic thinking model and expects non-empty content. */
    it('should support extended reasoning', async () => {
      if (skipTests) return
      const thinkingModel = getProviderModel('anthropic', 'thinking')
      // Skip if no thinking model available
      if (!thinkingModel) {
        console.log('⚠️ Skipping Anthropic thinking test: No thinking model configured')
        return
      }
      const baseUrl = getIntegrationUrl('anthropic')
      const apiKey = hasApiKey('anthropic') ? getApiKey('anthropic') : 'dummy-key'
      // NOTE(review): no `thinking` option is passed to the client here, so
      // this exercises the thinking-capable model with its default settings —
      // confirm whether extended thinking should be enabled explicitly.
      const model = new ChatAnthropic({
        modelName: thinkingModel,
        anthropicApiKey: apiKey,
        anthropicApiUrl: baseUrl,
        maxTokens: 8000,
        maxRetries: 3,
      })
      let response
      try {
        response = await model.invoke([new HumanMessage(REASONING_PROMPT)])
      } catch (error) {
        console.log(`⚠️ LangChain Anthropic thinking test skipped: ${error instanceof Error ? error.message : 'Unknown error'}`)
        return
      }
      expect(response).toBeDefined()
      expect(contentToText(response.content).length).toBeGreaterThan(0)
      console.log(`✅ LangChain Anthropic thinking passed`)
    })
  })

  describe('Google GenAI Thinking', () => {
    const skipTests = !isProviderAvailable('gemini')

    /** Invokes the configured Gemini thinking model and expects non-empty content. */
    it('should support extended reasoning', async () => {
      if (skipTests) return
      const thinkingModel = getProviderModel('gemini', 'thinking')
      // Skip if no thinking model available
      if (!thinkingModel) {
        console.log('⚠️ Skipping Google GenAI thinking test: No thinking model configured')
        return
      }
      const apiKey = hasApiKey('gemini') ? getApiKey('gemini') : 'dummy-key'
      // NOTE(review): unlike the OpenAI and Anthropic cases in this group, no
      // base URL from getIntegrationUrl is passed to this client — confirm
      // that bypassing the integration URL is intended.
      const model = new ChatGoogleGenerativeAI({
        modelName: thinkingModel,
        apiKey,
        maxOutputTokens: 2048,
      })
      let response
      try {
        response = await model.invoke([new HumanMessage(REASONING_PROMPT)])
      } catch (error) {
        console.log(`⚠️ LangChain Google GenAI thinking test skipped: ${error instanceof Error ? error.message : 'Unknown error'}`)
        return
      }
      expect(response).toBeDefined()
      expect(contentToText(response.content).length).toBeGreaterThan(0)
      console.log(`✅ LangChain Google GenAI thinking passed`)
    })
  })
})
// ============================================================================
// Streaming Tool Calls Tests
// ============================================================================
describe('Streaming Tool Calls', () => {
  describe('OpenAI Streaming Tool Calls', () => {
    const skipTests = !isProviderAvailable('openai')

    /**
     * Streams a single-tool-call conversation through an OpenAI model bound to
     * the weather and calculator tools, draining the stream to completion.
     *
     * Whether a tool call showed up in any chunk is only reported in the log;
     * the test passes as long as the stream finishes without throwing.
     */
    it('should stream tool calls', async () => {
      if (skipTests) return

      const toolModel = getLangChainOpenAI().bindTools([weatherTool, calculatorTool])
      const stream = await toolModel.stream(convertToLangChainMessages(SINGLE_TOOL_CALL_MESSAGES))

      let toolCallSeen = false
      for await (const piece of stream) {
        // A chunk may carry fully-formed tool calls, partial tool-call chunks, or neither.
        const completeCalls = piece.tool_calls?.length ?? 0
        const partialCalls = piece.tool_call_chunks?.length ?? 0
        if (completeCalls > 0 || partialCalls > 0) {
          toolCallSeen = true
        }
      }

      // Tool calls might not always stream, but the stream should complete
      console.log(`✅ LangChain OpenAI streaming tool calls passed (tool call detected: ${toolCallSeen})`)
    })
  })
})
})

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,32 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "ESNext",
"moduleResolution": "bundler",
"lib": ["ES2022"],
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"strict": true,
"skipLibCheck": true,
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"outDir": "./dist",
"rootDir": ".",
"baseUrl": ".",
"paths": {
"@/*": ["./src/*"]
},
"types": ["node", "vitest/globals"],
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"forceConsistentCasingInFileNames": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noImplicitReturns": true,
"noFallthroughCasesInSwitch": true
},
"include": ["src/**/*", "tests/**/*", "vitest.config.ts"],
"exclude": ["node_modules", "dist"]
}

View File

@@ -0,0 +1,54 @@
import { resolve } from 'path'
import { defineConfig } from 'vitest/config'
/** One minute in milliseconds; the timeouts below are multiples of it. */
const ONE_MINUTE_MS = 60_000

/** Directories excluded from both test discovery and coverage. */
const IGNORED_DIRS = ['node_modules', 'dist']

/**
 * Vitest configuration for the integration test suite: Node environment,
 * global test APIs, a single forked worker, and long per-test timeouts.
 */
export default defineConfig({
  test: {
    // Which files count as tests
    include: ['tests/**/*.test.ts'],
    exclude: IGNORED_DIRS,

    // Expose describe/it/expect as globals and run under Node
    globals: true,
    environment: 'node',

    // 5 minutes per test (matching Python), 1 minute per hook
    testTimeout: 5 * ONE_MINUTE_MS,
    hookTimeout: ONE_MINUTE_MS,

    // A single forked worker keeps tests sequential to avoid API rate limiting
    pool: 'forks',
    poolOptions: {
      forks: { singleFork: true },
    },

    reporters: ['verbose'],

    // Executed before the test files
    setupFiles: ['./tests/setup.ts'],

    // Re-run flaky tests (matching Python pytest-rerunfailures)
    retry: 2,

    // Coverage is collected for library sources only
    coverage: {
      provider: 'v8',
      reporter: ['text', 'html', 'json'],
      include: ['src/**/*.ts'],
      exclude: [...IGNORED_DIRS, 'tests'],
    },

    // Environment variables injected into the test process
    env: { NODE_ENV: 'test' },
  },
  resolve: {
    // Mirrors the "@/*" path mapping declared in tsconfig.json
    alias: { '@': resolve(__dirname, './src') },
  },
})