first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/tests/integrations/python/.python-version
+++ b/tests/integrations/python/.python-version
@@ -0,0 +1 @@
+3.12
--- a/tests/integrations/python/README.md
+++ b/tests/integrations/python/README.md
--- a/tests/integrations/python/config.json
+++ b/tests/integrations/python/config.json
@@ -0,0 +1,348 @@
+{
+    "$schema": "https://www.getbifrost.ai/schema",
+    "mcp": {
+        "client_configs": [
+            {
+                "name": "sse_mcp",
+                "connection_type": "sse",
+                "connection_string": "env.MCP_SSE_URL",
+                "auth_type": "headers",
+                "headers": {
+                    "Authorization": "env.MCP_SSE_AUTHORIZATION",
+                    "ENV_EXA_API_KEY": "env.MCP_SSE_API_KEY"
+                },
+                "tools_to_execute": ["*"],
+                "tools_to_auto_execute": ["*"]
+            }
+        ]
+    },
+    "providers": {
+        "openai": {
+            "keys": [
+                {
+                    "name": "OpenAI API Key",
+                    "value": "env.OPENAI_API_KEY",
+                    "weight": 1,
+                    "models": ["*"],
+                    "use_for_batch_api": true
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "elevenlabs": {
+            "keys": [
+                {
+                    "name": "ElevenLabs API Key",
+                    "value": "env.ELEVENLABS_API_KEY",
+                    "weight": 1,
+                    "models": ["*"],
+                    "use_for_batch_api": false
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "xai": {
+            "keys": [
+                {
+                    "name": "Xai API Key",
+                    "value": "env.XAI_API_KEY",
+                    "weight": 1,
+                    "models": ["*"],
+                    "use_for_batch_api": false
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "huggingface": {
+            "keys": [
+                {
+                    "name": "Hugging Face API Key",
+                    "value": "env.HUGGING_FACE_API_KEY",
+                    "weight": 1,
+                    "models": ["*"],
+                    "use_for_batch_api": false
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "anthropic": {
+            "keys": [
+                {
+                    "name": "Anthropic API Key",
+                    "value": "env.ANTHROPIC_API_KEY",
+                    "weight": 1,
+                    "models": ["*"],
+                    "use_for_batch_api": true
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "gemini": {
+            "keys": [
+                {
+                    "name": "Gemini API Key",
+                    "value": "env.GEMINI_API_KEY",
+                    "weight": 1,
+                    "models": ["*"],
+                    "use_for_batch_api": true
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "vertex": {
+            "keys": [
+                {
+                    "name": "Vertex API Key",
+                    "vertex_key_config": {
+                        "project_id": "env.VERTEX_PROJECT_ID",
+                        "region": "env.GOOGLE_LOCATION",
+                        "auth_credentials": "env.VERTEX_CREDENTIALS"
+                    },
+                    "weight": 1,
+                    "models": ["*"]
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "mistral": {
+            "keys": [
+                {
+                    "name": "Mistral API Key",
+                    "value": "env.MISTRAL_API_KEY",
+                    "weight": 1,
+                    "models": ["*"]
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "cohere": {
+            "keys": [
+                {
+                    "name": "Cohere API Key",
+                    "value": "env.COHERE_API_KEY",
+                    "weight": 1,
+                    "models": ["*"]
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "parasail": {
+            "keys": [
+                {
+                    "name": "Parasail API Key",
+                    "value": "env.PARASAIL_API_KEY",
+                    "weight": 1,
+                    "models": ["*"]
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "groq": {
+            "keys": [
+                {
+                    "name": "Groq API Key",
+                    "value": "env.GROQ_API_KEY",
+                    "weight": 1,
+                    "models": ["*"]
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "perplexity": {
+            "keys": [
+                {
+                    "name": "Perplexity API Key",
+                    "value": "env.PERPLEXITY_API_KEY",
+                    "weight": 1,
+                    "models": ["*"]
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "cerebras": {
+            "keys": [
+                {
+                    "name": "Cerebras API Key",
+                    "value": "env.CEREBRAS_API_KEY",
+                    "weight": 1,
+                    "models": ["*"]
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "openrouter": {
+            "keys": [
+                {
+                    "name": "OpenRouter API Key",
+                    "value": "env.OPENROUTER_API_KEY",
+                    "weight": 1,
+                    "models": ["*"]
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "azure": {
+            "keys": [
+                {
+                    "name": "Azure API Key",
+                    "value": "env.AZURE_API_KEY",
+                    "azure_key_config": {
+                        "endpoint": "env.AZURE_ENDPOINT",
+                        "api_version": "env.AZURE_API_VERSION",
+                        "deployments": {
+                            "gpt-4o": "gpt-4o",
+                            "gpt-4o-mini": "gpt-4o-mini",
+                            "gpt-4o-mini-tts": "gpt-4o-mini-tts",
+                            "o1": "o1",
+                            "gpt-4o-batch": "gpt-4o-batch",
+                            "whisper": "whisper",
+                            "text-embedding-3-small": "text-embedding-3-small",
+                            "gpt-image-1": "gpt-image-1"
+                        }
+                    },
+                    "weight": 1,
+                    "models": ["*"]
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "bedrock": {
+            "keys": [
+                {
+                    "name": "Bedrock API Key",
+                    "bedrock_key_config": {
+                        "access_key": "env.AWS_ACCESS_KEY_ID",
+                        "secret_key": "env.AWS_SECRET_ACCESS_KEY",
+                        "region": "env.AWS_REGION",
+                        "arn": "env.AWS_ARN"
+                    },
+                    "weight": 1,
+                    "models": ["*"],
+                    "use_for_batch_api": true
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "replicate": {
+            "keys": [
+                {
+                    "name": "Replicate API Key",
+                    "value": "env.REPLICATE_API_KEY",
+                    "weight": 1
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "runway": {
+            "keys": [
+                {
+                    "name": "Runway API Key",
+                    "value": "env.RUNWAY_API_KEY",
+                    "weight": 1
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        },
+        "nebius": {
+            "keys": [
+                {
+                    "name": "Nebius API Key",
+                    "value": "env.NEBIUS_API_KEY",
+                    "weight": 1
+                }
+            ],
+            "network_config": {
+                "default_request_timeout_in_seconds": 300
+            }
+        }
+    },
+    "config_store": {
+        "enabled": true,
+        "type": "sqlite",
+        "config": {
+            "path": "./config.db"
+        }
+    },
+    "logs_store": {
+        "enabled": true,
+        "type": "sqlite",
+        "config": {
+            "path": "./logs.db"
+        }
+    },
+    "governance": {
+        "virtual_keys": [
+            {
+                "name": "Test Key",
+                "id": "vk-test",
+                "value": "sk-bf-test-key",
+                "is_active": true,
+                "provider_configs": [
+                    { "provider": "openai",      "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "elevenlabs",  "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "xai",         "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "huggingface", "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "anthropic",   "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "gemini",      "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "vertex",      "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "mistral",     "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "cohere",      "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "parasail",    "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "groq",        "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "perplexity",  "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "cerebras",    "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "openrouter",  "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "azure",       "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 },
+                    { "provider": "bedrock",     "allowed_models": ["*"], "key_ids": ["*"], "weight": 1.0 }
+                ]
+            }
+        ]
+    },
+    "client": {
+        "drop_excess_requests": false,
+        "initial_pool_size": 300,
+        "allowed_origins": [
+            "*"
+        ],
+        "enable_logging": true,
+        "enforce_auth_on_inference": false,
+        "allow_direct_keys": false,
+        "max_request_body_size_mb": 100
+    }
+}
--- a/tests/integrations/python/config.yml
+++ b/tests/integrations/python/config.yml
@@ -0,0 +1,942 @@
+# Bifrost Integration Tests Configuration
+# This file centralizes all configuration for AI integration clients and test settings
+
+# Bifrost Gateway Configuration
+# All integrations route through Bifrost as a proxy/gateway
+bifrost:
+  base_url: "${BIFROST_BASE_URL:-http://localhost:8080}"
+
+  # Integration-specific endpoints (suffixes appended to base_url)
+  endpoints:
+    openai: "openai"
+    anthropic: "anthropic"
+    cohere: "cohere"
+    google: "genai"
+    "gemini_passthrough": "genai_passthrough"
+    "anthropic_passthrough": "anthropic_passthrough"
+    litellm: "litellm"
+    langchain: "langchain"
+    pydanticai: "pydanticai"
+    bedrock: "bedrock"
+    azure: "openai"  # Azure uses OpenAI routes with /openai/deployments/{id} paths
+
+  # Full URLs are constructed as: {base_url.rstrip('/')}/{endpoints[integration]}
+  # Examples:
+  # - OpenAI: http://localhost:8080/openai
+  # - Anthropic: http://localhost:8080/anthropic
+  # - Google: http://localhost:8080/genai
+  # - LiteLLM: http://localhost:8080/litellm
+  # - LangChain: http://localhost:8080/langchain
+
+# API Configuration
+api:
+  timeout: 30 # seconds
+  max_retries: 3
+  retry_delay: 1 # seconds
+
+# Provider model configurations
+# Integrations (e.g. openai, anthropic, google, litellm, langchain) map to these providers
+providers:
+  openai:
+    chat: "gpt-4o"
+    vision: "gpt-4o"
+    file: "gpt-4o"
+    tools: "gpt-4o-mini"
+    speech: "tts-1"
+    transcription: "whisper-1"
+    embeddings: "text-embedding-3-small"
+    image_generation: "gpt-image-1"
+    image_edit: "gpt-image-1"
+    streaming: "gpt-4o-mini"
+    thinking: "gpt-5.1"
+    batch_file_upload: "gpt-4o-mini"
+    batch_list: "gpt-4o"
+    batch_retrieve: "gpt-4o"
+    batch_cancel: "gpt-4o"
+    batch_inline: "gpt-4o"
+    file_upload: "gpt-4o-mini"
+    file_list: "gpt-4o-mini"
+    file_retrieve: "gpt-4o-mini"
+    file_delete: "gpt-4o-mini"
+    file_content: "gpt-4o-mini"
+    count_tokens: "gpt-4o-mini"
+    video: "sora-2"
+    alternatives:
+      - "gpt-4"
+      - "gpt-4-turbo-preview"
+      - "gpt-4o"
+      - "gpt-3.5-turbo"
+    
+  azure:
+    chat: "gpt-4o"
+    vision: "gpt-4o"
+    tools: "gpt-4o-mini"
+    streaming: "gpt-4o-mini"
+    speech: "gpt-4o-mini-tts"
+    transcription: "whisper"
+    embeddings: "text-embedding-3-small"
+    image_generation: "gpt-image-1"
+    thinking: "o1"
+    batch_file_upload: "gpt-4o-batch"
+    batch_list: "gpt-4o-batch"
+    batch_retrieve: "gpt-4o-batch"
+    batch_cancel: "gpt-4o-batch"
+    file_upload: "gpt-4o"
+    file_list: "gpt-4o"
+    file_retrieve: "gpt-4o"
+    file_delete: "gpt-4o"
+    file_content: "gpt-4o"
+
+  xai:
+    chat: "grok-4-0709"
+    vision: "grok-2-vision-1212"
+    tools: "grok-4-0709"
+    streaming: "grok-4-0709"
+    thinking: "grok-3-mini"
+    
+  anthropic:
+    chat: "claude-sonnet-4-5-20250929"
+    vision: "claude-sonnet-4-5-20250929"
+    file: "claude-sonnet-4-5-20250929"
+    tools: "claude-sonnet-4-5-20250929"
+    streaming: "claude-sonnet-4-5-20250929"
+    thinking: "claude-opus-4-5"
+    batch_file_upload: "claude-sonnet-4-20250514"
+    batch_inline: "claude-sonnet-4-20250514"
+    batch_list: "claude-sonnet-4-20250514"
+    batch_retrieve: "claude-sonnet-4-20250514"
+    batch_cancel: "claude-sonnet-4-20250514"
+    file_upload: "claude-sonnet-4-20250514"
+    file_list: "claude-sonnet-4-20250514"
+    file_retrieve: "claude-sonnet-4-20250514"
+    file_delete: "claude-sonnet-4-20250514"
+    file_content: "claude-sonnet-4-20250514"
+    count_tokens: "claude-sonnet-4-5-20250929"
+    alternatives:
+      - "claude-3-sonnet-20240229"
+      - "claude-3-opus-20240229"
+      - "claude-3-5-sonnet-20241022"
+      - "claude-3-haiku-20240307"
+    
+  gemini:
+    chat: "gemini-3-flash-preview"
+    vision: "gemini-3-flash-preview"
+    tools: "gemini-3-flash-preview"
+    file: "gemini-2.5-flash"
+    thinking: "gemini-3-pro-preview"
+    speech: "gemini-2.5-flash-preview-tts"
+    transcription: "gemini-2.5-flash"
+    embeddings: "gemini-embedding-001"
+    image_generation: "gemini-2.5-flash-image"
+    image_edit: "gemini-3-pro-image-preview"
+    imagen: "imagen-4.0-generate-001"
+    video: "veo-3.1-generate-preview"
+    streaming: "gemini-3-flash-preview"
+    batch_create: "gemini-2.5-flash"
+    batch_inline: "gemini-2.5-flash"
+    batch_file_upload: "gemini-2.5-flash"
+    batch_list: "gemini-2.5-flash"
+    batch_retrieve: "gemini-2.5-flash"
+    batch_cancel: "gemini-2.5-flash"
+    batch_s3: "gemini-2.5-flash"
+    file_upload: "gemini-2.0-flash"
+    file_list: "gemini-2.0-flash"
+    file_content: "gemini-2.0-flash"
+    file_download: "gemini-2.0-flash"
+    file_retrieve: "gemini-2.0-flash"
+    file_delete: "gemini-2.0-flash"
+    count_tokens: "gemini-2.5-flash"
+    alternatives:
+      - "gemini-1.5-pro"
+      - "gemini-1.5-flash"
+      - "gemini-1.0-pro"
+      - "gemini-2.0-flash-001"
+  
+  vertex:
+    chat: "gemini-2.5-flash"
+    vision: "claude-sonnet-4-5"
+    tools: "gemini-2.5-flash"
+    file: "claude-sonnet-4-5"
+    thinking: "gemini-2.5-pro"
+    embeddings: "gemini-embedding-001"
+    image_generation: "imagen-4.0-generate-001"
+    image_edit: "imagen-3.0-capability-001"
+    imagen: "imagen-4.0-generate-001"
+    streaming: "gemini-2.5-flash"
+    count_tokens: "claude-sonnet-4-5"
+    video: "veo-3.1-generate-preview"
+  bedrock:
+    chat: "global.anthropic.claude-sonnet-4-20250514-v1:0"
+    vision: "global.anthropic.claude-sonnet-4-20250514-v1:0"
+    file: "global.anthropic.claude-sonnet-4-20250514-v1:0"
+    tools: "global.anthropic.claude-sonnet-4-20250514-v1:0"
+    streaming: "global.anthropic.claude-sonnet-4-20250514-v1:0"
+    thinking: "us.anthropic.claude-opus-4-5-20251101-v1:0"
+    text_completion: "mistral.mistral-7b-instruct-v0:2"
+    embeddings: "global.cohere.embed-v4:0"
+    image_generation: "amazon.titan-image-generator-v2:0"
+    image_variation: "amazon.titan-image-generator-v2:0"
+    batch_inline: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    image_edit: "amazon.nova-canvas-v1:0"
+    batch_list: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    batch_retrieve: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    batch_cancel: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    batch_file_upload: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    batch_s3: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    file_upload: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    file_list: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    file_delete: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    file_content: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    count_tokens: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
+    alternatives:
+      - "anthropic.claude-3-opus-20240229-v1:0"
+    
+  cohere:
+    chat: "command-a-03-2025"
+    vision: "command-a-vision-07-2025"
+    tools: "command-a-03-2025"
+    embeddings: "embed-v4.0"
+    streaming: "command-a-03-2025"
+    count_tokens: "command-a-03-2025"
+    alternatives:
+      - "command-r-plus"
+  
+  huggingface:
+    image_generation: "fal-ai/fal-ai/flux/dev"
+    image_edit: "fal-ai/fal-ai/flux-2/edit"
+  
+  nebius:
+    image_generation: "black-forest-labs/flux-schnell"
+  
+  replicate:
+    video: "openai/sora-2-pro"
+
+  runway:
+    video: "gen4.5"
+    
+    
+
+# Provider availability configuration
+# Maps provider names to their API key environment variables
+provider_api_keys:
+  openai: "OPENAI_API_KEY"
+  anthropic: "ANTHROPIC_API_KEY"
+  gemini: "GEMINI_API_KEY"
+  vertex: "VERTEX_API_KEY"
+  bedrock: "AWS_ACCESS_KEY_ID"
+  cohere: "COHERE_API_KEY"
+  xai: "XAI_API_KEY"
+  huggingface: "HUGGING_FACE_API_KEY"
+  nebius: "NEBIUS_API_KEY"
+  azure: "AZURE_API_KEY"
+  replicate: "REPLICATE_API_KEY"
+  runway: "RUNWAY_API_KEY"
+
+# Provider test scenarios - which tests each provider supports
+provider_scenarios:
+  openai:
+    simple_chat: true
+    multi_turn_conversation: true
+    streaming: true
+    tool_calls: true
+    multiple_tool_calls: true
+    end2end_tool_calling: true
+    automatic_function_calling: true
+    "web_search": true
+    image_url: true
+    image_base64: true
+    file_input: true
+    multiple_images: true
+    speech_synthesis: true
+    speech_synthesis_streaming: true
+    transcription: true
+    transcription_streaming: true
+    embeddings: true
+    image_generation: true
+    image_edit: true
+    thinking: true
+    prompt_caching: false
+    citations: false
+    list_models: true
+    responses: true
+    responses_image: true
+    text_completion: false
+    langchain_structured_output: true
+    pydantic_structured_output: true  # PydanticAI structured output works reliably with OpenAI
+    pydanticai_streaming: true  # PydanticAI streaming works with OpenAI
+    batch_file_upload: true
+    batch_create: true
+    batch_list: true
+    batch_retrieve: true
+    batch_cancel: true
+    batch_inline: true  # OpenAI supports inline requests for batch
+    batch_s3: false  # OpenAI does not use S3 for batch
+    file_upload: true
+    file_list: true
+    file_retrieve: true
+    file_delete: true
+    file_content: true
+    count_tokens: true
+    video_generation: false # disabled for now because of long-running operations
+
+  azure:
+    simple_chat: true
+    multi_turn_conversation: true
+    streaming: true
+    tool_calls: true
+    multiple_tool_calls: true
+    end2end_tool_calling: true
+    automatic_function_calling: true
+    web_search: false
+    image_url: true
+    image_base64: true
+    file_input: false
+    multiple_images: true
+    speech_synthesis: true
+    speech_synthesis_streaming: true
+    transcription: true
+    transcription_streaming: true
+    embeddings: true
+    image_generation: true
+    image_edit: false
+    thinking: true
+    prompt_caching: false
+    citations: false
+    list_models: true
+    responses: true
+    responses_image: true
+    text_completion: false
+    langchain_structured_output: false
+    pydantic_structured_output: false
+    pydanticai_streaming: false
+    batch_file_upload: true
+    batch_create: false
+    batch_list: true
+    batch_retrieve: true
+    batch_cancel: true
+    batch_inline: false
+    batch_s3: false
+    file_upload: true
+    file_list: true
+    file_retrieve: true
+    file_delete: true
+    file_content: true
+    count_tokens: false
+
+  xai:
+    simple_chat: true
+    multi_turn_conversation: true
+    streaming: true
+    tool_calls: true
+    multiple_tool_calls: true
+    end2end_tool_calling: true
+    automatic_function_calling: true
+    image_url: true
+    image_base64: false
+    file_input: false
+    multiple_images: false
+    thinking: true
+    list_models: true
+    responses: true
+    responses_image: true
+    text_completion: false
+    langchain_structured_output: true
+    pydantic_structured_output: true 
+    pydanticai_streaming: true 
+    
+  anthropic:
+    simple_chat: true
+    multi_turn_conversation: true
+    streaming: true
+    tool_calls: true
+    multiple_tool_calls: true
+    end2end_tool_calling: true
+    automatic_function_calling: true
+    web_search: true
+    image_url: true
+    image_base64: true
+    file_input: true
+    file_input_text: true
+    multiple_images: true
+    speech_synthesis: false
+    speech_synthesis_streaming: false
+    transcription: false
+    transcription_streaming: false
+    embeddings: false
+    thinking: true
+    prompt_caching: true
+    citations: true
+    list_models: true
+    responses: true
+    responses_image: true
+    text_completion: false
+    langchain_structured_output: false
+    pydantic_structured_output: true  # PydanticAI structured output works with Anthropic
+    pydanticai_streaming: true  # PydanticAI streaming works with Anthropic
+    batch_file_upload: true  # NOTE(review): enabled although Anthropic's batch API uses inline requests, not files; confirm this is intended
+    batch_create: true
+    batch_list: true
+    batch_retrieve: true
+    batch_cancel: true
+    batch_inline: true  # Anthropic uses inline requests for batch
+    batch_s3: false  # Anthropic does not use S3 for batch
+    file_upload: true
+    file_list: true
+    file_retrieve: true
+    file_delete: true
+    file_content: true
+    count_tokens: true
+    
+  gemini:
+    simple_chat: true
+    multi_turn_conversation: true
+    streaming: true
+    tool_calls: true
+    multiple_tool_calls: true
+    end2end_tool_calling: true
+    automatic_function_calling: true
+    image_url: false  # Gemini requires base64 or file upload
+    image_base64: true
+    file_input: true
+    multiple_images: false
+    speech_synthesis: true
+    speech_synthesis_streaming: true
+    transcription: true
+    transcription_streaming: true
+    embeddings: true
+    image_generation: true  # Gemini image generation via responseModalities
+    image_edit: true  # Gemini image editing
+    imagen: true  # Imagen via :predict endpoint
+    imagen_edit: true  # Imagen editing via image_edit model
+    thinking: true
+    video_generation: false # disabled for now because of long-running operations
+    prompt_caching: false
+    citations: false
+    list_models: true
+    responses: true
+    responses_image: true
+    text_completion: false
+    langchain_structured_output: true
+    pydantic_structured_output: false  # PydanticAI structured output unreliable via Bifrost for Gemini
+    pydanticai_streaming: false  # PydanticAI GoogleModel streaming has asyncio issues
+    batch_file_upload: true  # Gemini supports file upload via Files API
+    batch_create: true
+    batch_list: true
+    batch_retrieve: true
+    batch_cancel: true
+    batch_inline: true  # Gemini uses inline requests for batch (synchronous)
+    batch_s3: false  # Gemini does not use S3 for batch
+    file_upload: true
+    file_list: true
+    file_retrieve: true
+    file_delete: true
+    file_content: false  # Gemini doesn't support direct file download
+    count_tokens: true
+    context_caching: true  # Gemini context caching (Caches API) via Bifrost passthrough
+
+  vertex:
+    simple_chat: true
+    multi_turn_conversation: true
+    streaming: true
+    tool_calls: true
+    multiple_tool_calls: true
+    end2end_tool_calling: true
+    automatic_function_calling: true
+    image_url: false  # Gemini requires base64 or file upload
+    image_base64: true
+    file_input: true
+    multiple_images: false
+    speech_synthesis: false
+    speech_synthesis_streaming: false
+    transcription: false
+    transcription_streaming: false
+    embeddings: true
+    image_generation: true
+    image_edit: true
+    imagen: true  # Imagen via :predict endpoint
+    imagen_edit: true  # Imagen editing via image_edit model
+    thinking: true
+    prompt_caching: false
+    list_models: true
+    video_generation: false # disabled for now because of long-running operations
+    responses: true
+    responses_image: true
+    text_completion: false
+    langchain_structured_output: true
+    pydantic_structured_output: false  # PydanticAI structured output unreliable via Bifrost for Gemini
+    pydanticai_streaming: false  # PydanticAI GoogleModel streaming has asyncio issues
+    batch_file_upload: false  # Batch file upload tests are disabled for Vertex
+    batch_create: false
+    batch_list: false
+    batch_retrieve: false
+    batch_cancel: false
+    batch_inline: false  # Inline batch tests are disabled for Vertex
+    batch_s3: false  # Gemini does not use S3 for batch
+    file_upload: false
+    file_list: false
+    file_retrieve: false
+    file_delete: false
+    file_content: false  # Gemini doesn't support direct file download
+    count_tokens: false
+
+  bedrock:
+    simple_chat: true
+    multi_turn_conversation: true
+    streaming: true
+    tool_calls: true
+    multiple_tool_calls: true
+    end2end_tool_calling: true
+    automatic_function_calling: true
+    image_url: false
+    image_base64: true
+    file_input: true
+    file_input_text: true
+    multiple_images: false
+    speech_synthesis: false
+    speech_synthesis_streaming: false
+    transcription: false
+    transcription_streaming: false
+    embeddings: true
+    thinking: true
+    prompt_caching: true
+    citations: false
+    list_models: true
+    responses: true
+    responses_image: true
+    text_completion: false
+    langchain_structured_output: true
+    pydantic_structured_output: false  # Bedrock not supported in PydanticAI tests
+    pydanticai_streaming: false  # Bedrock not supported in PydanticAI tests
+    batch_file_upload: true  # Bedrock uses S3 wrapper for file uploads
+    batch_create: true
+    batch_list: true
+    batch_retrieve: true
+    batch_cancel: true
+    batch_inline: false  # Bedrock batch uses S3, not inline requests via API
+    batch_s3: true  # Bedrock uses S3 for batch input/output
+    file_upload: true  # Bedrock uses S3 wrapper for file storage
+    file_list: true  # Bedrock lists files in S3 bucket
+    file_retrieve: true  # Bedrock retrieves S3 object metadata
+    file_delete: true  # Bedrock deletes S3 objects
+    file_content: true  # Bedrock downloads S3 object content
+    image_generation: true  # Bedrock supports image generation via invoke (Titan, SA, cross-provider)
+    image_edit: true  # Bedrock supports image editing via invoke (Titan, SA)
+    image_variation: true  # Bedrock supports image variation via invoke (Titan IMAGE_VARIATION)
+    count_tokens: true  # Bedrock supports token counting via CountTokens API
+
+  cohere:
+    simple_chat: true
+    multi_turn_conversation: true
+    streaming: true
+    tool_calls: true
+    multiple_tool_calls: true
+    end2end_tool_calling: true
+    automatic_function_calling: false
+    image_url: true
+    image_base64: true
+    multiple_images: true
+    speech_synthesis: false
+    speech_synthesis_streaming: false
+    transcription: false
+    transcription_streaming: false
+    embeddings: true
+    thinking: false
+    prompt_caching: false
+    citations: false
+    list_models: false
+    responses: true
+    responses_image: true
+    text_completion: false
+    langchain_structured_output: true
+    pydantic_structured_output: false  # PydanticAI CohereModel doesn't reliably support structured output
+    pydanticai_streaming: false  # PydanticAI CohereModel doesn't implement streaming
+    batch_file_upload: false
+    batch_create: false
+    batch_list: false
+    batch_retrieve: false
+    batch_cancel: false
+    batch_inline: false  # Cohere does not support batch API
+    batch_s3: false  # Cohere does not support batch API
+    file_upload: false  # Cohere does not support Files API
+    file_list: false
+    file_retrieve: false
+    file_delete: false
+    file_content: false
+    count_tokens: true
+  
+  huggingface:
+    image_generation: true
+    image_edit: true
+  
+  nebius:
+    image_generation: true
+  
+  replicate:
+    video_generation: false # disabled for now because of long-running operations
+  
+  runway:
+    video_generation: false # disabled for now because of long-running operations
+
+# Scenario to capability mapping
+# Maps test scenario names to their corresponding capability types
+scenario_capabilities:
+  simple_chat: "chat"
+  multi_turn_conversation: "chat"
+  responses: "chat"
+  responses_image: "vision"
+  text_completion: "chat"
+  streaming: "streaming"
+  tool_calls: "tools"
+  multiple_tool_calls: "tools"
+  end2end_tool_calling: "tools"
+  automatic_function_calling: "tools"
+  web_search: "chat"
+  image_url: "vision"
+  image_base64: "vision"
+  file_input: "file"
+  file_input_text: "file"
+  multiple_images: "vision"
+  speech_synthesis: "speech"
+  speech_synthesis_streaming: "speech"
+  transcription: "transcription"
+  transcription_streaming: "transcription"
+  embeddings: "embeddings"
+  image_generation: "image_generation"  # Uses image_generation model
+  image_edit: "image_edit"  # Uses image_edit model
+  imagen: "imagen"  # Uses imagen model (Gemini/Vertex)
+  imagen_edit: "image_edit"  # Uses image_edit model for Imagen editing
+  thinking: "thinking"
+  prompt_caching: "chat"
+  citations: "chat"
+  list_models: "chat"
+  langchain_structured_output: "chat"  # LangChain structured output uses chat capability
+  # count_tokens is mapped once, near the end of this table; a second entry here duplicated the key
+  pydantic_structured_output: "chat"  # Structured output uses chat capability
+  pydanticai_streaming: "streaming"  # PydanticAI streaming uses streaming capability
+  batch_file_upload: "batch_file_upload"  # Uses batch_file_upload model directly
+  batch_create: "batch_create"
+  batch_list: "batch_list"
+  batch_retrieve: "batch_retrieve"
+  batch_cancel: "batch_cancel"
+  batch_inline: "batch_inline"  # Uses batch_inline model directly
+  batch_s3: "batch_s3"  # Uses batch_s3 model directly
+  file_upload: "file_upload"  # Uses file_upload model directly
+  file_list: "file_list"  # Uses file_list model directly
+  file_retrieve: "file_retrieve"  # Uses file_retrieve model directly
+  file_delete: "file_delete"  # Uses file_delete model directly
+  file_content: "file_content"  # Uses file_content model directly
+  count_tokens: "chat"  # Sole count_tokens mapping (YAML mapping keys must be unique)
+  video_generation: "video"
+  context_caching: "chat"  # Gemini Caches API (passthrough)
+
+# Model capabilities matrix
+model_capabilities:
+  # OpenAI Models
+  "gpt-3.5-turbo":
+    chat: true
+    tools: true
+    vision: false
+    streaming: true
+    max_tokens: 4096
+    context_window: 4096
+
+  "gpt-4":
+    chat: true
+    tools: true
+    vision: false
+    streaming: true
+    max_tokens: 8192
+    context_window: 8192
+
+  "gpt-4o":
+    chat: true
+    tools: true
+    vision: true
+    streaming: true
+    max_tokens: 4096
+    context_window: 128000
+
+  "gpt-4o-mini":
+    chat: true
+    tools: true
+    vision: true
+    streaming: true
+    speech: false
+    transcription: false
+    max_tokens: 4096
+    context_window: 128000
+
+  # OpenAI Speech Models
+  "tts-1":
+    chat: false
+    tools: false
+    vision: false
+    streaming: false
+    speech: true
+    transcription: false
+    max_tokens: null
+    context_window: null
+
+  "tts-1-hd":
+    chat: false
+    tools: false
+    vision: false
+    streaming: false
+    speech: true
+    transcription: false
+    max_tokens: null
+    context_window: null
+
+  # OpenAI Transcription Models
+  "whisper-1":
+    chat: false
+    tools: false
+    vision: false
+    streaming: false
+    speech: false
+    transcription: true
+    embeddings: false
+    max_tokens: null
+    context_window: null
+
+  # OpenAI Embedding Models
+  "text-embedding-3-small":
+    chat: false
+    tools: false
+    vision: false
+    streaming: false
+    speech: false
+    transcription: false
+    embeddings: true
+    max_tokens: null
+    context_window: 8191
+    dimensions: 1536
+
+  "text-embedding-3-large":
+    chat: false
+    tools: false
+    vision: false
+    streaming: false
+    speech: false
+    transcription: false
+    embeddings: true
+    max_tokens: null
+    context_window: 8191
+    dimensions: 3072
+
+  "text-embedding-ada-002":
+    chat: false
+    tools: false
+    vision: false
+    streaming: false
+    speech: false
+    transcription: false
+    embeddings: true
+    max_tokens: null
+    context_window: 8191
+    dimensions: 1536
+
+  # Anthropic Models
+  "claude-3-haiku-20240307":
+    chat: true
+    tools: true
+    vision: true
+    streaming: true
+    max_tokens: 4096
+    context_window: 200000
+
+  "claude-3-sonnet-20240229":
+    chat: true
+    tools: true
+    vision: true
+    streaming: true
+    max_tokens: 4096
+    context_window: 200000
+
+  "claude-3-opus-20240229":
+    chat: true
+    tools: true
+    vision: true
+    streaming: true
+    max_tokens: 4096
+    context_window: 200000
+
+  # Google Models
+  "gemini-pro":
+    chat: true
+    tools: true
+    vision: false
+    streaming: true
+    max_tokens: 8192
+    context_window: 32768
+
+  "gemini-2.0-flash-001":
+    chat: true
+    tools: true
+    vision: true
+    streaming: true
+    max_tokens: 8192
+    context_window: 32768
+
+  "gemini-1.5-pro":
+    chat: true
+    tools: true
+    vision: true
+    streaming: true
+    max_tokens: 8192
+    context_window: 1000000
+
+  # Gemini Transcription Models
+  "gemini-2.5-flash":
+    chat: true
+    tools: true
+    vision: true
+    streaming: true
+    speech: false
+    transcription: true
+    embeddings: false
+    max_tokens: 8192
+    context_window: 1000000
+    audio_max_duration: 34200  # 9.5 hours in seconds
+
+  "gemini-2.5-pro":
+    chat: true
+    tools: true
+    vision: true
+    streaming: true
+    speech: false
+    transcription: true
+    embeddings: false
+    max_tokens: 8192
+    context_window: 2000000
+    audio_max_duration: 34200  # 9.5 hours in seconds
+
+  # Gemini TTS Models
+  "gemini-2.5-flash-preview-tts":
+    chat: false
+    tools: false
+    vision: false
+    streaming: false
+    speech: true
+    transcription: false
+    embeddings: false
+    max_tokens: 32000  # 32k token context window for TTS
+    context_window: 32000
+    audio_format: "pcm"
+    sample_rate: 24000
+    channels: 1
+
+  "gemini-2.5-pro-preview-tts":
+    chat: false
+    tools: false
+    vision: false
+    streaming: false
+    speech: true
+    transcription: false
+    embeddings: false
+    max_tokens: 32000  # 32k token context window for TTS
+    context_window: 32000
+    audio_format: "pcm"
+    sample_rate: 24000
+    channels: 1
+
+  # Mistral Models
+  "mistral-7b-instruct":
+    chat: true
+    tools: false
+    vision: false
+    streaming: true
+    max_tokens: 4096
+    context_window: 32768
+
+  "mistral-8x7b-instruct":
+    chat: true
+    tools: true
+    vision: false
+    streaming: true
+    max_tokens: 4096
+    context_window: 32768
+
+# Test configuration
+test_settings:
+  # Maximum tokens for test responses
+  max_tokens:
+    chat: 100
+    vision: 200
+    tools: 100
+    complex: 300
+    speech: null  # Speech doesn't use token limits
+    transcription: null  # Transcription doesn't use token limits
+    embeddings: null  # Embeddings don't use token limits (text is the input)
+
+  # Timeout settings for tests
+  timeouts:
+    simple: 30 # seconds
+    complex: 60 # seconds
+
+  # Retry settings for flaky tests
+  retries:
+    max_attempts: 3
+    delay: 2 # seconds
+
+# Integration-specific settings
+integration_settings:
+  openai:
+    organization: "${OPENAI_ORG_ID:-}"
+    project: "${OPENAI_PROJECT_ID:-}"
+
+  anthropic:
+    version: "2023-06-01"
+
+  google:
+    project_id: "${GOOGLE_PROJECT_ID:-}"
+    location: "${GOOGLE_LOCATION:-us-central1}"
+
+  litellm:
+    drop_params: true
+    debug: false
+
+  langchain:
+    debug: false
+    streaming: true
+
+  bedrock:
+    region: "${AWS_REGION:-us-west-2}"
+    s3_bucket: "${AWS_S3_BUCKET:-}"
+    batch_role_arn: "${AWS_ARN:-}"
+    output_s3_prefix: "${AWS_OUTPUT_S3_PREFIX:-bifrost-batch-output/}"
+
+  azure:
+    api_version: "${AZURE_API_VERSION:-2024-10-21}"
+
+# Environment-specific overrides
+environments:
+  development:
+    api:
+      timeout: 60
+      max_retries: 5
+    test_settings:
+      timeouts:
+        simple: 60
+        complex: 120
+
+  production:
+    api:
+      timeout: 15
+      max_retries: 2
+    test_settings:
+      timeouts:
+        simple: 20
+        complex: 40
+
+# Virtual key testing configuration
+# When enabled, cross-provider tests will run twice: with and without the x-bf-vk header
+virtual_key:
+  enabled: true
+  value: "sk-bf-test-key"
+
+# Logging configuration
+logging:
+  level: "INFO"
+  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+  file: "tests.log"
--- a/tests/integrations/python/dummy-gcp-credentials.json
+++ b/tests/integrations/python/dummy-gcp-credentials.json
@@ -0,0 +1,12 @@
+{
+  "type": "service_account",
+  "project_id": "dummy-bifrost-project",
+  "private_key_id": "dummy-key-id-12345",
+  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCY+aj4fvYTj4l9\nYcgnEg7f9Y2zcck8bvYrhIY/m0NJpfUV2rOAbvgHJXUgobmUcgf6E9b76AWVN/Wm\nk6dxE+PWj1/DwkaYk4uDHpWFOn6HkF7ypLeGMamSnU+OfKFoUrRW8NfoMgh+uGVt\nwMh82qBztaTJKjN2BlxBepgR0iZKG81ySkyhaUL1Jh99E3AcNULkkp+VHTD51lw6\n4H0B197tY18GUZ+iPK3Laj9HBVOAjxqsCs4cMsWZ16R+dfZr8ZcDC0zodhcVNSsX\nA7uKZ5tKChJQEzHhk4o8ywnrsyd4E9FHKHsbs+Ye7K5qrTOxpKpDiy6DOPlknZ0/\nzLjk+SARAgMBAAECggEAAepUIktYZnmvblI//Sj8rHdJRoJGOqxNcnaW+4b+euUW\nQ1CspV1+U51amCBvza6kZ+0gaKEhi3lAAhfYQFx5YGtHTbHtKwjTL6oDrKKTncx9\nz/oJYeV6vVTOGGCjZQx2f30DwJZE0XG/1Qpl6L9SSBv14HlwY/6US75snRsWvCAc\nYlWJCozpn5ycSZDbqQBSVF3ueeHoH4ahL5Iw2NELAk87HLBGbtfvfwWpnQqlJtIi\ndQWqYYBHrqk5ThQNcJl1o3oBY7MMVE6/jWZbr+aIXtgfQlG3j+Z6PD6/7g3z65Yy\nxiOrypqpEm8UhmmxoF7UjSci+32NLj7SfdgXM2QLgwKBgQDP3iOCWBmzK72tWeF9\nkNKTXn/6niJHgaPhVTVYsAEoQMcWBtGSUF7QRWZt4qu8APRTwhThFpLVLSZOk50I\ndf9xFhWqubsif/ox1Fbd3SGswwoWhHvGHQ/JH/75akpMKTXkkLDomNNL49kNwW1E\nmb1EJPOeyuOxhw8gP7v8qJ8cAwKBgQC8ZaLgYUmMsc+IDpREN22fXNmoBE0OgrIK\nBLa9rQRAbzdmKcNxLpGAEsyiuPOrgD/9U2G9hM6kztCN53Ho86rphFiHHgN2NJfH\n/Jz/jTtM3UPKv0QCHuLTZknLFeYE3A0jNYFpRi/hjy2n0E4Gtp/0Y0ZULseMSvM5\naN7CWGS5WwKBgQDPCWb+vTcjwO5UCdDQ2v0RsS7w9K4Z4KLUnaTbp7oPWK2yX6o+\n+/PjpywFSJ5aS+0Ou6FGK9ClqSmdW+MteTGqdh+wgvtDuon9NYwrwMN4qm6SzPPm\n+C0v2sF/tIE56FX4SLEbipPx44fd7okhqarcg51uzJAK0wWazkAzv9Nx9wKBgEUk\n9EtvyWO22tkvqKEEytoDZOrycSmTNC7THhKtTnMrnmSDjXSbx9D+lVZflSbrkhCy\nqpu5A3KfaRG70SXTUHYWGbu1e0XF9bLzdtegCRSj3L6rxhUVKuC1mP3NUreT38p9\nV7rAhNA/EV2W6RwzqK80RFqfNKO72lrGr4MamBUjAoGBAJzy/47STnaW23aPutJF\nU23Kp5QDSkZzCniDBNIbuxlgZ5x2m4wK0FPRwWBcuvisG3G9VXohEfxJ0/IG8t6/\nOH1tVXYeR9pWtGIWEZuzFHL38ji4/BL3i94gW26GntJrr1ut94KHN1ynqkYRP/gK\ngRU91/0vXG+SOTubYUh5G5w3\n-----END PRIVATE KEY-----\n",
+  "client_email": "dummy-bifrost@dummy-bifrost-project.iam.gserviceaccount.com",
+  "client_id": "123456789012345678901",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dummy-bifrost%40dummy-bifrost-project.iam.gserviceaccount.com"
+}
--- a/tests/integrations/python/pyproject.toml
+++ b/tests/integrations/python/pyproject.toml
@@ -0,0 +1,126 @@
+[project]
+name = "bifrost-integration-tests"
+version = "0.1.0"
+description = "Production-ready end-to-end test suite for testing AI integrations through Bifrost proxy"
+readme = "README.md"
+requires-python = ">=3.11"
+
+dependencies = [
+    # Core testing framework
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0",
+    # Environment and configuration
+    "python-dotenv>=1.0.0",
+    "PyYAML>=6.0",
+    # Image processing
+    "Pillow>=9.0.0",
+    # HTTP requests for debugging
+    "requests>=2.28.0",
+    # Type hints
+    "typing-extensions>=4.0.0",
+    # Test reporting
+    "pytest-html>=3.1.0",
+    "pytest-cov>=4.0.0",
+    # AI/ML SDK dependencies
+    "openai>=1.30.0",
+    "anthropic>=0.25.0",
+    "litellm==1.80.5",
+    "langchain-openai==0.1.0",
+    "langchain-core==0.3.81",
+    "langchain-anthropic==0.1.0",
+    "langchain-google-genai==4.1.1",
+    "langchain-mistralai==0.1.0",
+    "langgraph>=0.1.0",
+    "mistralai>=0.4.0",
+    "google-genai>=1.50.0",
+    "pydantic-ai>=0.1.0",
+    "boto3>=1.34.0",
+    # Testing utilities
+    "websocket-client>=1.6.0",
+    "httpx>=0.25.0",
+    "pytest-timeout>=2.1.0",
+    "pytest-mock>=3.11.0",
+    "pytest-rerunfailures>=11.0",
+    "langchain-google-vertexai>=3.1.0",
+    "langchain-tests>=1.0.2",
+    "langchain>=1.1.0",
+    "langchain-community>=0.4.1",
+    "langchain-aws>=1.1.0",
+    "pytest-xdist>=3.8.0",
+    "pyasn1>=0.6.2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "black>=23.0.0",   # Code formatting
+    "flake8>=6.0.0",   # Linting
+    "mypy>=1.5.0",     # Type checking
+]
+
+[tool.pytest.ini_options]
+# Test discovery
+testpaths = ["."]
+python_files = "test_*.py"
+python_classes = "Test*"
+python_functions = "test_*"
+
+# Output formatting
+addopts = [
+    "-v",
+    "-s",  # Show print statements (no output capture)
+    "--tb=short",
+    "--strict-markers",
+    "--disable-warnings",
+    "--color=yes",
+]
+
+# Logging configuration
+log_cli = true
+log_cli_level = "ERROR"
+log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"
+
+# Timeout settings (5 minutes per test; pytest-timeout value is in seconds)
+timeout = 300
+
+# Markers for test categorization
+markers = [
+    "integration: marks tests as integration tests",
+    "slow: marks tests as slow running",
+    "e2e: marks tests as end-to-end tests",
+    "tool_calling: marks tests as tool calling tests",
+    "flaky: marks tests as flaky with automatic retries (reruns=3, reruns_delay=2)",
+]
+
+# Minimum version
+minversion = "7.0"
+
+[tool.black]
+line-length = 100
+target-version = ['py38', 'py39', 'py310', 'py311']
+include = '\.pyi?$'
+
+[tool.mypy]
+python_version = "3.11"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = false
+ignore_missing_imports = true
+
+[tool.coverage.run]
+source = ["tests"]
+omit = ["*/tests/*", "*/venv/*", "*/.venv/*"]
+
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:",
+]
+
+
+[tool.uv]
+exclude-newer = "2026-04-08"
--- a/tests/integrations/python/run_all_tests.py
+++ b/tests/integrations/python/run_all_tests.py
@@ -0,0 +1,343 @@
+#!/usr/bin/env python3
+"""
+Bifrost Integration End-to-End Test Runner
+
+This script runs all integration end-to-end tests for Bifrost.
+It can run tests individually or all together, providing comprehensive
+reporting and flexible execution options.
+
+Usage:
+    python run_all_tests.py                    # Run all tests
+    python run_all_tests.py --integration openai  # Run specific integration
+    python run_all_tests.py --list             # List available integrations
+    python run_all_tests.py --parallel         # Run tests in parallel
+    python run_all_tests.py --verbose          # Verbose output
+"""
+
+import argparse
+import subprocess
+import sys
+import time
+import os
+from pathlib import Path
+from typing import List, Dict, Optional
+import concurrent.futures
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+
+class BifrostTestRunner:
+    """Main test runner for Bifrost integration tests"""
+
+    def __init__(self) -> None:
+        self.test_dir = Path(__file__).parent  # directory of this script; each "file" entry below is joined onto it
+        self.integrations = {  # name -> test file path, description, env vars that must be set to run it
+            "openai": {
+                "file": "tests/integrations/test_openai.py",
+                "description": "OpenAI Python SDK integration tests",
+                "env_vars": ["OPENAI_API_KEY"],
+            },
+            "anthropic": {
+                "file": "tests/integrations/test_anthropic.py",
+                "description": "Anthropic Python SDK integration tests",
+                "env_vars": ["ANTHROPIC_API_KEY"],
+            },
+            "litellm": {
+                "file": "tests/integrations/test_litellm.py",
+                "description": "LiteLLM integration tests",
+                "env_vars": ["OPENAI_API_KEY"],  # LiteLLM can use OpenAI key
+            },
+            "langchain": {
+                "file": "tests/integrations/test_langchain.py",
+                "description": "LangChain integration tests",
+                "env_vars": [
+                    "OPENAI_API_KEY",
+                    "ANTHROPIC_API_KEY",
+                ],  # LangChain uses multiple providers
+            },
+            "google": {
+                "file": "tests/integrations/test_google.py",
+                "description": "Google GenAI integration tests",
+                "env_vars": ["GOOGLE_API_KEY"],
+            },
+            "bedrock": {
+                "file": "tests/integrations/test_bedrock.py",
+                "description": "Bedrock integration tests",
+                "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
+            },
+        }
+
+        self.results = {}  # integration name -> result dict returned by run_integration_test()
+
+    def check_environment(self, integration: str) -> bool:
+        """Return True if every env var required by `integration` is set and non-empty; otherwise print a skip notice and return False"""
+        config = self.integrations[integration]  # raises KeyError for an unregistered name
+        missing_vars = []
+
+        for var in config["env_vars"]:
+            if not os.getenv(var):  # unset and empty-string values both count as missing
+                missing_vars.append(var)
+
+        if missing_vars:
+            print(
+                f"⚠ Skipping {integration}: Missing environment variables: {', '.join(missing_vars)}"
+            )
+            return False
+
+        return True
+
+    def run_integration_test(self, integration: str, verbose: bool = False) -> Dict:
+        """Run one integration's pytest file in a subprocess and return a result dict (always contains "success"; never raises for run failures)"""
+        if integration not in self.integrations:
+            return {"success": False, "error": f"Unknown integration: {integration}"}
+
+        config = self.integrations[integration]
+        test_file = self.test_dir / config["file"]
+
+        if not test_file.exists():
+            return {"success": False, "error": f"Test file not found: {test_file}"}
+
+        # Missing credentials: flag the result as skipped instead of running pytest
+        if not self.check_environment(integration):
+            return {
+                "success": False,
+                "error": "Missing required environment variables",
+                "skipped": True,
+            }
+
+        print(f"\n{'='*60}")
+        print(f"Running {integration.upper()} Integration Tests")
+        print(f"{'='*60}")
+        print(f"Description: {config['description']}")
+        print(f"Test file: {config['file']}")
+
+        start_time = time.time()
+
+        try:
+            # Run the test with pytest, using the interpreter that is running this script
+            cmd = [sys.executable, "-m", "pytest", str(test_file)]
+
+            # Add pytest flags for better output
+            if verbose:
+                cmd.extend(["-v", "-s"])  # verbose and don't capture output
+            else:
+                cmd.append("-q")  # quiet mode
+
+            if verbose:  # not captured: the child writes straight to our stdout/stderr
+                result = subprocess.run(
+                    cmd, cwd=self.test_dir, text=True, capture_output=False, timeout=300
+                )
+            else:  # captured so the caller can print the output after the run
+                result = subprocess.run(
+                    cmd, cwd=self.test_dir, text=True, capture_output=True, timeout=300
+                )
+
+            elapsed_time = time.time() - start_time
+
+            success = result.returncode == 0  # any non-zero pytest exit status counts as failure
+
+            return {
+                "success": success,
+                "return_code": result.returncode,
+                "stdout": result.stdout if not verbose else "",  # nothing was captured in verbose mode
+                "stderr": result.stderr if not verbose else "",
+                "elapsed_time": elapsed_time,
+            }
+
+        except subprocess.TimeoutExpired:
+            return {
+                "success": False,
+                "error": "Test timed out (5 minutes)",
+                "elapsed_time": 300,  # the subprocess timeout above, in seconds
+            }
+        except Exception as e:  # e.g. the subprocess could not be started
+            return {
+                "success": False,
+                "error": str(e),
+                "elapsed_time": time.time() - start_time,
+            }
+
+    def run_all_tests(self, parallel: bool = False, verbose: bool = False) -> None:
+        """Print a run banner, run every registered integration (in parallel or sequentially), then print the summary"""
+        print("Bifrost Integration End-to-End Test Suite")
+        print("=" * 50)
+        print(f"Running tests for {len(self.integrations)} integrations")
+        print(f"Parallel execution: {'Enabled' if parallel else 'Disabled'}")
+        print(f"Verbose output: {'Enabled' if verbose else 'Disabled'}")
+
+        # Report the Bifrost URL in use (informational only; no connectivity check is made here)
+        bifrost_url = os.getenv("BIFROST_BASE_URL", "http://localhost:8080")
+        print(f"Bifrost URL: {bifrost_url}")
+
+        start_time = time.time()
+
+        if parallel:
+            self._run_parallel(verbose)
+        else:
+            self._run_sequential(verbose)
+
+        total_time = time.time() - start_time
+        self._print_summary(total_time)  # calls sys.exit(1) when any integration failed
+
+    def _run_sequential(self, verbose: bool) -> None:
+        """Run every registered integration one after another, storing each result dict in self.results"""
+        for integration in self.integrations:
+            self.results[integration] = self.run_integration_test(integration, verbose)
+
+    def _run_parallel(self, verbose: bool) -> None:
+        """Run every registered integration on a thread pool (at most 3 at a time), storing each result dict in self.results"""
+        print("\nRunning tests in parallel...")
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
+            # Submit all tests, remembering which integration each future belongs to
+            future_to_integration = {
+                executor.submit(
+                    self.run_integration_test, integration, verbose
+                ): integration
+                for integration in self.integrations
+            }
+
+            # Collect results in completion order, not submission order
+            for future in concurrent.futures.as_completed(future_to_integration):
+                integration = future_to_integration[future]
+                try:
+                    self.results[integration] = future.result()
+                except Exception as e:  # an exception escaping the worker is recorded as a failure
+                    self.results[integration] = {"success": False, "error": str(e)}
+
+    def _print_summary(self, total_time: float) -> None:
+        """Print test summary"""
+        print(f"\n{'='*60}")
+        print("TEST SUMMARY")
+        print(f"{'='*60}")
+
+        passed = 0
+        failed = 0
+        skipped = 0
+
+        for integration, result in self.results.items():
+            status = (
+                "SKIPPED"
+                if result.get("skipped")
+                else ("PASSED" if result["success"] else "FAILED")
+            )
+            elapsed = result.get("elapsed_time", 0)
+
+            if result.get("skipped"):
+                skipped += 1
+                print(
+                    f"⚠ {integration:12} {status:8} - {result.get('error', 'Unknown error')}"
+                )
+            elif result["success"]:
+                passed += 1
+                print(f"✓ {integration:12} {status:8} - {elapsed:.2f}s")
+            else:
+                failed += 1
+                error_msg = result.get("error", "Unknown error")
+                print(f"✗ {integration:12} {status:8} - {error_msg}")
+
+                # Print stderr if available
+                if "stderr" in result and result["stderr"]:
+                    print(f"  Error output: {result['stderr'][:200]}...")
+
+        print(f"\n{'='*60}")
+        print(
+            f"Total: {len(self.integrations)} | Passed: {passed} | Failed: {failed} | Skipped: {skipped}"
+        )
+        print(f"Total time: {total_time:.2f} seconds")
+        print(f"{'='*60}")
+
+        # Exit with appropriate code
+        if failed > 0:
+            sys.exit(1)
+        else:
+            print("All tests completed successfully!")
+
+    def list_integrations(self) -> None:
+        """List available integrations"""
+        print("Available Integrations:")
+        print("=" * 30)
+
+        for integration, config in self.integrations.items():
+            env_status = "✓" if self.check_environment(integration) else "✗"
+            print(f"{env_status} {integration:12} - {config['description']}")
+            print(f"   Required env vars: {', '.join(config['env_vars'])}")
+            print()
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Run Bifrost integration end-to-end tests",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python run_all_tests.py                        # Run all tests
+  python run_all_tests.py --integration openai   # Run OpenAI tests only
+  python run_all_tests.py --parallel --verbose   # Run all tests in parallel with verbose output
+  python run_all_tests.py --list                 # List available integrations
+        """,
+    )
+
+    parser.add_argument(
+        "--integration", "-i", help="Run tests for specific integration only"
+    )
+
+    parser.add_argument(
+        "--list",
+        "-l",
+        action="store_true",
+        help="List available integrations and their status",
+    )
+
+    parser.add_argument(
+        "--parallel",
+        "-p",
+        action="store_true",
+        help="Run tests in parallel (faster but less readable output)",
+    )
+
+    parser.add_argument(
+        "--verbose",
+        "-v",
+        action="store_true",
+        help="Enable verbose output (shows test output in real-time)",
+    )
+
+    args = parser.parse_args()
+
+    runner = BifrostTestRunner()
+
+    if args.list:
+        runner.list_integrations()
+        return
+
+    if args.integration:
+        if args.integration not in runner.integrations:
+            print(f"Error: Unknown integration '{args.integration}'")
+            print(f"Available integrations: {', '.join(runner.integrations.keys())}")
+            sys.exit(1)
+
+        result = runner.run_integration_test(args.integration, args.verbose)
+        if result["success"]:
+            print(f"\n✓ {args.integration} tests passed!")
+        else:
+            error_msg = result.get("error", "Unknown error")
+            print(f"\n✗ {args.integration} tests failed: {error_msg}")
+
+            # Show stdout/stderr if available
+            if result.get("stdout"):
+                print("\n--- Test Output ---")
+                print(result["stdout"])
+            if result.get("stderr"):
+                print("\n--- Error Output ---")
+                print(result["stderr"])
+
+            sys.exit(1)
+    else:
+        runner.run_all_tests(args.parallel, args.verbose)
+
+
+if __name__ == "__main__":
+    main()
--- a/tests/integrations/python/run_integration_tests.py
+++ b/tests/integrations/python/run_integration_tests.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python3
+"""
+Integration-specific test runner for Bifrost integration tests.
+
+This script runs tests for each integration independently using their native SDKs.
+No more complex gateway conversions - just direct testing!
+"""
+
+import os
+import sys
+import argparse
+import subprocess
+from pathlib import Path
+from typing import List, Optional
+
+
+def check_api_keys():
+    """Return (available, missing): integration names split by whether their credential env var is set and non-empty"""
+    keys = {
+        "openai": os.getenv("OPENAI_API_KEY"),
+        "anthropic": os.getenv("ANTHROPIC_API_KEY"),
+        "google": os.getenv("GOOGLE_API_KEY"),
+        "litellm": os.getenv("LITELLM_API_KEY"),  # NOTE(review): run_all_tests.py gates litellm on OPENAI_API_KEY — confirm which is intended
+        "bedrock": os.getenv("AWS_ACCESS_KEY_ID"),  # only the access key id is checked here, not the secret key
+    }
+
+    available = [integration for integration, key in keys.items() if key]
+    missing = [integration for integration, key in keys.items() if not key]
+
+    return available, missing
+
+
+def run_integration_tests(
+    integrations: List[str], test_pattern: Optional[str] = None, verbose: bool = False
+):
+    """Run tests for specified integrations"""
+
+    results = {}
+
+    for integration in integrations:
+        print(f"\n{'='*60}")
+        print(f"🧪 TESTING {integration.upper()} INTEGRATION")
+        print(f"{'='*60}")
+
+        # Build pytest command with absolute path relative to script location
+        script_dir = Path(__file__).parent
+        test_file = script_dir / "tests" / "integrations" / f"test_{integration}.py"
+
+        # Check if test file exists
+        if not test_file.exists():
+            print(f"❌ Test file not found: {test_file}")
+            results[integration] = {"error": f"Test file not found: {test_file}"}
+            continue
+
+        cmd = ["python", "-m", "pytest", str(test_file)]
+
+        if test_pattern:
+            cmd.extend(["-k", test_pattern])
+
+        if verbose:
+            cmd.append("-v")
+        else:
+            cmd.append("-q")
+
+        # Remove integration-specific marker (not needed for file-based selection)
+        # cmd.extend(["-m", integration])
+
+        # Run the tests
+        try:
+            result = subprocess.run(
+                cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True,
+                check=True,
+            )
+            results[integration] = {
+                "returncode": result.returncode,
+                "stdout": result.stdout,
+                "stderr": "",  # stderr is now captured in stdout
+            }
+
+            # Print results
+            print(f"✅ {integration.upper()} tests PASSED")
+
+            if verbose:
+                print(result.stdout)
+
+        except subprocess.CalledProcessError as e:
+            print(f"❌ {integration.upper()} tests FAILED")
+            results[integration] = {
+                "returncode": e.returncode,
+                "stdout": e.stdout,
+                "stderr": "",  # stderr is captured in stdout
+            }
+
+            # Always print output on failure to show what went wrong
+            if e.stdout:
+                print(e.stdout)
+
+        except Exception as e:
+            print(f"❌ Error running {integration} tests: {e}")
+            results[integration] = {"error": str(e)}
+
+    return results
+
+
+def print_summary(
+    results: dict, available_integrations: List[str], missing_integrations: List[str]
+):
+    """Print API-key availability, each integration's pass/fail/error status, and the overall success rate"""
+    print(f"\n{'='*80}")
+    print("🎯 FINAL SUMMARY")
+    print(f"{'='*80}")
+
+    # API Key Status
+    print(f"\n🔑 API Key Status:")
+    for integration in available_integrations:
+        print(f"  ✅ {integration.upper()}: Available")
+
+    for integration in missing_integrations:
+        print(f"  ❌ {integration.upper()}: Missing API key")
+
+    # Test Results
+    print(f"\n📊 Test Results:")
+    passed_integrations = []
+    failed_integrations = []
+
+    for integration, result in results.items():
+        if "error" in result:  # entries with "error" are checked first; "returncode" is only read otherwise
+            print(f"  💥 {integration.upper()}: Error - {result['error']}")
+            failed_integrations.append(integration)
+        elif result["returncode"] == 0:
+            print(f"  ✅ {integration.upper()}: All tests passed")
+            passed_integrations.append(integration)
+        else:
+            print(f"  ❌ {integration.upper()}: Some tests failed")
+            failed_integrations.append(integration)
+
+    # Overall Status
+    total_tested = len(results)
+    total_passed = len(passed_integrations)
+
+    print(f"\n🏆 Overall Results:")
+    print(f"  Integrations tested: {total_tested}")
+    print(f"  Integrations passed: {total_passed}")
+    print(
+        f"  Success rate: {(total_passed/total_tested)*100:.1f}%"
+        if total_tested > 0  # guards the division when nothing was run
+        else "  Success rate: N/A"
+    )
+
+    if failed_integrations:
+        print(f"\n⚠️  Failed integrations: {', '.join(failed_integrations)}")
+        print("   Check the detailed output above for specific test failures.")
+
+
+def main() -> None:  # CLI entry point: check keys, show models, or run the selected integrations
+    parser = argparse.ArgumentParser(
+        description="Run integration-specific integration tests"
+    )
+    parser.add_argument(
+        "--integrations",
+        nargs="+",
+        choices=["openai", "anthropic", "google", "litellm", "all"],  # NOTE(review): check_api_keys() also knows "bedrock", which cannot be requested by name — confirm
+        default=["all"],
+        help="Integrations to test (default: all available)",
+    )
+    parser.add_argument(
+        "--test", help="Run specific test pattern (e.g., 'test_01_simple_chat')"
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
+    parser.add_argument(
+        "--check-keys", action="store_true", help="Only check API key availability"
+    )
+    parser.add_argument(
+        "--show-models",
+        action="store_true",
+        help="Show model configuration for all integrations",
+    )
+
+    args = parser.parse_args()
+
+    # Check API keys
+    available_integrations, missing_integrations = check_api_keys()
+
+    if args.check_keys:  # report key status only; no tests are run
+        print("🔑 API Key Status:")
+        for integration in available_integrations:
+            print(f"  ✅ {integration.upper()}: Available")
+        for integration in missing_integrations:
+            print(f"  ❌ {integration.upper()}: Missing")
+        return
+
+    if args.show_models:
+        # Import and show model configuration using absolute path
+        script_dir = Path(__file__).parent
+        models_path = script_dir / "tests" / "utils" / "models.py"
+
+        if not models_path.exists():
+            print(f"❌ Models file not found: {models_path}")
+            sys.exit(1)
+
+        # Add the script directory to sys.path so the "tests" package can be imported
+        models_parent_dir = str(script_dir)
+        if models_parent_dir not in sys.path:
+            sys.path.insert(0, models_parent_dir)
+
+        try:
+            from tests.utils.models import print_model_summary
+
+            print_model_summary()
+        except ImportError as e:
+            print(f"❌ Could not import print_model_summary: {e}")
+            print(f"Tried to import from: {models_path}")
+            sys.exit(1)
+        return
+
+    # Determine which integrations to test
+    if "all" in args.integrations:
+        integrations_to_test = available_integrations  # everything with a key set, per check_api_keys()
+        requested_integrations = [
+            "openai",
+            "anthropic",
+            "google",
+            "litellm",
+        ]  # all possible integrations
+    else:
+        integrations_to_test = [
+            p for p in args.integrations if p in available_integrations
+        ]
+        requested_integrations = args.integrations
+
+    if not integrations_to_test:
+        print("❌ No integrations available for testing. Please set API keys.")
+        print("\nRequired environment variables for requested integrations:")
+        for integration in requested_integrations:
+            if integration != "all":  # Skip the "all" keyword
+                api_key_name = f"{integration.upper()}_API_KEY"  # name derived by convention, not read from check_api_keys()
+                print(f"  - {api_key_name}")
+        sys.exit(1)
+
+    # Calculate which requested integrations are missing API keys
+    requested_missing_integrations = [
+        integration
+        for integration in requested_integrations
+        if integration in missing_integrations
+    ]
+
+    # Show what we're about to test
+    print("🚀 Starting integration tests...")
+    print(f"📋 Testing integrations: {', '.join(integrations_to_test)}")
+    if requested_missing_integrations:
+        print(
+            f"⏭️  Skipping integrations (no API key): {', '.join(requested_missing_integrations)}"
+        )
+
+    # Run tests
+    results = run_integration_tests(integrations_to_test, args.test, args.verbose)
+
+    # Print summary
+    print_summary(results, available_integrations, requested_missing_integrations)
+
+    # Exit status is the number of integrations that failed or errored (0 when all passed)
+    failed_count = sum(
+        1 for r in results.values() if r.get("returncode", 1) != 0 or "error" in r
+    )
+    sys.exit(failed_count)
+
+
+if __name__ == "__main__":
+    main()
--- a/tests/integrations/python/tests/init.py
+++ b/tests/integrations/python/tests/init.py
@@ -0,0 +1,8 @@
+"""
+Bifrost Integration Tests
+
+Production-ready test suite for testing various AI integrations through Bifrost proxy.
+Supports multiple integrations with uniform test interface.
+"""
+
+__version__ = "1.0.0"
--- a/tests/integrations/python/tests/conftest.py
+++ b/tests/integrations/python/tests/conftest.py
@@ -0,0 +1,188 @@
+"""
+Pytest configuration for integration-specific tests.
+"""
+
+import pytest
+import os
+import logging
+
+
+def pytest_configure(config):
+    """Set up logging and register the custom markers used by this suite."""
+    # Only ERROR and above is emitted, with a timestamped format.
+    logging.basicConfig(
+        datefmt='%Y-%m-%d %H:%M:%S',
+        format='%(asctime)s [%(levelname)8s] %(name)s: %(message)s',
+        level=logging.ERROR,
+    )
+
+    # One "name: description" entry per marker, registered in this order.
+    marker_descriptions = (
+        "openai: mark test as requiring OpenAI API key",
+        "anthropic: mark test as requiring Anthropic API key",
+        "google: mark test as requiring Google API key",
+        "litellm: mark test as requiring LiteLLM setup",
+        "azure: Azure OpenAI integration tests",
+        "flaky: mark test as flaky with automatic retries (reruns=3, reruns_delay=2)",
+    )
+    for description in marker_descriptions:
+        config.addinivalue_line("markers", description)
+
+
+def pytest_collection_modifyitems(config, items):
+    """Attach the retry marker and an integration marker to every collected test."""
+    retry_marker = pytest.mark.flaky(reruns=3, reruns_delay=2)
+
+    # Checked in order; the first fragment found in the node id decides the
+    # integration marker, and at most one integration marker is added.
+    fragment_markers = (
+        ("test_openai", pytest.mark.openai),
+        ("test_anthropic", pytest.mark.anthropic),
+        ("test_google", pytest.mark.google),
+        ("test_litellm", pytest.mark.litellm),
+        ("test_azure", pytest.mark.azure),
+    )
+
+    for collected in items:
+        # Every test gets the retry marker, regardless of integration.
+        collected.add_marker(retry_marker)
+
+        for fragment, integration_marker in fragment_markers:
+            if fragment in collected.nodeid:
+                collected.add_marker(integration_marker)
+                break
+
+
+@pytest.fixture(scope="session")
+def api_keys():
+    """Map each integration name to its API key from the environment.
+
+    A value is ``None`` when the corresponding environment variable is unset.
+    """
+    env_var_by_integration = {
+        "openai": "OPENAI_API_KEY",
+        "anthropic": "ANTHROPIC_API_KEY",
+        "google": "GOOGLE_API_KEY",
+        "litellm": "LITELLM_API_KEY",
+        "azure": "AZURE_API_KEY",
+    }
+    return {
+        integration: os.getenv(env_var)
+        for integration, env_var in env_var_by_integration.items()
+    }
+
+
+@pytest.fixture(scope="session")
+def available_integrations(api_keys):
+    """List the integrations that have a truthy API key, in a fixed order."""
+    candidates = ("openai", "anthropic", "google", "litellm", "azure")
+    return [name for name in candidates if api_keys[name]]
+
+
+@pytest.fixture
+def test_summary():
+    """Provide an empty result collector with passed/failed/skipped buckets."""
+    return {bucket: [] for bucket in ("passed", "failed", "skipped")}
+
+
+def _integration_for_nodeid(nodeid):
+    """Return the integration whose ``test_<name>`` fragment is in *nodeid*, else None."""
+    for name in ("openai", "anthropic", "google", "litellm", "azure"):
+        if f"test_{name}" in nodeid:
+            return name
+    return None
+
+
+def pytest_runtest_makereport(item, call):
+    """Record a test's outcome in ``item.session.test_results``.
+
+    Outcomes are classified as follows:
+
+    * "call" phase without an exception -> ``passed``
+    * "call" phase raising pytest's skip exception -> ``skipped``
+    * "call" phase raising anything else -> ``failed`` (with an ``error`` text)
+    * "setup" phase raising pytest's skip exception -> ``skipped``
+
+    Every other phase/outcome is ignored so a test is not counted twice.
+    Nothing is recorded when the session has no ``test_results`` attribute.
+    The hook returns ``None``.
+    """
+    results = getattr(item.session, "test_results", None)
+    if results is None:
+        return
+
+    excinfo = call.excinfo
+    # ``pytest.skip()`` inside a test body also sets ``call.excinfo``; it must
+    # not be reported as a failure.
+    was_skipped = excinfo is not None and isinstance(
+        excinfo.value, pytest.skip.Exception
+    )
+
+    if call.when == "call":
+        if excinfo is None:
+            bucket = "passed"
+        elif was_skipped:
+            bucket = "skipped"
+        else:
+            bucket = "failed"
+    elif call.when == "setup" and was_skipped:
+        # Skips raised during setup never reach the "call" phase.
+        bucket = "skipped"
+    else:
+        return
+
+    result_info = {
+        "integration": _integration_for_nodeid(item.nodeid),
+        "test": item.name,
+        "nodeid": item.nodeid,
+    }
+    if bucket == "failed":
+        result_info["error"] = str(excinfo.value)
+
+    # Tests carry a flaky/rerun marker, so this hook can fire several times for
+    # the same node id. Keep only the most recent outcome per test.
+    for entries in results.values():
+        entries[:] = [
+            entry for entry in entries if entry.get("nodeid") != item.nodeid
+        ]
+    results[bucket].append(result_info)
+
+
+def pytest_sessionstart(session):
+    """Attach an empty passed/failed/skipped result store to the session."""
+    session.test_results = {
+        bucket: [] for bucket in ("passed", "failed", "skipped")
+    }
+
+
+def pytest_sessionfinish(session, exitstatus):
+    """Print a per-integration summary of the results stored on the session.
+
+    Reads ``session.test_results`` (a dict with ``passed``, ``failed`` and
+    ``skipped`` lists of result dicts). Results whose ``integration`` is
+    missing or ``None`` are grouped under ``"unknown"`` rather than dropped.
+    ``exitstatus`` is accepted for the hook signature and not used.
+    """
+    # Tolerate a session on which no result store was ever attached.
+    results = getattr(session, "test_results", None) or {}
+
+    print("\n" + "=" * 80)
+    print("INTEGRATION TEST SUMMARY")
+    print("=" * 80)
+
+    # Group results by integration, keeping first-seen order.
+    integration_results = {}
+    for bucket in ("passed", "failed", "skipped"):
+        for result in results.get(bucket, []):
+            # The key is normally present with value None for unmatched test
+            # files, so a plain ``.get(..., "unknown")`` default never applies.
+            integration = result.get("integration") or "unknown"
+            counts = integration_results.setdefault(
+                integration, {"passed": 0, "failed": 0, "skipped": 0}
+            )
+            counts[bucket] += 1
+
+    # Print summary by integration
+    for integration, counts in integration_results.items():
+        total = counts["passed"] + counts["failed"] + counts["skipped"]
+        print(f"\n{integration.upper()} Integration:")
+        print(f"  ✅ Passed: {counts['passed']}")
+        print(f"  ❌ Failed: {counts['failed']}")
+        print(f"  ⏭️  Skipped: {counts['skipped']}")
+        print(f"  📊 Total: {total}")
+
+        # Skipped tests do not count towards the rate; show it whenever at
+        # least one test actually ran, including the all-failed (0%) case.
+        executed = counts["passed"] + counts["failed"]
+        if executed > 0:
+            success_rate = (counts["passed"] / executed) * 100
+            print(f"  🎯 Success Rate: {success_rate:.1f}%")
+
+    # Print failed tests details
+    failed_results = results.get("failed", [])
+    if failed_results:
+        print(f"\n❌ FAILED TESTS ({len(failed_results)}):")
+        for result in failed_results:
+            integration = result.get("integration") or "unknown"
+            print(f"  • {integration}: {result['test']}")
+            if "error" in result:
+                print(f"    Error: {result['error']}")
+
+    print("\n" + "=" * 80)
--- a/tests/integrations/python/tests/test_anthropic.py
+++ b/tests/integrations/python/tests/test_anthropic.py
--- a/tests/integrations/python/tests/test_azure.py
+++ b/tests/integrations/python/tests/test_azure.py
--- a/tests/integrations/python/tests/test_bedrock.py
+++ b/tests/integrations/python/tests/test_bedrock.py
--- a/tests/integrations/python/tests/test_google.py
+++ b/tests/integrations/python/tests/test_google.py
--- a/tests/integrations/python/tests/test_langchain.py
+++ b/tests/integrations/python/tests/test_langchain.py
--- a/tests/integrations/python/tests/test_litellm.py
+++ b/tests/integrations/python/tests/test_litellm.py
@@ -0,0 +1,911 @@
+"""
+LiteLLM Integration Tests
+
+🤖 MODELS USED:
+- Chat: gpt-3.5-turbo (OpenAI via LiteLLM)
+- Vision: gpt-4o (OpenAI via LiteLLM)
+- Tools: gpt-3.5-turbo (OpenAI via LiteLLM)
+- Speech: tts-1 (OpenAI via LiteLLM)
+- Transcription: whisper-1 (OpenAI via LiteLLM)
+- Embeddings: text-embedding-3-small (OpenAI via LiteLLM)
+- Alternatives: claude-3-haiku-20240307, gemini-pro, mistral-7b-instruct, gpt-4, command-r-plus
+
+Tests all 19 core scenarios using LiteLLM SDK directly:
+1. Simple chat
+2. Multi turn conversation
+3. Tool calls
+4. Multiple tool calls
+5. End2End tool calling
+6. Automatic function calling
+7. Image (url)
+8. Image (base64)
+9. Multiple images
+10. Complete end2end test with conversation history, tool calls, tool results and images
+11. Integration specific tests
+12. Error handling
+13. Streaming
+14. Google Gemini integration
+15. Mistral integration
+16. OpenAI embeddings via LiteLLM
+17. OpenAI speech synthesis via LiteLLM
+18. OpenAI transcription via LiteLLM
+19. Multi-provider comparison
+"""
+
+import pytest
+import json
+import litellm
+from typing import List, Dict, Any
+
+from .utils.common import (
+    Config,
+    SIMPLE_CHAT_MESSAGES,
+    MULTI_TURN_MESSAGES,
+    SINGLE_TOOL_CALL_MESSAGES,
+    MULTIPLE_TOOL_CALL_MESSAGES,
+    IMAGE_URL_MESSAGES,
+    IMAGE_BASE64_MESSAGES,
+    MULTIPLE_IMAGES_MESSAGES,
+    COMPLEX_E2E_MESSAGES,
+    INVALID_ROLE_MESSAGES,
+    STREAMING_CHAT_MESSAGES,
+    STREAMING_TOOL_CALL_MESSAGES,
+    WEATHER_TOOL,
+    CALCULATOR_TOOL,
+    mock_tool_response,
+    assert_valid_chat_response,
+    assert_has_tool_calls,
+    assert_valid_image_response,
+    assert_valid_error_response,
+    assert_error_propagation,
+    assert_valid_streaming_response,
+    collect_streaming_content,
+    extract_tool_calls,
+    get_api_key,
+    skip_if_no_api_key,
+    COMPARISON_KEYWORDS,
+    WEATHER_KEYWORDS,
+    LOCATION_KEYWORDS,
+    # Audio and embeddings test data
+    EMBEDDINGS_SINGLE_TEXT,
+    EMBEDDINGS_MULTIPLE_TEXTS,
+    EMBEDDINGS_SIMILAR_TEXTS,
+    SPEECH_TEST_INPUT,
+    generate_test_audio,
+    assert_valid_speech_response,
+    assert_valid_transcription_response,
+    assert_valid_embedding_response,
+    assert_valid_embeddings_batch_response,
+    calculate_cosine_similarity,
+    collect_streaming_transcription_content,
+    get_provider_voice,
+    get_provider_voices,
+    # Token counting test data
+    INPUT_TOKENS_SIMPLE_TEXT,
+    INPUT_TOKENS_LONG_TEXT,
+    INPUT_TOKENS_WITH_SYSTEM,
+)
+from .utils.config_loader import get_model
+from .utils.parametrize import (
+    get_cross_provider_params_for_scenario,
+    format_provider_model,
+)
+
+# LiteLLM-specific provider exclusions
+# Bedrock and Cohere don't work well through LiteLLM proxy
+# Gemini is excluded because LiteLLM routes it through Vertex AI-specific endpoints
+# that Bifrost's LiteLLM integration doesn't support
+LITELLM_EXCLUDED_PROVIDERS = ["bedrock", "cohere", "gemini"]
+
+
+@pytest.fixture
+def test_config():
+    """Provide a freshly constructed ``Config`` to each test."""
+    config = Config()
+    return config
+
+
+@pytest.fixture(autouse=True)
+def setup_litellm(monkeypatch):
+    """Point LiteLLM at Bifrost and install dummy credentials for one test.
+
+    All environment variables, the patched credential refresh and the LiteLLM
+    module-level settings are applied through ``monkeypatch``, so they are
+    undone after each test and do not leak dummy keys into other test modules
+    running in the same session.
+    """
+    import datetime
+    from pathlib import Path
+
+    from .utils.config_loader import get_integration_url, get_config
+
+    # Dummy Google Application Credentials file; LiteLLM may load it, but the
+    # refresh patch below keeps it from being used for a real token request.
+    dummy_creds_path = Path(__file__).parent.parent / "dummy-gcp-credentials.json"
+
+    # Dummy credentials since Bifrost handles actual authentication. For
+    # Google, all the API key / Vertex variables are set.
+    dummy_env = {
+        "OPENAI_API_KEY": "dummy-openai-key-bifrost-handles-auth",
+        "ANTHROPIC_API_KEY": "dummy-anthropic-key-bifrost-handles-auth",
+        "MISTRAL_API_KEY": "dummy-mistral-key-bifrost-handles-auth",
+        "GOOGLE_API_KEY": "dummy-google-api-key-bifrost-handles-auth",
+        "GEMINI_API_KEY": "dummy-gemini-api-key-bifrost-handles-auth",
+        "VERTEX_PROJECT": "dummy-vertex-project",
+        "VERTEX_LOCATION": "us-central1",
+        "GOOGLE_APPLICATION_CREDENTIALS": str(dummy_creds_path),
+    }
+    for name, value in dummy_env.items():
+        monkeypatch.setenv(name, value)
+
+    # Uncomment for verbose LiteLLM debugging:
+    # litellm._turn_on_debug()
+
+    def mock_refresh(self, request):
+        """Mock refresh that sets a dummy token - Bifrost handles real auth"""
+        self.token = "dummy-access-token-bifrost-handles-auth"
+        # Naive UTC timestamp, same value shape as the deprecated
+        # ``datetime.utcnow()`` produced.
+        now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
+        self.expiry = now_utc + datetime.timedelta(hours=1)
+
+    # Mock credential refresh to prevent actual Google API calls.
+    try:
+        from google.oauth2 import service_account
+    except ImportError:
+        pass  # google-auth not installed
+    else:
+        monkeypatch.setattr(service_account.Credentials, "refresh", mock_refresh)
+
+    # Get Bifrost URL and settings for LiteLLM
+    base_url = get_integration_url("litellm")
+    config = get_config()
+    integration_settings = config.get_integration_settings("litellm")
+    api_config = config.get_api_config()
+
+    # Configure LiteLLM module-level settings; ``raising=False`` keeps this
+    # working even if an attribute is absent in the installed version.
+    if base_url:
+        monkeypatch.setattr(litellm, "api_base", base_url, raising=False)
+
+    monkeypatch.setattr(
+        litellm, "request_timeout", api_config.get("timeout", 30), raising=False
+    )
+
+    # Apply integration-specific settings only when they are truthy.
+    if integration_settings.get("drop_params"):
+        monkeypatch.setattr(
+            litellm, "drop_params", integration_settings["drop_params"], raising=False
+        )
+    if integration_settings.get("debug"):
+        monkeypatch.setattr(
+            litellm, "set_verbose", integration_settings["debug"], raising=False
+        )
+
+
+def convert_to_litellm_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Wrap each tool definition in an OpenAI-style ``function`` envelope."""
+    wrapped: List[Dict[str, Any]] = []
+    for tool_definition in tools:
+        wrapped.append({"type": "function", "function": tool_definition})
+    return wrapped
+
+
+class TestLiteLLMIntegration:
+    """Test suite for LiteLLM integration covering the core scenarios listed in the module docstring"""
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "simple_chat", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_01_simple_chat(self, test_config, provider, model):
+        """Test Case 1: Simple chat interaction.
+
+        Skipped when the parametrization yields the "no providers"
+        placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        response = litellm.completion(
+            model=model,
+            messages=SIMPLE_CHAT_MESSAGES,
+            max_tokens=100,
+        )
+
+        assert_valid_chat_response(response)
+        reply = response.choices[0].message.content
+        assert reply is not None
+        assert len(reply) > 0
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "multi_turn_conversation", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_02_multi_turn_conversation(self, test_config, provider, model):
+        """Test Case 2: Multi-turn conversation.
+
+        Skipped when the parametrization yields the "no providers"
+        placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        response = litellm.completion(
+            model=model,
+            messages=MULTI_TURN_MESSAGES,
+            max_tokens=150,
+        )
+
+        assert_valid_chat_response(response)
+        raw_content = response.choices[0].message.content
+        # Fail with a clear message instead of an AttributeError on None.
+        assert raw_content is not None, "Response content should not be None"
+        content = raw_content.lower()
+        # Should mention population or numbers since we asked about Paris population
+        assert any(word in content for word in ["population", "million", "people", "inhabitants"])
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "tool_calls", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_03_single_tool_call(self, test_config, provider, model):
+        """Test Case 3: Single tool call.
+
+        Skipped when the parametrization yields the "no providers"
+        placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        tools = convert_to_litellm_tools([WEATHER_TOOL])
+
+        response = litellm.completion(
+            model=model,
+            messages=SINGLE_TOOL_CALL_MESSAGES,
+            tools=tools,
+            max_tokens=100,
+        )
+
+        assert_has_tool_calls(response, expected_count=1)
+        tool_calls = extract_tool_calls(response)
+        first_call = tool_calls[0]
+        assert first_call["name"] == "get_weather"
+        assert "location" in first_call["arguments"]
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "multiple_tool_calls", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_04_multiple_tool_calls(self, test_config, provider, model):
+        """Test Case 4: Multiple tool calls in one response.
+
+        Skipped when the parametrization yields the "no providers"
+        placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        tools = convert_to_litellm_tools([WEATHER_TOOL, CALCULATOR_TOOL])
+
+        response = litellm.completion(
+            model=model,
+            messages=MULTIPLE_TOOL_CALL_MESSAGES,
+            tools=tools,
+            max_tokens=200,
+        )
+
+        assert_has_tool_calls(response, expected_count=2)
+        tool_calls = extract_tool_calls(response)
+        tool_names = [tc["name"] for tc in tool_calls]
+        assert "get_weather" in tool_names
+        assert "calculate" in tool_names
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "end2end_tool_calling", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_05_end2end_tool_calling(self, test_config, provider, model):
+        """Test Case 5: Complete tool calling flow with responses.
+
+        Requests a tool call, feeds a mocked tool result back, and checks the
+        follow-up answer. Both completions use the parametrized ``model``.
+        Skipped when the parametrization yields the "no providers"
+        placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        messages = [{"role": "user", "content": "What's the weather in Boston?"}]
+        tools = convert_to_litellm_tools([WEATHER_TOOL])
+
+        response = litellm.completion(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=100,
+        )
+
+        assert_has_tool_calls(response, expected_count=1)
+
+        # Add assistant's tool call to conversation
+        assistant_message = response.choices[0].message
+        messages.append(assistant_message)
+
+        # Add tool response
+        tool_calls = extract_litellm_tool_calls(response)
+        tool_response = mock_tool_response(tool_calls[0]["name"], tool_calls[0]["arguments"])
+
+        messages.append(
+            {
+                "role": "tool",
+                "tool_call_id": assistant_message.tool_calls[0].id,
+                "content": tool_response,
+            }
+        )
+
+        # Get final response from the same model that issued the tool call, so
+        # the whole round trip is exercised for the provider under test.
+        final_response = litellm.completion(
+            model=model, messages=messages, max_tokens=150
+        )
+
+        assert_valid_chat_response(final_response)
+        content = final_response.choices[0].message.content.lower()
+        weather_location_keywords = WEATHER_KEYWORDS + LOCATION_KEYWORDS
+        assert any(word in content for word in weather_location_keywords)
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "automatic_function_calling", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_06_automatic_function_calling(self, test_config, provider, model):
+        """Test Case 6: Automatic function calling.
+
+        Skipped when the parametrization yields the "no providers"
+        placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        tools = convert_to_litellm_tools([CALCULATOR_TOOL])
+
+        response = litellm.completion(
+            model=model,
+            messages=[{"role": "user", "content": "Calculate 25 * 4 for me"}],
+            tools=tools,
+            tool_choice="auto",
+            max_tokens=100,
+        )
+
+        # Should automatically choose to use the calculator
+        assert_has_tool_calls(response, expected_count=1)
+        tool_calls = extract_litellm_tool_calls(response)
+        assert tool_calls[0]["name"] == "calculate"
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "image_url", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_07_image_url(self, test_config, provider, model):
+        """Test Case 7: Image analysis from URL.
+
+        Skipped when the parametrization yields the "no providers"
+        placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        response = litellm.completion(
+            model=model,
+            messages=IMAGE_URL_MESSAGES,
+            max_tokens=200,
+        )
+
+        assert_valid_image_response(response)
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "image_base64", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_08_image_base64(self, test_config, provider, model):
+        """Test Case 8: Image analysis from base64.
+
+        Skipped when the parametrization yields the "no providers"
+        placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        response = litellm.completion(
+            model=model,
+            messages=IMAGE_BASE64_MESSAGES,
+            max_tokens=200,
+        )
+
+        assert_valid_image_response(response)
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "multiple_images", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_09_multiple_images(self, test_config, provider, model):
+        """Test Case 9: Multiple image analysis.
+
+        Skipped when the parametrization yields the "no providers"
+        placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        response = litellm.completion(
+            model=model,
+            messages=MULTIPLE_IMAGES_MESSAGES,
+            max_tokens=300,
+        )
+
+        assert_valid_image_response(response)
+        content = response.choices[0].message.content.lower()
+        # Should mention comparison or differences
+        assert any(
+            word in content for word in COMPARISON_KEYWORDS
+        ), f"Response should contain comparison keywords. Got content: {content}"
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "complex_e2end", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    @pytest.mark.skip(reason="Known flaky test")
+    def test_10_complex_end2end(self, test_config, provider, model):
+        """Test Case 10: Complex end-to-end with conversation, images, and tools.
+
+        Currently skipped unconditionally. When run, it is also skipped if the
+        parametrization yields the "no providers" placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        # Shallow copy so appended turns do not grow the shared message list.
+        messages = COMPLEX_E2E_MESSAGES.copy()
+        tools = convert_to_litellm_tools([WEATHER_TOOL])
+
+        # First, analyze the image
+        response1 = litellm.completion(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=300,
+        )
+        first_message = response1.choices[0].message
+
+        # Should either describe image or call weather tool (or both)
+        assert (
+            first_message.content is not None
+            or first_message.tool_calls is not None
+        )
+
+        # Add response to conversation
+        messages.append(first_message)
+
+        # Without tool calls there is no follow-up turn to check.
+        if not first_message.tool_calls:
+            return
+
+        for tool_call in first_message.tool_calls:
+            tool_name = tool_call.function.name
+            tool_args = json.loads(tool_call.function.arguments)
+            tool_response = mock_tool_response(tool_name, tool_args)
+
+            messages.append(
+                {
+                    "role": "tool",
+                    "tool_call_id": tool_call.id,
+                    "content": tool_response,
+                }
+            )
+
+        # Get final response after tool calls
+        final_response = litellm.completion(model=model, messages=messages, max_tokens=200)
+
+        assert_valid_chat_response(final_response)
+
+    @pytest.mark.skip(reason="known flaky test")
+    def test_11_integration_specific_features(self, test_config):
+        """Test Case 11: LiteLLM-specific features.
+
+        Covers three checks: a basic completion against several models, a
+        forced ``tool_choice``, and sampling parameters. A model that cannot
+        be reached skips the test, but a failed assertion on a response that
+        was received is reported as a failure.
+        """
+
+        # Test 1: Multiple integrations through LiteLLM
+        # Note: Gemini is excluded as LiteLLM routes it through Vertex AI-specific endpoints
+        integrations_to_test = [
+            "gpt-3.5-turbo",  # OpenAI
+            "claude-3-haiku-20240307",  # Anthropic
+            "mistral/mistral-7b-instruct",  # Mistral
+        ]
+
+        for model in integrations_to_test:
+            try:
+                response = litellm.completion(
+                    model=model,
+                    messages=[{"role": "user", "content": "Hello, how are you?"}],
+                    max_tokens=50,
+                )
+
+                assert_valid_chat_response(response)
+
+            except AssertionError:
+                # An invalid response is a real failure, not "unavailable".
+                raise
+            except Exception as e:
+                # Some integrations might not be available, skip gracefully
+                pytest.skip(f"Integration {model} not available: {e}")
+
+        # Test 2: Function calling with specific tool choice
+        tools = convert_to_litellm_tools([CALCULATOR_TOOL, WEATHER_TOOL])
+
+        response2 = litellm.completion(
+            model=get_model("litellm", "chat"),
+            messages=[{"role": "user", "content": "What's 15 + 27?"}],
+            tools=tools,
+            tool_choice={"type": "function", "function": {"name": "calculate"}},
+            max_tokens=100,
+        )
+
+        assert_has_tool_calls(response2, expected_count=1)
+        tool_calls = extract_litellm_tool_calls(response2)
+        assert tool_calls[0]["name"] == "calculate"
+
+        # Test 3: Temperature and other parameters
+        response3 = litellm.completion(
+            model=get_model("litellm", "chat"),
+            messages=[{"role": "user", "content": "Tell me a creative story in one sentence."}],
+            temperature=0.9,
+            top_p=0.9,
+            max_tokens=100,
+        )
+
+        assert_valid_chat_response(response3)
+
+    def test_12_error_handling_invalid_roles(self, test_config):
+        """Test Case 12: A request with invalid message roles must raise."""
+        with pytest.raises(Exception) as raised:
+            request_kwargs = {
+                "model": get_model("litellm", "chat"),
+                "messages": INVALID_ROLE_MESSAGES,
+                "max_tokens": 100,
+            }
+            litellm.completion(**request_kwargs)
+
+        # The captured exception is checked by the shared error helpers.
+        caught = raised.value
+        assert_valid_error_response(caught, "tester")
+        assert_error_propagation(caught, "litellm")
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "streaming", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_13_streaming(self, test_config, provider, model):
+        """Test Case 13: Streaming chat completion.
+
+        Checks a plain streamed completion first, then a streamed completion
+        with a tool available. Skipped when the parametrization yields the
+        "no providers" placeholder pair.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        # Test basic streaming
+        stream = litellm.completion(
+            model=model,
+            messages=STREAMING_CHAT_MESSAGES,
+            max_tokens=200,
+            stream=True,
+        )
+
+        content, chunk_count, tool_calls_detected = collect_streaming_content(
+            stream, "openai", timeout=120  # LiteLLM uses OpenAI format
+        )
+
+        # Validate streaming results
+        assert chunk_count > 0, "Should receive at least one chunk"
+        assert len(content) > 10, "Should receive substantial content"
+        assert not tool_calls_detected, "Basic streaming shouldn't have tool calls"
+
+        # Test streaming with tool calls
+        stream_with_tools = litellm.completion(
+            model=model,
+            messages=STREAMING_TOOL_CALL_MESSAGES,
+            max_tokens=150,
+            tools=convert_to_litellm_tools([WEATHER_TOOL]),
+            stream=True,
+        )
+
+        # The text content of the tool stream is not checked.
+        _, chunk_count_tools, tool_calls_detected_tools = collect_streaming_content(
+            stream_with_tools, "openai", timeout=120  # LiteLLM uses OpenAI format
+        )
+
+        # Validate tool streaming results
+        assert chunk_count_tools > 0, "Should receive at least one chunk with tools"
+        assert tool_calls_detected_tools, "Should detect tool calls in streaming response"
+
+    @pytest.mark.skip(reason="known flaky test")
+    def test_14_gemini_integration(self, test_config):
+        """Test Case 14: Google Gemini integration through LiteLLM.
+
+        A request error skips the test; a failed assertion on a received
+        response is reported as a failure.
+        """
+        try:
+            # Test basic chat with Gemini
+            response = litellm.completion(
+                model="gemini-2.0-flash-001",
+                messages=[
+                    {
+                        "role": "user",
+                        "content": "What is machine learning? Answer in one sentence.",
+                    }
+                ],
+                max_tokens=100,
+            )
+
+            assert_valid_chat_response(response)
+            content = response.choices[0].message.content.lower()
+            assert any(
+                word in content for word in ["machine", "learning", "data", "algorithm"]
+            ), f"Response should mention ML concepts. Got: {content}"
+
+            # Test with tool calling if supported
+            tools = convert_to_litellm_tools([CALCULATOR_TOOL])
+            response_tools = litellm.completion(
+                model="gemini-2.0-flash-001",
+                messages=[{"role": "user", "content": "Calculate 42 * 17"}],
+                tools=tools,
+                max_tokens=100,
+            )
+
+            # Gemini should either use tools or provide calculation
+            if response_tools.choices[0].message.tool_calls:
+                assert_has_tool_calls(response_tools, expected_count=1)
+            else:
+                # Should at least provide the calculation result. Treat missing
+                # content as empty so the assertion fails instead of raising
+                # TypeError.
+                content = response_tools.choices[0].message.content or ""
+                assert "714" in content or "42" in content, "Should provide calculation result"
+
+        except AssertionError:
+            # An unexpected response is a real failure, not "unavailable".
+            raise
+        except Exception as e:
+            pytest.skip(f"Gemini integration not available: {e}")
+
+    @pytest.mark.skip(reason="known flaky test")
+    def test_15_mistral_integration(self, test_config):
+        """Test Case 15: Mistral integration through LiteLLM.
+
+        A request error skips the test; a failed assertion on a received
+        response is reported as a failure.
+        """
+        try:
+            # Test basic chat with Mistral
+            response = litellm.completion(
+                model="mistral/mistral-7b-instruct",
+                messages=[
+                    {
+                        "role": "user",
+                        "content": "Explain recursion in programming briefly.",
+                    }
+                ],
+                max_tokens=150,
+            )
+
+            assert_valid_chat_response(response)
+            content = response.choices[0].message.content.lower()
+            assert any(
+                word in content for word in ["recursion", "function", "itself", "call"]
+            ), f"Response should explain recursion. Got: {content}"
+
+            # Test with different temperature
+            response_creative = litellm.completion(
+                model="mistral/mistral-7b-instruct",
+                messages=[{"role": "user", "content": "Write a haiku about code."}],
+                temperature=0.8,
+                max_tokens=100,
+            )
+
+            assert_valid_chat_response(response_creative)
+
+        except AssertionError:
+            # An unexpected response is a real failure, not "unavailable".
+            raise
+        except Exception as e:
+            pytest.skip(f"Mistral integration not available: {e}")
+
+    @pytest.mark.skip(reason="known flaky test")
+    def test_16_openai_embeddings_via_litellm(self, test_config):
+        """Test Case 16: OpenAI embeddings through LiteLLM.
+
+        Covers a single-text embedding, a batch embedding and a cosine-similarity
+        check between the first two similar texts. Provider errors skip the test;
+        failed assertions are re-raised so they surface as failures.
+        """
+        try:
+            # Resolve the model once; fall back to the default when none is configured
+            embedding_model = (
+                get_model("litellm", "embeddings") or "text-embedding-3-small"
+            )
+
+            # Test single text embedding
+            response = litellm.embedding(
+                model=embedding_model,
+                input=EMBEDDINGS_SINGLE_TEXT,
+            )
+
+            assert_valid_embedding_response(response, expected_dimensions=1536)
+
+            # Test batch embeddings
+            batch_response = litellm.embedding(
+                model=embedding_model,
+                input=EMBEDDINGS_MULTIPLE_TEXTS,
+            )
+
+            assert_valid_embeddings_batch_response(
+                batch_response, len(EMBEDDINGS_MULTIPLE_TEXTS), expected_dimensions=1536
+            )
+
+            # Test similarity analysis
+            similar_response = litellm.embedding(
+                model=embedding_model,
+                input=EMBEDDINGS_SIMILAR_TEXTS,
+            )
+
+            # The response (and each item in it) may be a dict or an object
+            # exposing attributes, so handle both shapes.
+            embeddings = [
+                item["embedding"] if isinstance(item, dict) else item.embedding
+                for item in (
+                    similar_response["data"]
+                    if isinstance(similar_response, dict)
+                    else similar_response.data
+                )
+            ]
+
+            # Calculate similarity between similar texts
+            similarity = calculate_cosine_similarity(embeddings[0], embeddings[1])
+            assert (
+                similarity > 0.7
+            ), f"Similar texts should have high similarity, got {similarity:.4f}"
+
+        except AssertionError:
+            # Do not let the availability handler below hide a failed check as a skip.
+            raise
+        except Exception as e:
+            pytest.skip(f"OpenAI embeddings through LiteLLM not available: {e}")
+
+    def test_17_openai_speech_via_litellm(self, test_config):
+        """Test Case 17: OpenAI speech synthesis through LiteLLM.
+
+        Synthesizes speech with the primary and the secondary voice, validates
+        both payloads and checks that they differ. Provider errors skip the
+        test; failed assertions are re-raised so they surface as failures.
+        """
+        try:
+            speech_model = get_model("litellm", "speech") or "tts-1"
+
+            # Test basic speech synthesis
+            response = litellm.speech(
+                model=speech_model,
+                voice=get_provider_voice("openai", "primary"),
+                input=SPEECH_TEST_INPUT,
+            )
+
+            # LiteLLM might return a wrapper exposing `.content` or the raw
+            # payload itself; use `.content` when present, else the response.
+            audio_content = getattr(response, "content", response)
+
+            assert_valid_speech_response(audio_content)
+
+            # Test with different voice
+            response2 = litellm.speech(
+                model=speech_model,
+                voice=get_provider_voice("openai", "secondary"),
+                input="Short test message for voice comparison.",
+                response_format="mp3",
+            )
+
+            audio_content2 = getattr(response2, "content", response2)
+
+            assert_valid_speech_response(audio_content2, expected_audio_size_min=500)
+
+            # Different voices should produce different audio
+            assert (
+                audio_content != audio_content2
+            ), "Different voices should produce different audio"
+
+        except AssertionError:
+            # Do not let the availability handler below hide a failed check as a skip.
+            raise
+        except Exception as e:
+            pytest.skip(f"OpenAI speech through LiteLLM not available: {e}")
+
+    def test_18_openai_transcription_via_litellm(self, test_config):
+        """Test Case 18: OpenAI transcription through LiteLLM.
+
+        Transcribes generated test audio once with default settings and once
+        with an explicit language and temperature. Provider errors skip the
+        test; failed assertions are re-raised so they surface as failures.
+        """
+        try:
+            transcription_model = get_model("litellm", "transcription") or "whisper-1"
+
+            # Generate test audio for transcription
+            test_audio = generate_test_audio()
+
+            # Test basic transcription
+            response = litellm.transcription(
+                model=transcription_model,
+                file=("test_audio.wav", test_audio, "audio/wav"),
+            )
+
+            assert_valid_transcription_response(response)
+
+            # Test with additional parameters
+            response2 = litellm.transcription(
+                model=transcription_model,
+                file=("test_audio.wav", test_audio, "audio/wav"),
+                language="en",
+                temperature=0.0,
+            )
+
+            assert_valid_transcription_response(response2)
+
+        except AssertionError:
+            # Do not let the availability handler below hide a failed check as a skip.
+            raise
+        except Exception as e:
+            pytest.skip(f"OpenAI transcription through LiteLLM not available: {e}")
+
+    def test_19_multi_provider_comparison(self, test_config):
+        """Test Case 19: Compare responses across different providers through LiteLLM.
+
+        Sends the same question to each listed model. A model whose request
+        raises is reported and left out; a model that answers but fails
+        validation fails the test. At least one model must answer, and every
+        collected answer must mention Tokyo or Japan.
+        """
+        test_prompt = "What is the capital of Japan? Answer in one word."
+        models_to_test = [
+            "gpt-3.5-turbo",  # OpenAI
+            "claude-3-haiku-20240307",  # Anthropic
+            "gemini-2.0-flash-001",  # Google
+        ]
+
+        responses = {}
+
+        for model in models_to_test:
+            try:
+                response = litellm.completion(
+                    model=model,
+                    messages=[{"role": "user", "content": test_prompt}],
+                    max_tokens=50,
+                )
+
+                assert_valid_chat_response(response)
+                responses[model] = response.choices[0].message.content.lower()
+
+            except AssertionError:
+                # An invalid response is a real failure, not an unavailable model.
+                raise
+            except Exception as e:
+                print(f"Model {model} not available: {e}")
+
+        # Verify that we got at least one response
+        assert len(responses) > 0, "Should get at least one successful response"
+
+        # All responses should mention Tokyo or Japan
+        for model, content in responses.items():
+            assert any(
+                word in content for word in ["tokyo", "japan"]
+            ), f"Model {model} should mention Tokyo. Got: {content}"
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_20_token_counter_simple_text(self, test_config, provider, model):
+        """Test Case 20: Count tokens from simple text using LiteLLM token_counter.
+
+        Skips when the parametrization yields the "no providers" placeholders or
+        when token counting raises for this provider/model. A count that fails
+        the checks below fails the test instead of being reported as a skip.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            # Count tokens using text parameter
+            token_count = litellm.token_counter(
+                model=model,
+                text=INPUT_TOKENS_SIMPLE_TEXT,
+            )
+
+            # Validate token count
+            assert isinstance(token_count, int), "Token count should be an integer"
+            assert token_count > 0, "Token count should be positive"
+            # Simple text should have a reasonable token count (between 3-20 tokens)
+            assert 3 <= token_count <= 20, (
+                f"Simple text should have 3-20 tokens, got {token_count}"
+            )
+
+        except AssertionError:
+            # Do not let the availability handler below hide a failed check as a skip.
+            raise
+        except Exception as e:
+            pytest.skip(f"Token counting not available for {provider}/{model}: {e}")
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_21_token_counter_with_messages(self, test_config, provider, model):
+        """Test Case 21: Count tokens from messages with system message using LiteLLM token_counter.
+
+        Skips when the parametrization yields the "no providers" placeholders or
+        when token counting raises for this provider/model. A count that fails
+        the checks below fails the test instead of being reported as a skip.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            # Count tokens using messages parameter
+            token_count = litellm.token_counter(
+                model=model,
+                messages=INPUT_TOKENS_WITH_SYSTEM,
+            )
+
+            # Validate token count
+            assert isinstance(token_count, int), "Token count should be an integer"
+            assert token_count > 0, "Token count should be positive"
+            # Only a fixed lower bound is checked here; the count is not compared
+            # against the simple-text count.
+            assert token_count > 2, (
+                f"With system message should have >2 tokens, got {token_count}"
+            )
+
+        except AssertionError:
+            # Do not let the availability handler below hide a failed check as a skip.
+            raise
+        except Exception as e:
+            pytest.skip(f"Token counting not available for {provider}/{model}: {e}")
+
+    @pytest.mark.parametrize(
+        "provider, model",
+        get_cross_provider_params_for_scenario(
+            "count_tokens", exclude_providers=LITELLM_EXCLUDED_PROVIDERS
+        ),
+    )
+    def test_22_token_counter_long_text(self, test_config, provider, model):
+        """Test Case 22: Count tokens from long text using LiteLLM token_counter.
+
+        Skips when the parametrization yields the "no providers" placeholders or
+        when token counting raises for this provider/model. A count that fails
+        the checks below fails the test instead of being reported as a skip.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            # Count tokens using text parameter with long text
+            token_count = litellm.token_counter(
+                model=model,
+                text=INPUT_TOKENS_LONG_TEXT,
+            )
+
+            # Validate token count
+            assert isinstance(token_count, int), "Token count should be an integer"
+            assert token_count > 100, (
+                f"Long text should have >100 tokens, got {token_count}"
+            )
+
+        except AssertionError:
+            # Do not let the availability handler below hide a failed check as a skip.
+            raise
+        except Exception as e:
+            pytest.skip(f"Token counting not available for {provider}/{model}: {e}")
+
+
+
+# Additional helper functions specific to LiteLLM
+def extract_litellm_tool_calls(response: Any) -> List[Dict[str, Any]]:
+    """Collect the tool calls of the first choice of a LiteLLM (OpenAI-style) response.
+
+    Args:
+        response: Object expected to expose ``choices[0].message.tool_calls``.
+            Missing or empty attributes along that path yield an empty list.
+
+    Returns:
+        One ``{"name": ..., "arguments": ...}`` dict per tool call that has a
+        ``function`` with a ``name``. String arguments are decoded as JSON;
+        anything else is passed through unchanged. A call whose arguments
+        cannot be read or decoded is reported with a warning and left out.
+    """
+    choices = getattr(response, "choices", None)
+    if not choices:
+        return []
+
+    first_message = getattr(choices[0], "message", None)
+    raw_calls = getattr(first_message, "tool_calls", None)
+    if not raw_calls:
+        return []
+
+    extracted: List[Dict[str, Any]] = []
+    for raw_call in raw_calls:
+        function = getattr(raw_call, "function", None)
+        if not hasattr(function, "name"):
+            # No function payload (or an unnamed one): nothing to extract
+            continue
+
+        try:
+            parsed_args = function.arguments
+            if isinstance(parsed_args, str):
+                parsed_args = json.loads(parsed_args)
+            entry = {"name": function.name, "arguments": parsed_args}
+        except (json.JSONDecodeError, AttributeError) as e:
+            print(f"Warning: Failed to parse LiteLLM tool call arguments: {e}")
+        else:
+            extracted.append(entry)
+
+    return extracted
--- a/tests/integrations/python/tests/test_openai.py
+++ b/tests/integrations/python/tests/test_openai.py
--- a/tests/integrations/python/tests/test_pydanticai.py
+++ b/tests/integrations/python/tests/test_pydanticai.py
@@ -0,0 +1,781 @@
+"""
+Pydantic AI Integration Tests - Cross-Provider Support
+
+🌉 CROSS-PROVIDER TESTING:
+This test suite uses Pydantic AI to test against multiple AI providers through Bifrost.
+Tests automatically run against all available providers with proper capability filtering.
+
+🤖 PYDANTIC AI COMPONENTS TESTED:
+- Agent: Core agent class for running LLM interactions
+- Models: OpenAI (OpenAIChatModel), Anthropic (AnthropicModel), Google (GoogleModel), Cohere (CohereModel)
+- Providers: OpenAIProvider, AnthropicProvider, GoogleProvider, CohereProvider
+- Tools: Function tools with @agent.tool decorator
+- Structured Output: Pydantic BaseModel result types
+- Streaming: Real-time response streaming
+- Async Operations: agent.run() async patterns
+
+⚠️ PROVIDER LIMITATIONS:
+- Bedrock: Not supported in PydanticAI tests - tested separately in test_bedrock.py
+
+Tests Pydantic AI standard interface compliance and Bifrost integration:
+1. Basic Agent chat - Cross-provider
+2. Agent with system prompt (instructions) - Cross-provider
+3. Multi-turn conversation with message history - Cross-provider
+4. Tool calling with @agent.tool decorator - Cross-provider
+5. End-to-end tool calling with multi-turn flow - Cross-provider
+6. Structured output with Pydantic models - Cross-provider
+7. Streaming responses - Cross-provider
+8. Async operations
+9. Error handling
+10. Tool with context - Cross-provider
+11. Multiple tools - Cross-provider
+12. Result validation
+13. Usage tracking
+14. Message history inspection
+15. Dynamic instructions
+"""
+
+import pytest
+import asyncio
+import os
+from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+
+from pydantic import BaseModel, Field
+from pydantic_ai import Agent, RunContext, Tool
+
+# Pydantic AI model imports
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.openai import OpenAIProvider
+
+# Optional provider imports
+try:
+    from pydantic_ai.models.anthropic import AnthropicModel
+    from pydantic_ai.providers.anthropic import AnthropicProvider
+    ANTHROPIC_AVAILABLE = True
+except ImportError:
+    ANTHROPIC_AVAILABLE = False
+    AnthropicModel = None
+    AnthropicProvider = None
+
+try:
+    from pydantic_ai.models.google import GoogleModel
+    from pydantic_ai.providers.google import GoogleProvider
+    GOOGLE_AVAILABLE = True
+except ImportError:
+    GOOGLE_AVAILABLE = False
+    GoogleModel = None
+    GoogleProvider = None
+
+try:
+    from cohere import AsyncClientV2 as CohereAsyncClient
+    from pydantic_ai.models.cohere import CohereModel
+    from pydantic_ai.providers.cohere import CohereProvider
+    COHERE_AVAILABLE = True
+except ImportError:
+    COHERE_AVAILABLE = False
+    CohereAsyncClient = None
+    CohereModel = None
+    CohereProvider = None
+
+from .utils.common import (
+    Config,
+    SIMPLE_CHAT_MESSAGES,
+    MULTI_TURN_MESSAGES,
+    WEATHER_TOOL,
+    CALCULATOR_TOOL,
+    EMBEDDINGS_SINGLE_TEXT,
+    EMBEDDINGS_MULTIPLE_TEXTS,
+    mock_tool_response,
+    assert_valid_chat_response,
+    get_api_key,
+    skip_if_no_api_key,
+    WEATHER_KEYWORDS,
+    LOCATION_KEYWORDS,
+)
+from .utils.config_loader import get_model, get_integration_url, get_config
+from .utils.parametrize import (
+    get_cross_provider_params_for_scenario,
+    format_provider_model,
+)
+
+
+@pytest.fixture
+def test_config():
+    """Fixture returning a newly constructed ``Config`` for the requesting test."""
+    config = Config()
+    return config
+
+
+@pytest.fixture(autouse=True)
+def setup_pydanticai():
+    """Install dummy provider credentials for a test, then restore the environment.
+
+    Bifrost handles the actual authentication, so the provider SDKs only need
+    *some* key to be present. The previous value of every variable is recorded
+    and put back after the test, so the dummy keys do not leak into the rest of
+    the process (pytest does not undo direct ``os.environ`` writes itself).
+    """
+    dummy_credentials = {
+        "OPENAI_API_KEY": "dummy-openai-key-bifrost-handles-auth",
+        "ANTHROPIC_API_KEY": "dummy-anthropic-key-bifrost-handles-auth",
+        "GOOGLE_API_KEY": "dummy-google-api-key-bifrost-handles-auth",
+        "GEMINI_API_KEY": "dummy-gemini-api-key-bifrost-handles-auth",
+        "CO_API_KEY": "dummy-cohere-key-bifrost-handles-auth",
+    }
+
+    # None marks a variable that was not set before the test
+    previous_values = {name: os.environ.get(name) for name in dummy_credentials}
+    os.environ.update(dummy_credentials)
+
+    try:
+        yield
+    finally:
+        for name, value in previous_values.items():
+            if value is None:
+                os.environ.pop(name, None)
+            else:
+                os.environ[name] = value
+
+
+def get_openai_model(model_name: str | None = None) -> OpenAIChatModel:
+    """Build an ``OpenAIChatModel`` whose provider points at Bifrost.
+
+    Args:
+        model_name: Model identifier. When ``None``, the "chat" model configured
+            for the "pydanticai" integration is looked up with ``get_model``.
+
+    Returns:
+        The model bound to an ``OpenAIProvider`` using ``<integration url>/v1``
+        as base URL and a dummy API key.
+    """
+    bifrost_url = get_integration_url("pydanticai")
+    resolved_name = (
+        get_model("pydanticai", "chat") if model_name is None else model_name
+    )
+
+    return OpenAIChatModel(
+        resolved_name,
+        provider=OpenAIProvider(
+            base_url=f"{bifrost_url}/v1",
+            api_key="dummy-openai-key-bifrost-handles-auth",
+        ),
+    )
+
+
+def get_anthropic_model(model_name: str = "claude-3-haiku-20240307") -> Optional[Any]:
+    """Build an ``AnthropicModel`` whose provider points at Bifrost.
+
+    Args:
+        model_name: Anthropic model identifier.
+
+    Returns:
+        The configured model, or ``None`` when the optional Anthropic imports
+        failed (``ANTHROPIC_AVAILABLE`` is false).
+    """
+    if ANTHROPIC_AVAILABLE:
+        # Note: Anthropic SDK adds /v1 internally, so the integration URL is
+        # passed as-is (unlike the OpenAI SDK, which expects /v1 in the base URL)
+        bifrost_provider = AnthropicProvider(
+            base_url=get_integration_url("pydanticai"),
+            api_key="dummy-anthropic-key-bifrost-handles-auth",
+        )
+        return AnthropicModel(model_name, provider=bifrost_provider)
+
+    return None
+
+
+def get_google_model(model_name: str = "gemini-2.0-flash") -> Optional[Any]:
+    """Build a ``GoogleModel`` whose provider points at Bifrost.
+
+    Args:
+        model_name: Google model identifier.
+
+    Returns:
+        The configured model, or ``None`` when the optional Google imports
+        failed (``GOOGLE_AVAILABLE`` is false).
+    """
+    if GOOGLE_AVAILABLE:
+        bifrost_url = get_integration_url("pydanticai")
+        # GoogleProvider is given the Bifrost endpoint as its base URL
+        bifrost_provider = GoogleProvider(
+            api_key="dummy-google-api-key-bifrost-handles-auth",
+            base_url=bifrost_url,
+        )
+        return GoogleModel(model_name, provider=bifrost_provider)
+
+    return None
+
+
+def get_cohere_model(model_name: str = "command-r7b-12-2024") -> Optional[Any]:
+    """Build a ``CohereModel`` whose client points at Bifrost.
+
+    Args:
+        model_name: Cohere model identifier.
+
+    Returns:
+        The configured model, or ``None`` when the optional Cohere imports
+        failed (``COHERE_AVAILABLE`` is false).
+    """
+    if COHERE_AVAILABLE:
+        bifrost_url = get_integration_url("pydanticai")
+        # A custom AsyncClientV2 carries the Bifrost base URL and is handed to
+        # CohereProvider as its client.
+        bifrost_client = CohereAsyncClient(
+            api_key="dummy-cohere-key-bifrost-handles-auth",
+            base_url=bifrost_url,
+        )
+        return CohereModel(
+            model_name,
+            provider=CohereProvider(cohere_client=bifrost_client),
+        )
+
+    return None
+
+
+def get_pydanticai_model_for_provider(provider: str, model: str) -> Any:
+    """
+    Create the Pydantic AI model object that matches a provider name.
+
+    The provider name is matched case-insensitively and dispatched to the
+    corresponding ``get_*_model`` factory in this module.
+
+    Args:
+        provider: Provider name ('openai', 'anthropic', 'gemini'/'google', 'cohere')
+        model: Model name passed through to the provider-specific factory
+
+    Returns:
+        Configured Pydantic AI model object for the provider
+
+    Raises:
+        ValueError: If the provider is unknown, is 'bedrock', or its optional
+            SDK imports are not available
+    """
+    provider_key = provider.lower()
+
+    if provider_key == "openai":
+        return get_openai_model(model)
+
+    if provider_key == "anthropic":
+        if ANTHROPIC_AVAILABLE:
+            return get_anthropic_model(model)
+        raise ValueError(f"Anthropic SDK not available for provider '{provider}'")
+
+    if provider_key in ("gemini", "google"):
+        if GOOGLE_AVAILABLE:
+            return get_google_model(model)
+        raise ValueError(f"Google GenAI SDK not available for provider '{provider}'")
+
+    if provider_key == "cohere":
+        if COHERE_AVAILABLE:
+            return get_cohere_model(model)
+        raise ValueError(f"Cohere SDK not available for provider '{provider}'")
+
+    if provider_key == "bedrock":
+        # Bedrock is tested separately in test_bedrock.py using the native Bedrock API
+        # PydanticAI doesn't have native Bedrock support, and using OpenAI SDK causes
+        # validation errors due to response format differences (e.g., empty service_tier)
+        raise ValueError(
+            "Provider 'bedrock' is not supported in PydanticAI tests - "
+            "use test_bedrock.py for Bedrock testing"
+        )
+
+    raise ValueError(f"Unsupported provider: {provider}. Supported: openai, anthropic, gemini, cohere")
+
+
+# Structured output models for testing
+# Structured-output schema with two required string fields.
+# NOTE(review): Pydantic exposes the class docstring and the Field descriptions
+# in the generated JSON schema, so treat that text as runtime data rather than
+# as free-form documentation - confirm before rewording.
+class CityInfo(BaseModel):
+    """Information about a city"""
+    city: str = Field(description="Name of the city")
+    country: str = Field(description="Country where the city is located")
+
+
+# Structured-output schema with three required string fields; note that the
+# temperature is modelled as text, not as a number.
+# NOTE(review): Pydantic exposes the class docstring and the Field descriptions
+# in the generated JSON schema, so treat that text as runtime data rather than
+# as free-form documentation - confirm before rewording.
+class WeatherResponse(BaseModel):
+    """Weather information response"""
+    location: str = Field(description="Location for the weather")
+    temperature: str = Field(description="Current temperature")
+    conditions: str = Field(description="Weather conditions description")
+
+
+# Structured-output schema pairing an expression (text) with its numeric result.
+# NOTE(review): Pydantic exposes the class docstring and the Field descriptions
+# in the generated JSON schema, so treat that text as runtime data rather than
+# as free-form documentation - confirm before rewording.
+class CalculationResult(BaseModel):
+    """Result of a calculation"""
+    expression: str = Field(description="The mathematical expression")
+    result: float = Field(description="The calculated result")
+
+
+class TestPydanticAIIntegration:
+    """Comprehensive Pydantic AI integration tests through Bifrost"""
+
+    @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("simple_chat"))
+    def test_01_basic_agent_chat(self, test_config, provider, model):
+        """Test Case 1: a plain Agent answers a greeting (parametrized per provider/model).
+
+        Skips on the "no providers" placeholders and when building or running
+        the agent raises ``ValueError``.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            agent = Agent(
+                get_pydanticai_model_for_provider(provider, model),
+                instructions="Be concise, reply with one sentence.",
+            )
+            reply = agent.run_sync("Hello! How are you today?")
+
+            assert reply is not None
+            assert reply.output is not None
+            # The textual form of the output must not be empty
+            output_text = str(reply.output)
+            assert len(output_text) > 0
+        except ValueError as e:
+            pytest.skip(f"Provider {provider} not available: {e}")
+
+    @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("simple_chat"))
+    def test_02_agent_with_system_prompt(self, test_config, provider, model):
+        """Test Case 2: Agent configured with custom instructions (parametrized per provider/model).
+
+        Only checks that the answer mentions Paris. Skips on the "no providers"
+        placeholders and when building or running the agent raises ``ValueError``.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        geography_instructions = (
+            "You are a helpful geography expert. "
+            "Always mention the continent when discussing cities."
+        )
+
+        try:
+            agent = Agent(
+                get_pydanticai_model_for_provider(provider, model),
+                instructions=geography_instructions,
+            )
+            reply = agent.run_sync("What is the capital of France?")
+
+            assert reply is not None
+            assert reply.output is not None
+            answer = str(reply.output).lower()
+            assert "paris" in answer
+        except ValueError as e:
+            pytest.skip(f"Provider {provider} not available: {e}")
+
+    @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("multi_turn_conversation"))
+    def test_03_multi_turn_conversation(self, test_config, provider, model):
+        """Test Case 3: second turn sees the first turn through ``message_history``.
+
+        Parametrized per provider/model. Skips on the "no providers"
+        placeholders and when building or running the agent raises ``ValueError``.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            agent = Agent(
+                get_pydanticai_model_for_provider(provider, model),
+                instructions="You are a helpful assistant. Remember context from previous messages.",
+            )
+
+            # Turn 1 introduces the name; its messages are replayed in turn 2
+            introduction = agent.run_sync("My name is Alice.")
+            history = introduction.all_messages()
+
+            # Turn 2 can only be answered from that history
+            follow_up = agent.run_sync("What is my name?", message_history=history)
+
+            assert follow_up is not None
+            assert follow_up.output is not None
+            answer = str(follow_up.output).lower()
+            assert "alice" in answer
+        except ValueError as e:
+            pytest.skip(f"Provider {provider} not available: {e}")
+
+    @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("tool_calls"))
+    def test_04_tool_calling(self, test_config, provider, model):
+        """Test Case 4: Agent with plain-function tools - runs across all available providers.
+
+        Registers a weather tool and a calculator tool through the ``tools``
+        argument and checks that the final answer mentions weather or location
+        keywords. Skips on the "no providers" placeholders and when building or
+        running the agent raises ``ValueError``.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            pydantic_model = get_pydanticai_model_for_provider(provider, model)
+
+            # Define tools as functions
+            def get_weather(location: str) -> str:
+                """Get the current weather for a location."""
+                return f"The weather in {location} is 72°F and sunny."
+
+            def calculate(expression: str) -> str:
+                """Perform a mathematical calculation."""
+                try:
+                    # SECURITY NOTE: `expression` is produced by the model, i.e. it is
+                    # untrusted input, and eval() on it is NOT safe. Builtins are
+                    # removed so names such as open() or __import__() do not resolve,
+                    # but this is hardening for a test fixture, not a sandbox - do
+                    # not copy this into production code.
+                    normalized = expression.replace("x", "*").replace("×", "*")
+                    result = eval(normalized, {"__builtins__": {}}, {})
+                    return f"The result of {expression} is {result}"
+                except Exception:
+                    return f"Could not calculate {expression}"
+
+            agent = Agent(
+                pydantic_model,
+                tools=[get_weather, calculate],
+                instructions="You are a helpful assistant that can check weather and do calculations.",
+            )
+
+            result = agent.run_sync("What's the weather like in Boston?")
+
+            assert result is not None
+            assert result.output is not None
+            content = str(result.output).lower()
+            # Should either mention weather info or Boston
+            weather_location_keywords = WEATHER_KEYWORDS + LOCATION_KEYWORDS
+            assert any(
+                word in content for word in weather_location_keywords
+            ), f"Response should mention weather or location. Got: {content}"
+
+        except ValueError as e:
+            pytest.skip(f"Provider {provider} not available: {e}")
+
+    @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("end2end_tool_calling"))
+    def test_05_end2end_tool_calling(self, test_config, provider, model):
+        """Test Case 5: end-to-end tool calling in a single ``run_sync`` (parametrized per provider/model).
+
+        Skips on the "no providers" placeholders and when building or running
+        the agent raises ``ValueError``.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            llm = get_pydanticai_model_for_provider(provider, model)
+
+            # The only tool offered to the agent
+            def get_weather(location: str) -> str:
+                """Get the current weather for a location."""
+                return f"The weather in {location} is 72°F and sunny."
+
+            agent = Agent(
+                llm,
+                tools=[get_weather],
+                instructions="You are a helpful assistant that can check weather.",
+            )
+
+            # Pydantic AI executes the tool itself, so this single call is
+            # expected to return the final answer including the weather data.
+            final_reply = agent.run_sync("What's the weather in Boston in fahrenheit?")
+
+            assert final_reply is not None
+            assert final_reply.output is not None
+
+            answer = str(final_reply.output).lower()
+            expected_keywords = WEATHER_KEYWORDS + LOCATION_KEYWORDS
+            assert any(
+                keyword in answer for keyword in expected_keywords
+            ), f"Response should mention weather or location. Got: {answer}"
+        except ValueError as e:
+            pytest.skip(f"Provider {provider} not available: {e}")
+
+    @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("pydantic_structured_output"))
+    def test_06_structured_output(self, test_config, provider, model):
+        """Test Case 6: structured output returned as a ``CityInfo`` instance.
+
+        Parametrized over the "pydantic_structured_output" scenario. Skips on
+        the "no providers" placeholders and when building or running the agent
+        raises ``ValueError``.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            agent = Agent(
+                get_pydanticai_model_for_provider(provider, model),
+                output_type=CityInfo,
+                instructions="Extract city information from the user's question.",
+            )
+            reply = agent.run_sync("Tell me about Paris, the capital of France.")
+
+            assert reply is not None
+            assert reply.output is not None
+
+            city_info = reply.output
+            assert isinstance(city_info, CityInfo)
+            # The city must match exactly; the country only needs to contain "france"
+            assert city_info.city.lower() == "paris"
+            assert "france" in city_info.country.lower()
+        except ValueError as e:
+            pytest.skip(f"Provider {provider} not available: {e}")
+
+    @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("pydanticai_streaming"))
+    def test_07_streaming_responses(self, test_config, provider, model):
+        """Test Case 7: Streaming response functionality - runs on providers with PydanticAI streaming support.
+
+        Streams a short story, collects the text chunks and checks that at
+        least one chunk arrived and that the text looks like the requested
+        story. Skips on the "no providers" placeholders and when building or
+        running the agent raises ``ValueError``.
+        """
+        if provider == "_no_providers_" or model == "_no_model_":
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            pydantic_model = get_pydanticai_model_for_provider(provider, model)
+            agent = Agent(
+                pydantic_model,
+                instructions="You are a storyteller. Tell short, engaging stories.",
+            )
+
+            async def run_streaming():
+                chunks = []
+                async with agent.run_stream("Tell me a very short story about a robot.") as response:
+                    # delta=True yields only the newly received text. The default
+                    # yields the full text so far on every iteration, which would
+                    # duplicate content when the chunks are joined below.
+                    async for chunk in response.stream_text(delta=True):
+                        chunks.append(chunk)
+                return "".join(chunks), len(chunks)
+
+            # asyncio.run() creates a fresh loop, closes it and clears the
+            # current-loop slot afterwards, so no closed loop is left installed
+            # for the tests that run after this one.
+            full_content, chunk_count = asyncio.run(run_streaming())
+
+            assert chunk_count > 0, "Should receive streaming chunks"
+            assert len(full_content) > 0, "Should have content from streaming"
+            assert any(
+                word in full_content.lower() for word in ["robot", "story", "once"]
+            ), f"Response should be a story about robots. Got: {full_content[:200]}"
+
+        except ValueError as e:
+            pytest.skip(f"Provider {provider} not available: {e}")
+
+    def test_08_async_operations(self, test_config):
+        """Test Case 8: Async operation support.
+
+        Runs ``agent.run`` inside ``asyncio.run``. Any exception raised while
+        building or running the agent skips the test; the assertions are made
+        afterwards, outside the handler, so a bad result fails the test instead
+        of being reported as a skip.
+        """
+
+        async def async_test():
+            model = get_openai_model()
+            agent = Agent(
+                model,
+                instructions="Be concise.",
+            )
+            return await agent.run("Hello from async!")
+
+        try:
+            result = asyncio.run(async_test())
+        except Exception as e:
+            pytest.skip(f"Async operations through Pydantic AI not available: {e}")
+
+        assert result is not None
+        assert result.output is not None
+        assert len(str(result.output)) > 0
+
+    def test_09_error_handling(self, test_config):
+        """Test Case 9: A request for a non-existent model must raise a descriptive error.
+
+        Any ``Exception`` escaping the body (including a failed assertion)
+        is converted into a skip.
+        """
+        try:
+            # Point an OpenAI-compatible client at the gateway with a bogus model
+            gateway_url = get_integration_url("pydanticai")
+            bogus_model = OpenAIChatModel(
+                "invalid-model-name-should-fail",
+                provider=OpenAIProvider(
+                    base_url=f"{gateway_url}/v1",
+                    api_key="dummy-key"
+                ),
+            )
+            agent = Agent(bogus_model)
+
+            with pytest.raises(Exception) as exc_info:
+                agent.run_sync("This should fail gracefully.")
+
+            # The error text should hint at what went wrong
+            expected_hints = ["model", "error", "invalid", "not found", "does not exist"]
+            error_message = str(exc_info.value).lower()
+            assert any(hint in error_message for hint in expected_hints)
+
+        except Exception as e:
+            pytest.skip(f"Error handling test through Pydantic AI not available: {e}")
+
+    @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("tool_calls"))
+    def test_10_tool_with_context(self, test_config, provider, model):
+        """Test Case 10: Tool that reads injected dependencies through RunContext.
+
+        Runs once per (provider, model) pair supplied by the parametrization.
+        A ``ValueError`` anywhere in the body skips the test for that provider.
+        """
+        nothing_to_run = provider == "_no_providers_" or model == "_no_model_"
+        if nothing_to_run:
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            llm = get_pydanticai_model_for_provider(provider, model)
+
+            @dataclass
+            class UserDeps:
+                user_name: str
+                user_id: int
+
+            # Name and docstring are kept as-is: they describe the tool to the model.
+            def get_user_info(ctx: RunContext[UserDeps]) -> str:
+                """Get information about the current user."""
+                return f"User: {ctx.deps.user_name} (ID: {ctx.deps.user_id})"
+
+            agent = Agent(
+                llm,
+                deps_type=UserDeps,
+                tools=[Tool(get_user_info, takes_ctx=True)],
+                instructions="You can look up user information when asked.",
+            )
+
+            result = agent.run_sync(
+                "What is my user information?",
+                deps=UserDeps(user_name="Alice", user_id=123),
+            )
+
+            assert result is not None
+            assert result.output is not None
+            # The answer should mention Alice or at least talk about the user
+            answer = str(result.output).lower()
+            assert "alice" in answer or "user" in answer
+
+        except ValueError as e:
+            pytest.skip(f"Provider {provider} not available: {e}")
+
+    @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("multiple_tool_calls"))
+    def test_11_multiple_tools(self, test_config, provider, model):
+        """Test Case 11: Agent registered with several tools at once.
+
+        Runs once per (provider, model) pair supplied by the parametrization.
+        Only checks that a run with three tools available produces an output.
+        """
+        nothing_to_run = provider == "_no_providers_" or model == "_no_model_"
+        if nothing_to_run:
+            pytest.skip("No providers configured for this scenario")
+
+        try:
+            llm = get_pydanticai_model_for_provider(provider, model)
+
+            # Tool names and docstrings are kept as-is: they describe the tools to the model.
+            def get_weather(location: str) -> str:
+                """Get weather for a location."""
+                return f"Weather in {location}: 72°F, sunny"
+
+            def get_time(timezone: str) -> str:
+                """Get current time in a timezone."""
+                return f"Current time in {timezone}: 2:30 PM"
+
+            def translate(text: str, target_language: str) -> str:
+                """Translate text to another language."""
+                return f"'{text}' in {target_language}: [translated]"
+
+            toolbox = [get_weather, get_time, translate]
+            agent = Agent(
+                llm,
+                tools=toolbox,
+                instructions="You can check weather, time, and translate text.",
+            )
+
+            result = agent.run_sync("What's the weather in New York?")
+
+            assert result is not None
+            assert result.output is not None
+
+        except ValueError as e:
+            pytest.skip(f"Provider {provider} not available: {e}")
+
+    def test_12_agent_with_result_validators(self, test_config):
+        """Test Case 12: Structured output constrained by a Pydantic model.
+
+        Any ``Exception`` escaping the body is converted into a skip.
+        """
+        try:
+            # Schema class is kept as-is: it defines the output the agent must produce.
+            class NumberResponse(BaseModel):
+                """A response containing a number"""
+                value: int = Field(ge=0, le=100, description="A number between 0 and 100")
+                explanation: str = Field(description="Explanation of the number")
+
+            agent = Agent(
+                get_openai_model(),
+                output_type=NumberResponse,
+                instructions="When asked for a number, provide a value between 0 and 100.",
+            )
+
+            result = agent.run_sync("Give me a random number for a dice roll (1-6).")
+
+            assert result is not None
+            parsed = result.output
+            assert parsed is not None
+            assert isinstance(parsed, NumberResponse)
+            assert 0 <= parsed.value <= 100
+
+        except Exception as e:
+            pytest.skip(f"Result validation through Pydantic AI not available: {e}")
+
+    def test_13_usage_tracking(self, test_config):
+        """Test Case 13: A completed run exposes positive token usage.
+
+        Any ``Exception`` escaping the body is converted into a skip.
+        """
+        try:
+            agent = Agent(
+                get_openai_model(),
+                instructions="Be concise.",
+            )
+
+            result = agent.run_sync("Say hello.")
+            assert result is not None
+
+            usage = result.usage()
+            assert usage is not None
+
+            # Check the first token counter the usage object exposes, if any
+            for counter in ("total_tokens", "input_tokens"):
+                if hasattr(usage, counter):
+                    assert getattr(usage, counter) > 0
+                    break
+
+        except Exception as e:
+            pytest.skip(f"Usage tracking through Pydantic AI not available: {e}")
+
+    def test_14_message_history_inspection(self, test_config):
+        """Test Case 14: The run result exposes both request and response messages.
+
+        Any ``Exception`` escaping the body is converted into a skip.
+        """
+        try:
+            agent = Agent(
+                get_openai_model(),
+                instructions="Be helpful.",
+            )
+
+            result = agent.run_sync("What is 2 + 2?")
+
+            transcript = result.all_messages()
+            assert transcript is not None
+            assert len(transcript) >= 2  # At least request and response
+
+            # Both directions of the exchange must be present
+            kinds_seen = {entry.kind for entry in transcript}
+            assert "request" in kinds_seen
+            assert "response" in kinds_seen
+
+        except Exception as e:
+            pytest.skip(f"Message history inspection through Pydantic AI not available: {e}")
+
+    def test_15_dynamic_instructions(self, test_config):
+        """Test Case 15: Instructions computed from run dependencies.
+
+        Any ``Exception`` escaping the body is converted into a skip.
+        """
+        try:
+            @dataclass
+            class LanguageDeps:
+                language: str
+
+            agent = Agent(
+                get_openai_model(),
+                deps_type=LanguageDeps,
+            )
+
+            # Registered on the agent; builds the instruction text from the deps
+            @agent.instructions
+            def dynamic_instructions(ctx: RunContext[LanguageDeps]) -> str:
+                return f"Always respond in {ctx.deps.language}. Be concise."
+
+            result = agent.run_sync("Say hello.", deps=LanguageDeps(language="English"))
+
+            assert result is not None
+            assert result.output is not None
+            # An English greeting is expected in the reply
+            reply = str(result.output).lower()
+            greetings = ["hello", "hi", "greetings"]
+            assert any(greeting in reply for greeting in greetings)
+
+        except Exception as e:
+            pytest.skip(f"Dynamic instructions through Pydantic AI not available: {e}")
+
+
+# Additional test class for edge cases
+class TestPydanticAIEdgeCases:
+    """Edge-case coverage for the Pydantic AI integration.
+
+    Every test converts any ``Exception`` escaping its body into a skip.
+    """
+
+    def test_empty_response_handling(self, test_config):
+        """A prompt asking for minimal output must still yield a result object."""
+        try:
+            agent = Agent(
+                get_openai_model(),
+                instructions="If asked to say nothing, respond with a single space.",
+            )
+
+            outcome = agent.run_sync("Say as little as possible.")
+
+            # Only the presence of a result is checked, not its content
+            assert outcome is not None
+
+        except Exception as e:
+            pytest.skip(f"Empty response handling test not available: {e}")
+
+    def test_special_characters_in_prompt(self, test_config):
+        """A prompt with CJK text, emoji, markup and quotes must produce an output."""
+        try:
+            agent = Agent(
+                get_openai_model(),
+                instructions="Echo back special characters correctly.",
+            )
+
+            outcome = agent.run_sync(
+                "Handle these: 你好 🎉 <tag> & \"quotes\" 'apostrophe'"
+            )
+
+            assert outcome is not None
+            assert outcome.output is not None
+
+        except Exception as e:
+            pytest.skip(f"Special characters test not available: {e}")
+
+    def test_long_conversation_context(self, test_config):
+        """A query issued on top of three accumulated turns must produce an output."""
+        try:
+            agent = Agent(
+                get_openai_model(),
+                instructions="You are a helpful assistant.",
+            )
+
+            # Each turn is fed the full message list returned by the previous one
+            transcript = None
+            for number in range(1, 4):
+                turn = agent.run_sync(
+                    f"Remember number {number}.",
+                    message_history=transcript,
+                )
+                transcript = turn.all_messages()
+
+            closing = agent.run_sync(
+                "What numbers did I ask you to remember?",
+                message_history=transcript,
+            )
+
+            assert closing is not None
+            assert closing.output is not None
+
+        except Exception as e:
+            pytest.skip(f"Long conversation context test not available: {e}")
+
--- a/tests/integrations/python/tests/utils/init.py
+++ b/tests/integrations/python/tests/utils/init.py
@@ -0,0 +1 @@
+# Utils package for shared test utilities
--- a/tests/integrations/python/tests/utils/common.py
+++ b/tests/integrations/python/tests/utils/common.py
--- a/tests/integrations/python/tests/utils/config_loader.py
+++ b/tests/integrations/python/tests/utils/config_loader.py
@@ -0,0 +1,522 @@
+"""
+Configuration loader for Bifrost integration tests.
+
+This module loads configuration from config.yml and provides utilities
+for constructing integration URLs through the Bifrost gateway.
+"""
+
+import os
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import yaml
+
+# Integration to provider mapping
+# Maps each integration name to the key of the provider entry (in the config's
+# "providers" section) whose models that integration uses. ConfigLoader also
+# treats the keys of this dict as the list of known integrations.
+INTEGRATION_TO_PROVIDER_MAP = {
+    "openai": "openai",
+    "anthropic": "anthropic",
+    "google": "gemini",  # Google integration uses Gemini provider
+    "litellm": "openai",  # LiteLLM defaults to OpenAI
+    "langchain": "openai",  # LangChain defaults to OpenAI
+    "pydanticai": "openai",  # Pydantic AI defaults to OpenAI
+    "bedrock": "bedrock",  # Bedrock integration uses the bedrock provider
+    "azure": "azure",
+}
+
+@dataclass
+class BifrostConfig:
+    """Bifrost gateway configuration (the "bifrost" section of the config file)"""
+
+    # Root URL of the gateway
+    base_url: str
+    # Integration name -> endpoint path appended to base_url
+    endpoints: Dict[str, str]
+
+
+@dataclass
+class IntegrationModels:
+    """Model configuration for an integration"""
+
+    # Model name used for chat
+    chat: str
+    # Model name used for vision
+    vision: str
+    # Model name used for tools
+    tools: str
+    # Alternative model names
+    alternatives: list
+
+
+@dataclass
+class TestConfig:
+    """Complete test configuration
+
+    One attribute per top-level section of the configuration file.
+    """
+
+    # The class name starts with "Test", so pytest would try to collect it
+    # when it is imported into a test module. Unannotated, so this is a plain
+    # class attribute and not a dataclass field.
+    __test__ = False
+
+    # Gateway base URL and endpoints
+    bifrost: BifrostConfig
+    # API settings section
+    api: Dict[str, Any]
+    # Integration name -> model configuration
+    models: Dict[str, IntegrationModels]
+    # Model name -> capability settings
+    model_capabilities: Dict[str, Dict[str, Any]]
+    # Test settings section
+    test_settings: Dict[str, Any]
+    # Per-integration settings section
+    integration_settings: Dict[str, Any]
+    # Per-environment settings section
+    environments: Dict[str, Any]
+    # Logging section
+    logging: Dict[str, Any]
+
+
+class ConfigLoader:
+    """Configuration loader for Bifrost integration tests
+
+    The YAML file is read once in ``__init__``. ``${VAR}`` and
+    ``${VAR:-default}`` placeholders inside string values are expanded from
+    the process environment at load time; every accessor then reads from the
+    resulting in-memory dict.
+    """
+
+    def __init__(self, config_path: Optional[str] = None):
+        """Initialize configuration loader
+
+        Args:
+            config_path: Path to config.yml file. If None, looks for config.yml in project root.
+
+        Raises:
+            FileNotFoundError: If the resolved file does not exist.
+        """
+        if config_path is None:
+            # Resolve relative to this file so the lookup does not depend on the CWD
+            project_root = Path(__file__).parent.parent.parent
+            config_path = project_root / "config.yml"
+
+        self.config_path = Path(config_path)
+        self._config = None
+        self._load_config()
+
+    def _load_config(self):
+        """Load configuration from YAML file and expand environment placeholders"""
+        if not self.config_path.exists():
+            raise FileNotFoundError(f"Configuration file not found: {self.config_path}")
+
+        with open(self.config_path, "r", encoding="utf-8") as f:
+            # safe_load returns None for an empty document; normalise to a dict
+            # so the accessors below can rely on mapping operations.
+            raw_config = yaml.safe_load(f) or {}
+
+        # Expand environment variables
+        self._config = self._expand_env_vars(raw_config)
+
+    def _section(self, name: str) -> Dict[str, Any]:
+        """Return top-level section *name*, or an empty dict if it is absent or null"""
+        return self._config.get(name) or {}
+
+    def _expand_env_vars(self, obj):
+        """Recursively expand environment variables in configuration
+
+        Dicts and lists are rebuilt with expanded values, strings have every
+        ``${...}`` placeholder substituted, and any other value is returned
+        unchanged.
+        """
+        if isinstance(obj, dict):
+            return {k: self._expand_env_vars(v) for k, v in obj.items()}
+        if isinstance(obj, list):
+            return [self._expand_env_vars(item) for item in obj]
+        if not isinstance(obj, str):
+            return obj
+
+        # Imported here because the module does not import ``re`` at top level
+        import re
+
+        def replace_var(match):
+            var_expr = match.group(1)
+            var_name, has_default, default_value = var_expr.partition(":-")
+            if has_default:
+                # ${VAR:-default}: as in the shell, the default applies when
+                # the variable is unset OR set to an empty string.
+                return os.getenv(var_name) or default_value
+            return os.getenv(var_expr, "")
+
+        return re.sub(r"\$\{([^}]+)\}", replace_var, obj)
+
+    def get_integration_url(self, integration: str) -> str:
+        """Get the complete URL for an integration
+
+        Args:
+            integration: Integration name (openai, anthropic, google, litellm)
+
+        Returns:
+            Complete URL for the integration
+
+        Raises:
+            ValueError: If no endpoint is configured for the integration.
+
+        Examples:
+            get_integration_url("openai") -> "http://localhost:8080/openai"
+        """
+        bifrost_config = self._config["bifrost"]
+        base_url = bifrost_config["base_url"]
+        endpoint = bifrost_config["endpoints"].get(integration, "")
+
+        if not endpoint:
+            raise ValueError(f"No endpoint configured for integration: {integration}")
+
+        return f"{base_url.rstrip('/')}/{endpoint}"
+
+    def get_bifrost_config(self) -> BifrostConfig:
+        """Get Bifrost configuration"""
+        bifrost_data = self._config["bifrost"]
+        return BifrostConfig(
+            base_url=bifrost_data["base_url"], endpoints=bifrost_data["endpoints"]
+        )
+
+    def get_model(self, integration: str, model_type: str = "chat") -> str:
+        """Get model name for an integration and type
+
+        Maps integration names to provider configurations.
+
+        Args:
+            integration: Integration name (openai, anthropic, google, litellm, langchain)
+            model_type: Model type (chat, vision, tools, etc.)
+
+        Returns:
+            Model name for the integration and type
+
+        Raises:
+            ValueError: If the integration is not in INTEGRATION_TO_PROVIDER_MAP.
+        """
+        # Map integration to provider
+        provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
+        if not provider:
+            raise ValueError(
+                f"Unknown integration: {integration}. "
+                f"Valid integrations: {list(INTEGRATION_TO_PROVIDER_MAP.keys())}"
+            )
+
+        # Get model from provider configuration
+        return self.get_provider_model(provider, model_type)
+
+    def get_model_alternatives(self, integration: str) -> list:
+        """Get alternative models for an integration
+
+        Returns an empty list for an unknown integration or an unconfigured
+        provider.
+        """
+        provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
+        if not provider:
+            return []
+
+        providers = self._section("providers")
+        if provider not in providers:
+            return []
+
+        return providers[provider].get("alternatives", [])
+
+    def get_model_capabilities(self, model: str) -> Dict[str, Any]:
+        """Get capabilities for a specific model
+
+        Models without an entry get a chat-only default.
+        """
+        return self._section("model_capabilities").get(
+            model,
+            {
+                "chat": True,
+                "tools": False,
+                "vision": False,
+                "max_tokens": 4096,
+                "context_window": 4096,
+            },
+        )
+
+    def supports_capability(self, model: str, capability: str) -> bool:
+        """Check if a model supports a specific capability"""
+        caps = self.get_model_capabilities(model)
+        return caps.get(capability, False)
+
+    def get_api_config(self) -> Dict[str, Any]:
+        """Get API configuration (timeout, retries, etc.)"""
+        return self._config["api"]
+
+    def get_test_settings(self) -> Dict[str, Any]:
+        """Get test configuration settings"""
+        return self._config["test_settings"]
+
+    def get_integration_settings(self, integration: str) -> Dict[str, Any]:
+        """Get integration-specific settings (empty dict if none are configured)"""
+        return self._section("integration_settings").get(integration, {})
+
+    def get_environment_config(self, environment: str | None = None) -> Dict[str, Any]:
+        """Get environment-specific configuration
+
+        Args:
+            environment: Environment name (development, production, etc.)
+                        If None, uses TEST_ENV environment variable or 'development'
+
+        Returns:
+            Settings for the environment, or an empty dict if none are configured
+        """
+        if environment is None:
+            environment = os.getenv("TEST_ENV", "development")
+
+        return self._section("environments").get(environment, {})
+
+    def get_logging_config(self) -> Dict[str, Any]:
+        """Get logging configuration (empty dict if the section is missing)"""
+        return self._section("logging")
+
+    def list_integrations(self) -> list:
+        """List all configured integrations"""
+        return list(INTEGRATION_TO_PROVIDER_MAP.keys())
+
+    def list_models(self, integration: str | None = None) -> Dict[str, Any]:
+        """List all models for an integration or all integrations
+
+        Args:
+            integration: Integration name, or None for every integration
+
+        Returns:
+            ``{integration: provider_settings}``. With an explicit integration
+            the dict has exactly that one key; without one it holds every
+            integration whose provider is configured.
+
+        Raises:
+            ValueError: If an explicit integration is unknown or its provider
+                is not configured.
+        """
+        providers = self._section("providers")
+
+        if integration:
+            # Map integration to provider
+            provider = INTEGRATION_TO_PROVIDER_MAP.get(integration)
+            if not provider:
+                raise ValueError(f"Unknown integration: {integration}")
+
+            if provider not in providers:
+                raise ValueError(f"No provider configuration for: {provider}")
+
+            return {integration: providers[provider]}
+
+        # Return all providers mapped to their integration names
+        return {
+            name: providers[provider]
+            for name, provider in INTEGRATION_TO_PROVIDER_MAP.items()
+            if provider in providers
+        }
+
+    def validate_config(self) -> bool:
+        """Validate configuration completeness
+
+        Returns:
+            True when every check passes
+
+        Raises:
+            ValueError: On the first missing section, missing Bifrost key, or
+                integration whose provider is not configured.
+        """
+        required_sections = ["bifrost", "providers", "api", "test_settings"]
+
+        for section in required_sections:
+            if section not in self._config:
+                raise ValueError(f"Missing required configuration section: {section}")
+
+        # Validate Bifrost configuration
+        bifrost = self._config["bifrost"]
+        if "base_url" not in bifrost or "endpoints" not in bifrost:
+            raise ValueError("Bifrost configuration missing base_url or endpoints")
+
+        # Validate that all integrations map to valid providers
+        for integration, provider in INTEGRATION_TO_PROVIDER_MAP.items():
+            if provider not in self._config["providers"]:
+                raise ValueError(
+                    f"Integration '{integration}' maps to provider '{provider}' "
+                    f"which is not configured in providers section"
+                )
+
+        return True
+
+    def print_config_summary(self):
+        """Print a summary of the configuration to stdout"""
+        print("🔧 BIFROST INTEGRATION TEST CONFIGURATION")
+        print("=" * 80)
+
+        # Bifrost configuration
+        bifrost = self.get_bifrost_config()
+        print("\n🌉 BIFROST GATEWAY:")
+        print(f"  Base URL: {bifrost.base_url}")
+        print("  Endpoints:")
+        for integration, endpoint in bifrost.endpoints.items():
+            full_url = f"{bifrost.base_url.rstrip('/')}/{endpoint}"
+            print(f"    {integration}: {full_url}")
+
+        # Model configurations
+        providers = self._section("providers")
+        print("\n🤖 MODEL CONFIGURATIONS (via providers):")
+        for integration, provider in INTEGRATION_TO_PROVIDER_MAP.items():
+            if provider in providers:
+                models = providers[provider]
+                print(f"  {integration.upper()} → {provider}:")
+                print(f"    Chat: {models.get('chat', 'N/A')}")
+                print(f"    Vision: {models.get('vision', 'N/A')}")
+                print(f"    Tools: {models.get('tools', 'N/A')}")
+                alternatives = models.get('alternatives', [])
+                print(f"    Alternatives: {len(alternatives)} models")
+
+        # API settings
+        api_config = self.get_api_config()
+        print("\n⚙️  API SETTINGS:")
+        print(f"  Timeout: {api_config['timeout']}s")
+        print(f"  Max Retries: {api_config['max_retries']}")
+        print(f"  Retry Delay: {api_config['retry_delay']}s")
+
+        print(f"\n✅ Configuration loaded successfully from: {self.config_path}")
+
+    def get_provider_model(self, provider: str, capability: str = "chat") -> str:
+        """Get model name for a provider and capability
+
+        Args:
+            provider: Provider name (e.g., 'openai', 'anthropic', 'gemini')
+            capability: Capability type (default: 'chat')
+
+        Returns:
+            Model name suitable for the provider and capability, or an empty
+            string if the provider or capability is not configured
+        """
+        providers = self._section("providers")
+        if provider not in providers:
+            return ""
+
+        return providers[provider].get(capability, "")
+
+    def get_provider_api_key_env(self, provider: str) -> str:
+        """Get the environment variable name for a provider's API key
+
+        Args:
+            provider: Provider name
+
+        Returns:
+            Environment variable name, or an empty string if none is configured
+        """
+        return self._section("provider_api_keys").get(provider, "")
+
+    def is_provider_available(self, provider: str) -> bool:
+        """Check if a provider is available (has API key in environment)
+
+        Args:
+            provider: Provider name
+
+        Returns:
+            True if provider's API key is set to a non-blank value in environment
+        """
+        env_var = self.get_provider_api_key_env(provider)
+        if not env_var:
+            return False
+
+        api_key = os.getenv(env_var)
+        return bool(api_key and api_key.strip())
+
+    def get_available_providers(self) -> List[str]:
+        """Get list of providers that are available (have API keys configured)
+
+        Returns:
+            List of available provider names
+        """
+        return [
+            provider
+            for provider in self._section("providers")
+            if self.is_provider_available(provider)
+        ]
+
+    def provider_supports_scenario(self, provider: str, scenario: str) -> bool:
+        """Check if a provider supports a specific test scenario
+
+        Args:
+            provider: Provider name
+            scenario: Scenario name
+
+        Returns:
+            True if provider supports the scenario
+        """
+        provider_scenarios = self._section("provider_scenarios")
+        if provider not in provider_scenarios:
+            return False
+
+        return provider_scenarios[provider].get(scenario, False)
+
+    def get_providers_for_scenario(self, scenario: str) -> List[str]:
+        """Get list of available providers that support a specific scenario
+
+        Args:
+            scenario: Scenario name
+
+        Returns:
+            List of provider names that support the scenario
+        """
+        return [
+            provider
+            for provider in self.get_available_providers()
+            if self.provider_supports_scenario(provider, scenario)
+        ]
+
+    def get_scenario_capability(self, scenario: str) -> str:
+        """Get the capability type for a scenario
+
+        Args:
+            scenario: Scenario name
+
+        Returns:
+            Capability type (e.g., 'chat', 'vision', 'tools'); 'chat' when the
+            scenario has no mapping
+        """
+        return self._section("scenario_capabilities").get(scenario, "chat")
+
+    def get_virtual_key(self) -> str:
+        """Get the virtual key value for testing
+
+        Returns:
+            Virtual key string or empty string if not configured or not enabled
+        """
+        vk_config = self._section("virtual_key")
+        if not vk_config.get("enabled", False):
+            return ""
+
+        # A null "value" in the YAML would otherwise leak out as None
+        return vk_config.get("value") or ""
+
+    def is_virtual_key_configured(self) -> bool:
+        """Check if virtual key testing is enabled and configured
+
+        Returns:
+            True if virtual key is available for testing
+        """
+        vk = self.get_virtual_key()
+        return bool(vk and vk.strip())
+
+
+# Global configuration instance
+_config_loader = None
+
+
+def get_config() -> ConfigLoader:
+    """Return the module-wide ConfigLoader, constructing it on first use"""
+    global _config_loader
+    if _config_loader is not None:
+        return _config_loader
+    # First call: build the loader and remember it for later calls
+    _config_loader = ConfigLoader()
+    return _config_loader
+
+
+def get_integration_url(integration: str) -> str:
+    """Convenience function to get the URL for an integration from the global config"""
+    return get_config().get_integration_url(integration)
+
+
+def get_model(integration: str, model_type: str = "chat") -> str:
+    """Convenience function to get model name for an integration from the global config"""
+    return get_config().get_model(integration, model_type)
+
+
+def get_model_capabilities(model: str) -> Dict[str, Any]:
+    """Convenience function to get model capabilities from the global config"""
+    return get_config().get_model_capabilities(model)
+
+
+def supports_capability(model: str, capability: str) -> bool:
+    """Convenience function to check model capability against the global config"""
+    return get_config().supports_capability(model, capability)
+
+
+def get_provider_model(provider: str, capability: str = "chat") -> str:
+    """Convenience function to get provider model from the global config"""
+    return get_config().get_provider_model(provider, capability)
+
+
+def is_provider_available(provider: str) -> bool:
+    """Convenience function to check provider availability via the global config"""
+    return get_config().is_provider_available(provider)
+
+
+def get_available_providers() -> List[str]:
+    """Convenience function to get available providers from the global config"""
+    return get_config().get_available_providers()
+
+
+def provider_supports_scenario(provider: str, scenario: str) -> bool:
+    """Convenience function to check scenario support via the global config"""
+    return get_config().provider_supports_scenario(provider, scenario)
+
+
+def get_providers_for_scenario(scenario: str) -> List[str]:
+    """Convenience function to get providers for scenario from the global config"""
+    return get_config().get_providers_for_scenario(scenario)
+
+
+def get_virtual_key() -> str:
+    """Convenience function to get virtual key from the global config"""
+    return get_config().get_virtual_key()
+
+
+def is_virtual_key_configured() -> bool:
+    """Convenience function to check if virtual key is configured in the global config"""
+    return get_config().is_virtual_key_configured()
+
+
+if __name__ == "__main__":
+    # Print configuration summary when run directly.
+    # validate_config() raises ValueError on an incomplete config, so the
+    # summary is only printed for a config that passed validation.
+    config = get_config()
+    config.validate_config()
+    config.print_config_summary()
--- a/tests/integrations/python/tests/utils/models.py
+++ b/tests/integrations/python/tests/utils/models.py
@@ -0,0 +1,66 @@
+"""
+Model configurations for each integration.
+
+This file now acts as a compatibility layer and convenience wrapper
+around the new configuration system in config.yml and config_loader.py.
+
+All model data is now centralized in config.yml for easier maintenance.
+"""
+
+from typing import Dict, List
+from dataclasses import dataclass
+from .config_loader import get_config
+
+
+@dataclass
+class IntegrationModels:
+    """Model configuration for an integration"""
+
+    chat: str  # Primary chat model
+    vision: str  # Vision/multimodal model
+    tools: str  # Function calling model
+    alternatives: List[str]  # Alternative models for testing
+
+
+def get_integration_models() -> Dict[str, IntegrationModels]:
+    """Get all integration model configurations from config.yml
+
+    Returns:
+        ``{integration: IntegrationModels}`` for every integration whose
+        provider is configured. Integrations without a provider entry are
+        omitted. A model key missing from the provider settings becomes ""
+        (or an empty list for alternatives).
+    """
+    config = get_config()
+
+    # list_models() without an argument returns {integration: provider_settings}
+    # for configured providers only. The per-integration form wraps its result
+    # in a one-key dict, so indexing that with "chat" raised KeyError, and it
+    # raises ValueError for an unconfigured provider.
+    return {
+        integration: IntegrationModels(
+            chat=settings.get("chat", ""),
+            vision=settings.get("vision", ""),
+            tools=settings.get("tools", ""),
+            alternatives=settings.get("alternatives", []),
+        )
+        for integration, settings in config.list_models().items()
+    }
+
+
+# Backward compatibility - load from config.
+# Evaluated at import time, so importing this module reads the configuration.
+INTEGRATION_MODELS = get_integration_models()
+
+
+def get_alternatives(integration: str) -> List[str]:
+    """Get alternative models for an integration from the global config"""
+    config = get_config()
+    return config.get_model_alternatives(integration)
+
+
+def list_all_models() -> Dict[str, Dict[str, str]]:
+    """List all models by integration and type
+
+    Delegates to ``list_models()`` on the global config with no integration filter.
+    """
+    config = get_config()
+    return config.list_models()
+
+
+# Print model summary for documentation
+def print_model_summary():
+    """Print the global config's summary, which includes the per-integration models"""
+    config = get_config()
+    config.print_config_summary()
+
+
+if __name__ == "__main__":
+    # Print the model summary when run directly
+    print_model_summary()
--- a/tests/integrations/python/tests/utils/parametrize.py
+++ b/tests/integrations/python/tests/utils/parametrize.py
@@ -0,0 +1,141 @@
+"""
+Parametrization utilities for cross-provider testing.
+
+This module provides pytest parametrization for testing across multiple AI providers
+with automatic scenario-based filtering.
+"""
+
+from typing import List, Tuple, Union
+from .config_loader import get_config
+
+
+def get_cross_provider_params_for_scenario(
+    scenario: str,
+    include_providers: List[str] | None = None,
+    exclude_providers: List[str] | None = None,
+) -> List[Tuple[str, str]]:
+    """
+    Get cross-provider parameters for pytest parametrization.
+
+    The providers the config reports for ``scenario`` are narrowed by the
+    optional include/exclude filters, then each remaining provider is paired
+    with its model for the capability the scenario maps to.
+
+    Args:
+        scenario: Test scenario name
+        include_providers: Optional list of providers to include
+            (None or an empty list applies no include filter)
+        exclude_providers: Optional list of providers to exclude
+
+    Returns:
+        List of (provider, model) tuples ordered by provider name. Providers
+        for which the config yields no model are left out. If nothing
+        remains, a single ("_no_providers_", "_no_model_") placeholder tuple
+        is returned so the parameter list is never empty.
+    """
+    config = get_config()
+
+    # Get providers that support this scenario
+    providers = config.get_providers_for_scenario(scenario)
+
+    # Apply include filter
+    if include_providers:
+        providers = [p for p in providers if p in include_providers]
+
+    # Apply exclude filter
+    if exclude_providers:
+        providers = [p for p in providers if p not in exclude_providers]
+
+    # Sort for consistent test ordering
+    ordered = sorted(providers)
+
+    params = []
+    if ordered:
+        # The scenario → capability mapping does not depend on the provider,
+        # so resolve it once instead of once per provider. It is still only
+        # looked up when at least one provider remains.
+        capability = config.get_scenario_capability(scenario)
+        # Only keep providers that have a model for this capability
+        params = [
+            (provider, model)
+            for provider in ordered
+            if (model := config.get_provider_model(provider, capability))
+        ]
+
+    # If no providers available, return a dummy tuple to avoid pytest errors
+    # (the consuming test is expected to skip on this placeholder).
+    if not params:
+        params = [("_no_providers_", "_no_model_")]
+
+    return params
+
+
+def get_cross_provider_params_with_vk_for_scenario(
+    scenario: str,
+    include_providers: List[str] | None = None,
+    exclude_providers: List[str] | None = None,
+) -> List[Tuple[str, str, bool]]:
+    """
+    Expand cross-provider params with a virtual key (VK) flag.
+
+    Every (provider, model) pair is emitted with vk_enabled=False; when a
+    virtual key is configured, the same pair is emitted again right after
+    with vk_enabled=True.
+
+    Args:
+        scenario: Test scenario name
+        include_providers: Optional list of providers to include
+        exclude_providers: Optional list of providers to exclude
+
+    Returns:
+        List of (provider, model, vk_enabled) tuples
+
+    Example:
+        When VK is configured:
+        [
+            ("openai", "gpt-4o", False),
+            ("openai", "gpt-4o", True),
+            ("anthropic", "claude-3", False),
+            ("anthropic", "claude-3", True),
+        ]
+    """
+    config = get_config()
+
+    pairs = get_cross_provider_params_for_scenario(
+        scenario, include_providers, exclude_providers
+    )
+
+    # The "no providers" placeholder is passed through with VK disabled.
+    if pairs == [("_no_providers_", "_no_model_")]:
+        return [("_no_providers_", "_no_model_", False)]
+
+    # The non-VK variant always comes first; the VK variant only when configured.
+    vk_flags = (False, True) if config.is_virtual_key_configured() else (False,)
+
+    return [
+        (provider, model, flag)
+        for provider, model in pairs
+        for flag in vk_flags
+    ]
+
+
+def format_vk_test_id(provider: str, model: str, vk_enabled: bool) -> str:
+    """
+    Build the test ID used by virtual key parameterized tests.
+
+    Args:
+        provider: Provider name
+        model: Model name
+        vk_enabled: Whether VK is enabled
+
+    Returns:
+        "<provider>-<model>-with_vk" when vk_enabled is truthy,
+        otherwise "<provider>-<model>-no_vk"
+
+    Example:
+        >>> format_vk_test_id("openai", "gpt-4o", True)
+        'openai-gpt-4o-with_vk'
+        >>> format_vk_test_id("openai", "gpt-4o", False)
+        'openai-gpt-4o-no_vk'
+    """
+    if vk_enabled:
+        return f"{provider}-{model}-with_vk"
+    return f"{provider}-{model}-no_vk"
+
+
+def format_provider_model(provider: str, model: str) -> str:
+    """
+    Join a provider and a model name as "provider/model".
+
+    Args:
+        provider: Provider name
+        model: Model name
+
+    Returns:
+        The two names separated by a single "/"
+
+    Example:
+        >>> format_provider_model("openai", "gpt-4o")
+        'openai/gpt-4o'
+    """
+    return "{}/{}".format(provider, model)
--- a/tests/integrations/python/uv.lock
+++ b/tests/integrations/python/uv.lock