first commit

2026-04-26 21:52:23 +03:00
commit 880f412e2c
2662 changed files with 866266 additions and 0 deletions
--- a/tests/async/README.md
+++ b/tests/async/README.md
@@ -0,0 +1,63 @@
+# Async Inference E2E Tests
+
+End-to-end tests for Bifrost's async inference feature (`/v1/async/*` endpoints and integration route headers).
+
+## Running
+
+```bash
+go test ./... -timeout 300s
+```
+
+With virtual keys (enables VK-scoped auth tests):
+
+```bash
+BIFROST_VK=sk-bf-... BIFROST_ALT_VK=sk-bf-... go test ./... -timeout 300s
+```
+
+## Environment Variables
+
+| Variable | Default | Description |
+|---|---|---|
+| `BIFROST_BASE_URL` | `http://localhost:8080` | Bifrost gateway URL |
+| `BIFROST_VK` | — | Primary virtual key; enables VK-mode tests |
+| `BIFROST_ALT_VK` | — | Second virtual key; enables cross-VK auth tests |
+| `BIFROST_POLL_TIMEOUT` | `30s` | Max time to wait for a job to reach terminal state |
+| `BIFROST_POLL_INTERVAL` | `500ms` | Polling cadence |
+| `BIFROST_INTEGRATION_PATH` | `/openai/v1/responses` | Override integration route path |
+| `BIFROST_INTEGRATION_MODEL` | `openai/gpt-4o-mini` | Override model for integration route tests |
+| `ASYNC_*_MODEL` | see below | Override model per endpoint (e.g. `ASYNC_CHAT_COMPLETION_MODEL`) |
+
+### Model overrides
+
+| Variable | Default |
+|---|---|
+| `ASYNC_TEXT_COMPLETION_MODEL` | `openai/gpt-3.5-turbo-instruct` |
+| `ASYNC_CHAT_COMPLETION_MODEL` | `openai/gpt-4o-mini` |
+| `ASYNC_RESPONSES_MODEL` | `openai/gpt-4o-mini` |
+| `ASYNC_EMBEDDINGS_MODEL` | `openai/text-embedding-3-small` |
+| `ASYNC_SPEECH_MODEL` | `openai/tts-1` |
+| `ASYNC_TRANSCRIPTION_MODEL` | `openai/whisper-1` |
+| `ASYNC_IMAGE_GEN_MODEL` | `openai/dall-e-3` |
+| `ASYNC_IMAGE_EDIT_MODEL` | `openai/dall-e-2` |
+| `ASYNC_IMAGE_VARIATION_MODEL` | `openai/dall-e-2` |
+| `ASYNC_RERANK_MODEL` | `cohere/rerank-english-v3.0` |
+| `ASYNC_OCR_MODEL` | `mistral/mistral-ocr-latest` |
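+| `ASYNC_OCR_IMAGE_URL` | carpenter-ant JPEG on the pestworld CDN (full URL in `fixtures_test.go`) — not a model; overrides the image sent to the OCR endpoint |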
+
+## Test files
+
+| File | What it covers |
+|---|---|
+| `submit_test.go` | All 11 endpoints return 202, well-formed job envelope, immediate poll status |
+| `lifecycle_test.go` | Jobs reach terminal state, 404 for non-existent/wrong-type, result shape |
+| `auth_test.go` | VK scoping, cross-VK isolation, all three auth header formats |
+| `ttl_test.go` | Default/custom/invalid TTL, TTL expiry → 404 |
+| `validation_test.go` | Stream rejection, malformed JSON, missing required fields, wrong HTTP method |
+| `integration_route_test.go` | `x-bf-async` / `x-bf-async-id` headers on `/openai/v1/responses` |
+
+## Notes
+
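+- The whole suite is skipped (a `SKIP:` message is printed and the process exits 0) when the gateway is unreachable or `/health` returns a 5xx status; the check runs once in `TestMain`.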
+- Most tests run in two modes: **global** (no VK) and **with_vk** (when `BIFROST_VK` is set).
+- Integration route tests use the Responses API path — `AsyncChatResponseConverter` is not implemented on any route; only `AsyncResponsesResponseConverter` is wired up.
+- `BIFROST_ALT_VK` is only required for cross-VK isolation tests (`TestAuth_VKScoped_DifferentKey_Returns404`, `TestIntegration_VKScope_DifferentKey_Returns4xx`).
--- a/tests/async/auth_test.go
+++ b/tests/async/auth_test.go
@@ -0,0 +1,176 @@
+package async
+
+import (
+	"net/http"
+	"strings"
+	"testing"
+)
+
+// Auth tests cover every combination of VK presence at submit and poll time.
+// All tests use chat_completions as a representative endpoint.
+
+// assertPollSuccess aborts the calling test when a poll answered with anything
+// other than 200 OK or 202 Accepted. The raw body is included in the failure
+// message to aid debugging.
+func assertPollSuccess(t *testing.T, code int, body []byte) {
+	t.Helper()
+	switch code {
+	case http.StatusOK, http.StatusAccepted:
+		return
+	}
+	t.Fatalf("expected 200/202, got %d: %s", code, body)
+}
+
+// TestAuth_Submit_InvalidVK_Returns400 submits a chat-completion job carrying a
+// virtual key the gateway should not know and expects the submit itself to be
+// rejected with 400.
+// The test only runs when BIFROST_VK is set, which is taken as evidence that VK
+// governance is active on the server.
+func TestAuth_Submit_InvalidVK_Returns400(t *testing.T) {
+	if cfg.VK == "" {
+		t.Skip("BIFROST_VK not set — governance may not be active")
+	}
+	const bogusVK = "sk-bf-nonexistent-key-for-auth-test"
+	status, _, raw := submitCase(t, chatCompletionCase(), vkHeaders(bogusVK))
+	if status == http.StatusBadRequest {
+		return
+	}
+	t.Errorf("expected 400 for unknown VK on submit, got %d: %s", status, raw)
+}
+
+// TestAuth_VKScoped_SameKey_Succeeds creates a job under BIFROST_VK and polls it
+// once with the very same key; the poll must answer 200 or 202.
+func TestAuth_VKScoped_SameKey_Succeeds(t *testing.T) {
+	if cfg.VK == "" {
+		t.Skip("BIFROST_VK not set")
+	}
+	fixture := chatCompletionCase()
+	auth := vkHeaders(cfg.VK)
+
+	_, job, submitBody := submitCase(t, fixture, auth)
+	if job.ID == "" {
+		t.Fatalf("submit returned no job id: %s", submitBody)
+	}
+
+	status, _, pollBody := pollOnce(t, jobPollPath(fixture.pollBase, job.ID), auth)
+	assertPollSuccess(t, status, pollBody)
+}
+
+// TestAuth_VKScoped_DifferentKey_Returns404 submits with BIFROST_VK and polls with
+// BIFROST_ALT_VK. The gateway is expected to answer 404 because the job belongs to
+// a different virtual key.
+//
+// The test is skipped unless both keys are configured AND differ from each other:
+// with identical values the poll would legitimately succeed and the test would
+// report a false isolation failure.
+func TestAuth_VKScoped_DifferentKey_Returns404(t *testing.T) {
+	if cfg.VK == "" || cfg.AltVK == "" {
+		t.Skip("both BIFROST_VK and BIFROST_ALT_VK must be set")
+	}
+	if cfg.VK == cfg.AltVK {
+		t.Skip("BIFROST_VK and BIFROST_ALT_VK are identical — cross-VK isolation cannot be exercised")
+	}
+	ec := chatCompletionCase()
+	_, submitted, body := submitCase(t, ec, vkHeaders(cfg.VK))
+	if submitted.ID == "" {
+		t.Fatalf("submit returned no job id: %s", body)
+	}
+
+	pollPath := jobPollPath(ec.pollBase, submitted.ID)
+	code, _, pollBody := pollOnce(t, pollPath, vkHeaders(cfg.AltVK))
+	if code != http.StatusNotFound {
+		// Include the body so an unexpected status can be diagnosed from the test log.
+		t.Errorf("expected 404 when polling with a different VK, got %d: %s", code, pollBody)
+	}
+}
+
+// TestAuth_VKScoped_MissingKeyOnPoll_Returns404 creates a job under a virtual key
+// and then polls it without any key. The test expects 404 for a VK-scoped job
+// when the poll carries no VK.
+func TestAuth_VKScoped_MissingKeyOnPoll_Returns404(t *testing.T) {
+	if cfg.VK == "" {
+		t.Skip("BIFROST_VK not set")
+	}
+	fixture := chatCompletionCase()
+	_, job, raw := submitCase(t, fixture, vkHeaders(cfg.VK))
+	if job.ID == "" {
+		t.Fatalf("submit returned no job id: %s", raw)
+	}
+
+	status, _, _ := pollOnce(t, jobPollPath(fixture.pollBase, job.ID), nil)
+	if status == http.StatusNotFound {
+		return
+	}
+	t.Errorf("expected 404 when polling a VK-scoped job without a VK, got %d", status)
+}
+
+// TestAuth_PublicJob_AnonymousPoll_Succeeds submits and polls without any virtual
+// key. A job created without a VK is expected to be pollable anonymously.
+func TestAuth_PublicJob_AnonymousPoll_Succeeds(t *testing.T) {
+	fixture := chatCompletionCase()
+
+	_, job, submitBody := submitCase(t, fixture, nil)
+	if job.ID == "" {
+		t.Fatalf("submit returned no job id: %s", submitBody)
+	}
+
+	status, _, pollBody := pollOnce(t, jobPollPath(fixture.pollBase, job.ID), nil)
+	assertPollSuccess(t, status, pollBody)
+}
+
+// TestAuth_PublicJob_VKPoll_Succeeds submits anonymously and polls with BIFROST_VK.
+// Per docs: "Jobs created without a virtual key are not virtual-key scoped, so they
+// can be polled by any caller that passes your gateway auth/middleware checks."
+func TestAuth_PublicJob_VKPoll_Succeeds(t *testing.T) {
+	if cfg.VK == "" {
+		t.Skip("BIFROST_VK not set")
+	}
+	fixture := chatCompletionCase()
+
+	_, job, submitBody := submitCase(t, fixture, nil)
+	if job.ID == "" {
+		t.Fatalf("submit returned no job id: %s", submitBody)
+	}
+
+	status, _, pollBody := pollOnce(t, jobPollPath(fixture.pollBase, job.ID), vkHeaders(cfg.VK))
+	assertPollSuccess(t, status, pollBody)
+}
+
+// vkPrefixed reports whether vk starts with the governance virtual-key prefix
+// "sk-bf-", compared case-insensitively.
+// Only keys with this prefix are recognised by the Authorization, x-api-key, and
+// x-goog-api-key header paths in ConvertToBifrostContext.
+func vkPrefixed(vk string) bool {
+	const governancePrefix = "sk-bf-"
+	normalized := strings.ToLower(vk)
+	return strings.HasPrefix(normalized, governancePrefix)
+}
+
+// TestAuth_BearerVK_SameKey_Succeeds sends the virtual key as
+// "Authorization: Bearer <vk>" on both submit and poll, exercising the Bearer
+// token path in ConvertToBifrostContext.
+func TestAuth_BearerVK_SameKey_Succeeds(t *testing.T) {
+	if !(cfg.VK != "" && vkPrefixed(cfg.VK)) {
+		t.Skip("BIFROST_VK not set or does not start with sk-bf- prefix")
+	}
+	fixture := chatCompletionCase()
+	auth := map[string]string{}
+	auth["Authorization"] = "Bearer " + cfg.VK
+
+	_, job, submitBody := submitCase(t, fixture, auth)
+	if job.ID == "" {
+		t.Fatalf("submit returned no job id: %s", submitBody)
+	}
+	status, _, pollBody := pollOnce(t, jobPollPath(fixture.pollBase, job.ID), auth)
+	assertPollSuccess(t, status, pollBody)
+}
+
+// TestAuth_ApiKeyVK_SameKey_Succeeds sends the virtual key in the "x-api-key"
+// header on both submit and poll, exercising the x-api-key path in
+// ConvertToBifrostContext.
+func TestAuth_ApiKeyVK_SameKey_Succeeds(t *testing.T) {
+	if !(cfg.VK != "" && vkPrefixed(cfg.VK)) {
+		t.Skip("BIFROST_VK not set or does not start with sk-bf- prefix")
+	}
+	fixture := chatCompletionCase()
+	auth := map[string]string{}
+	auth["x-api-key"] = cfg.VK
+
+	_, job, submitBody := submitCase(t, fixture, auth)
+	if job.ID == "" {
+		t.Fatalf("submit returned no job id: %s", submitBody)
+	}
+	status, _, pollBody := pollOnce(t, jobPollPath(fixture.pollBase, job.ID), auth)
+	assertPollSuccess(t, status, pollBody)
+}
+
+// TestAuth_GoogApiKeyVK_SameKey_Succeeds sends the virtual key in the
+// "x-goog-api-key" header on both submit and poll, exercising the x-goog-api-key
+// path in ConvertToBifrostContext.
+func TestAuth_GoogApiKeyVK_SameKey_Succeeds(t *testing.T) {
+	if !(cfg.VK != "" && vkPrefixed(cfg.VK)) {
+		t.Skip("BIFROST_VK not set or does not start with sk-bf- prefix")
+	}
+	fixture := chatCompletionCase()
+	auth := map[string]string{}
+	auth["x-goog-api-key"] = cfg.VK
+
+	_, job, submitBody := submitCase(t, fixture, auth)
+	if job.ID == "" {
+		t.Fatalf("submit returned no job id: %s", submitBody)
+	}
+	status, _, pollBody := pollOnce(t, jobPollPath(fixture.pollBase, job.ID), auth)
+	assertPollSuccess(t, status, pollBody)
+}
--- a/tests/async/fixtures_test.go
+++ b/tests/async/fixtures_test.go
@@ -0,0 +1,220 @@
+package async
+
+import (
+	"bytes"
+	"image"
+	"image/color"
+	"image/png"
+	"os"
+	"path/filepath"
+)
+
+// endpointCase describes a single async endpoint and the request payload to send.
+// Exactly one of body / multipart is expected to be populated: submitCase sends
+// multipart when it is non-nil and falls back to the JSON body otherwise.
+type endpointCase struct {
+	name       string // fixture identifier; also used as the subtest name
+	submitPath string // POST target, e.g. /v1/async/chat/completions
+	pollBase   string // GET base; job ID is appended as /{job_id}
+	body       map[string]any // JSON payload, marshalled by submitJSON
+	multipart  *multipartCase // when non-nil, the request is sent as multipart/form-data
+}
+
+// multipartCase holds fields and named files for a multipart/form-data submission.
+// Both maps are keyed by form field name; submitMultipart writes them in map
+// iteration order, so part order is not deterministic.
+type multipartCase struct {
+	fields map[string]string // plain text form fields
+	files  map[string]fileFixture // file parts, keyed by form field name
+}
+
+// fileFixture is an in-memory file attached to a multipart submission.
+type fileFixture struct {
+	filename string // filename passed to CreateFormFile for the part
+	data     []byte // raw file contents written into the part
+}
+
+// defaultModels maps each ASYNC_*_MODEL env key to its default model string.
+// modelFor consults this table when the corresponding environment variable is
+// unset or empty; a key missing from the table yields the empty string.
+var defaultModels = map[string]string{
+	"ASYNC_TEXT_COMPLETION_MODEL": "openai/gpt-3.5-turbo-instruct",
+	"ASYNC_CHAT_COMPLETION_MODEL": "openai/gpt-4o-mini",
+	"ASYNC_RESPONSES_MODEL":       "openai/gpt-4o-mini",
+	"ASYNC_EMBEDDINGS_MODEL":      "openai/text-embedding-3-small",
+	"ASYNC_SPEECH_MODEL":          "openai/tts-1",
+	"ASYNC_TRANSCRIPTION_MODEL":   "openai/whisper-1",
+	"ASYNC_IMAGE_GEN_MODEL":       "openai/dall-e-3",
+	"ASYNC_IMAGE_EDIT_MODEL":      "openai/dall-e-2",
+	"ASYNC_IMAGE_VARIATION_MODEL": "openai/dall-e-2",
+	"ASYNC_RERANK_MODEL":          "cohere/rerank-english-v3.0",
+	"ASYNC_OCR_MODEL":             "mistral/mistral-ocr-latest",
+}
+
+// modelFor resolves the model string for envKey: a non-empty environment variable
+// of that name wins, otherwise the entry in defaultModels is used (the empty
+// string when envKey has no entry there).
+func modelFor(envKey string) string {
+	return envOr(envKey, defaultModels[envKey])
+}
+
+// endpointCases returns the full set of async endpoint fixtures, one per supported endpoint.
+// Override any model via the corresponding ASYNC_*_MODEL environment variable.
+//
+// The slice is rebuilt on every call: environment variables are re-read, and
+// sampleAudio / samplePNG are re-evaluated, so a missing sample.mp3 makes every
+// caller panic — including callers that only want a JSON fixture.
+func endpointCases() []endpointCase {
+	return []endpointCase{
+		// JSON endpoints: submit and poll share the same base path.
+		{
+			name:       "text_completions",
+			submitPath: "/v1/async/completions",
+			pollBase:   "/v1/async/completions",
+			body: map[string]any{
+				"model":      modelFor("ASYNC_TEXT_COMPLETION_MODEL"),
+				"prompt":     "Say hello in one word.",
+				"max_tokens": 10,
+			},
+		},
+		{
+			name:       "chat_completions",
+			submitPath: "/v1/async/chat/completions",
+			pollBase:   "/v1/async/chat/completions",
+			body: map[string]any{
+				"model": modelFor("ASYNC_CHAT_COMPLETION_MODEL"),
+				"messages": []map[string]any{
+					{"role": "user", "content": "Say hello in one word."},
+				},
+				"max_tokens": 10,
+			},
+		},
+		{
+			name:       "responses",
+			submitPath: "/v1/async/responses",
+			pollBase:   "/v1/async/responses",
+			body: map[string]any{
+				"model": modelFor("ASYNC_RESPONSES_MODEL"),
+				"input": "Say hello in one word.",
+			},
+		},
+		{
+			name:       "embeddings",
+			submitPath: "/v1/async/embeddings",
+			pollBase:   "/v1/async/embeddings",
+			body: map[string]any{
+				"model": modelFor("ASYNC_EMBEDDINGS_MODEL"),
+				"input": "Hello world",
+			},
+		},
+		{
+			name:       "speech",
+			submitPath: "/v1/async/audio/speech",
+			pollBase:   "/v1/async/audio/speech",
+			body: map[string]any{
+				"model": modelFor("ASYNC_SPEECH_MODEL"),
+				"input": "Hello",
+				"voice": "alloy",
+			},
+		},
+		// Multipart endpoint: the audio file is read from disk by sampleAudio.
+		{
+			name:       "transcriptions",
+			submitPath: "/v1/async/audio/transcriptions",
+			pollBase:   "/v1/async/audio/transcriptions",
+			multipart: &multipartCase{
+				fields: map[string]string{
+					"model": modelFor("ASYNC_TRANSCRIPTION_MODEL"),
+				},
+				files: map[string]fileFixture{
+					"file": {filename: "sample.mp3", data: sampleAudio()},
+				},
+			},
+		},
+		{
+			name:       "image_generations",
+			submitPath: "/v1/async/images/generations",
+			pollBase:   "/v1/async/images/generations",
+			body: map[string]any{
+				"model":  modelFor("ASYNC_IMAGE_GEN_MODEL"),
+				"prompt": "A simple red circle on a white background",
+				"n":      1,
+				"size":   "1024x1024",
+			},
+		},
+		// Multipart image endpoints: form values are strings, so "n" is "1" here
+		// rather than the integer used by the JSON image_generations fixture.
+		{
+			name:       "image_edits",
+			submitPath: "/v1/async/images/edits",
+			pollBase:   "/v1/async/images/edits",
+			multipart: &multipartCase{
+				fields: map[string]string{
+					"model":  modelFor("ASYNC_IMAGE_EDIT_MODEL"),
+					"prompt": "Make it blue",
+					"n":      "1",
+					"size":   "256x256",
+				},
+				files: map[string]fileFixture{
+					"image": {filename: "image.png", data: samplePNG()},
+				},
+			},
+		},
+		{
+			name:       "image_variations",
+			submitPath: "/v1/async/images/variations",
+			pollBase:   "/v1/async/images/variations",
+			multipart: &multipartCase{
+				fields: map[string]string{
+					"model": modelFor("ASYNC_IMAGE_VARIATION_MODEL"),
+					"n":     "1",
+					"size":  "256x256",
+				},
+				files: map[string]fileFixture{
+					"image": {filename: "image.png", data: samplePNG()},
+				},
+			},
+		},
+		{
+			name:       "rerank",
+			submitPath: "/v1/async/rerank",
+			pollBase:   "/v1/async/rerank",
+			body: map[string]any{
+				"model": modelFor("ASYNC_RERANK_MODEL"),
+				"query": "What is the capital of France?",
+				"documents": []map[string]any{
+					{"text": "Paris is the capital of France."},
+					{"text": "London is the capital of the United Kingdom."},
+					{"text": "Berlin is the capital of Germany."},
+				},
+			},
+		},
+		// OCR references a remote image by URL; override it with ASYNC_OCR_IMAGE_URL.
+		{
+			name:       "ocr",
+			submitPath: "/v1/async/ocr",
+			pollBase:   "/v1/async/ocr",
+			body: map[string]any{
+				"model": modelFor("ASYNC_OCR_MODEL"),
+				"document": map[string]any{
+					"type":      "image_url",
+					"image_url": envOr("ASYNC_OCR_IMAGE_URL", "https://pestworldcdn-dcf2a8gbggazaghf.z01.azurefd.net/media/561791/carpenter-ant4.jpg"),
+				},
+			},
+		},
+	}
+}
+
+// sampleAudio loads core/internal/llmtests/scenarios/media/sample.mp3 and returns
+// its bytes, panicking when the file cannot be read.
+// go test sets the working directory to the package source directory, so the
+// relative path is stable without runtime.Caller (which breaks under -trimpath).
+func sampleAudio() []byte {
+	segments := []string{"..", "..", "core", "internal", "llmtests", "scenarios", "media", "sample.mp3"}
+	mediaPath := filepath.Join(segments...)
+	contents, err := os.ReadFile(mediaPath)
+	if err == nil {
+		return contents
+	}
+	panic("sampleAudio: cannot read " + mediaPath + ": " + err.Error())
+}
+
+// samplePNG generates a 256x256 white PNG that is guaranteed to be encoded WITH an
+// alpha channel (PNG colour type 6), for the image edit / variation fixtures.
+// DALL-E 2 requires images with an alpha channel (RGBA PNG).
+//
+// png.Encode inspects the pixels and writes a fully opaque image as plain RGB
+// (colour type 2), silently dropping the alpha channel even when the source is an
+// *image.RGBA. To keep the channel, the top-left pixel is made minimally
+// translucent (alpha 254); the result is visually indistinguishable from white.
+// The function panics if encoding fails.
+func samplePNG() []byte {
+	const side = 256
+	img := image.NewRGBA(image.Rect(0, 0, side, side))
+	// Every byte of an opaque white RGBA pixel is 0xFF, so fill the buffer directly.
+	for i := range img.Pix {
+		img.Pix[i] = 0xFF
+	}
+	// One non-opaque pixel forces the encoder to emit RGBA rather than RGB.
+	// image.RGBA is alpha-premultiplied, so the colour channels must not exceed alpha.
+	img.SetRGBA(0, 0, color.RGBA{R: 254, G: 254, B: 254, A: 254})
+
+	var buf bytes.Buffer
+	if err := png.Encode(&buf, img); err != nil {
+		panic("samplePNG: encode failed: " + err.Error())
+	}
+	return buf.Bytes()
+}
--- a/tests/async/go.mod
+++ b/tests/async/go.mod
@@ -0,0 +1,3 @@
+module github.com/maximhq/bifrost/tests/async
+
+go 1.26.2
--- a/tests/async/helpers_test.go
+++ b/tests/async/helpers_test.go
@@ -0,0 +1,276 @@
+package async
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"mime/multipart"
+	"net/http"
+	"os"
+	"testing"
+	"time"
+)
+
+// Defaults applied when the corresponding BIFROST_* environment variable is
+// unset, empty, or (for the durations) not parseable.
+const (
+	defaultBaseURL      = "http://localhost:8080"
+	defaultPollTimeout  = 30 * time.Second
+	defaultPollInterval = 500 * time.Millisecond
+)
+
+// httpClient is used for all test HTTP calls; the 15s timeout prevents CI hangs.
+// The timeout applies to each request individually, not to a whole poll loop.
+var httpClient = &http.Client{Timeout: 15 * time.Second}
+
+// cfg holds e2e configuration sourced from environment variables at startup.
+// It is initialised once at package initialisation, so changing the environment
+// after the test binary has started has no effect on it.
+var cfg = struct {
+	BaseURL      string
+	VK           string // BIFROST_VK — primary virtual key
+	AltVK        string // BIFROST_ALT_VK — a second, different virtual key for auth tests
+	PollTimeout  time.Duration
+	PollInterval time.Duration
+}{
+	BaseURL:      envOr("BIFROST_BASE_URL", defaultBaseURL),
+	VK:           os.Getenv("BIFROST_VK"),
+	AltVK:        os.Getenv("BIFROST_ALT_VK"),
+	PollTimeout:  parseDuration(os.Getenv("BIFROST_POLL_TIMEOUT"), defaultPollTimeout),
+	PollInterval: parseDuration(os.Getenv("BIFROST_POLL_INTERVAL"), defaultPollInterval),
+}
+
+// envOr returns the value of the environment variable key, or fallback when the
+// variable is unset or set to the empty string.
+func envOr(key, fallback string) string {
+	value := os.Getenv(key)
+	if value == "" {
+		return fallback
+	}
+	return value
+}
+
+// parseDuration parses s with time.ParseDuration and returns fallback when s is
+// empty, malformed, or not strictly positive.
+//
+// Non-positive values are rejected because the results feed the poll loop: a zero
+// interval would turn polling into a busy loop, and a zero or negative timeout
+// would make the loop give up before issuing a single request.
+func parseDuration(s string, fallback time.Duration) time.Duration {
+	// time.ParseDuration reports an error for the empty string, so the
+	// "variable not set" case is covered by the same branch.
+	d, err := time.ParseDuration(s)
+	if err != nil || d <= 0 {
+		return fallback
+	}
+	return d
+}
+
+// testMode describes one execution round for the core test suites.
+// Tests iterate over testModes() and run each round as a subtest named after it.
+type testMode struct {
+	name    string // subtest name, e.g. "global" or "with_vk"
+	headers map[string]string // headers to attach to every submit and poll call
+}
+
+// testModes lists the rounds every core test must execute: always a "global"
+// round without headers, plus a "with_vk" round carrying BIFROST_VK when that
+// variable is set.
+func testModes() []testMode {
+	global := testMode{name: "global", headers: nil}
+	if cfg.VK == "" {
+		return []testMode{global}
+	}
+	withVK := testMode{name: "with_vk", headers: vkHeaders(cfg.VK)}
+	return []testMode{global, withVK}
+}
+
+// --- Response types ---
+
+// AsyncJobResponse is the test-side view of the JSON envelope the gateway returns
+// for async jobs. Result and Error are kept as raw JSON so tests can check for
+// presence without committing to a per-endpoint schema.
+type AsyncJobResponse struct {
+	ID          string          `json:"id"`
+	Status      string          `json:"status"`
+	CreatedAt   time.Time       `json:"created_at"`
+	CompletedAt *time.Time      `json:"completed_at"`
+	ExpiresAt   *time.Time      `json:"expires_at"`
+	StatusCode  int             `json:"status_code"`
+	Result      json.RawMessage `json:"result"`
+	Error       json.RawMessage `json:"error"`
+}
+
+// isTerminal reports whether the job has finished, i.e. its status is either
+// "completed" or "failed".
+func (j AsyncJobResponse) isTerminal() bool {
+	switch j.Status {
+	case "completed", "failed":
+		return true
+	default:
+		return false
+	}
+}
+
+// --- HTTP helpers ---
+
+// submitJSON marshals body to JSON, POSTs it to cfg.BaseURL+path and returns the
+// HTTP status code, the decoded job envelope, and the raw response body.
+// Caller-supplied headers are applied after Content-Type and may override it.
+func submitJSON(t *testing.T, path string, body any, headers map[string]string) (int, AsyncJobResponse, []byte) {
+	t.Helper()
+	payload, marshalErr := json.Marshal(body)
+	if marshalErr != nil {
+		t.Fatalf("submitJSON: marshal: %v", marshalErr)
+	}
+	target := cfg.BaseURL + path
+	req, reqErr := http.NewRequest(http.MethodPost, target, bytes.NewReader(payload))
+	if reqErr != nil {
+		t.Fatalf("submitJSON: new request: %v", reqErr)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	for name, value := range headers {
+		req.Header.Set(name, value)
+	}
+	return doRequest(t, req)
+}
+
+// submitRaw POSTs raw verbatim with the given Content-Type and returns the HTTP
+// status code and raw response body. It is used where the payload must not go
+// through json.Marshal, e.g. malformed-JSON validation tests.
+func submitRaw(t *testing.T, path string, raw []byte, contentType string, headers map[string]string) (int, []byte) {
+	t.Helper()
+	target := cfg.BaseURL + path
+	req, reqErr := http.NewRequest(http.MethodPost, target, bytes.NewReader(raw))
+	if reqErr != nil {
+		t.Fatalf("submitRaw: new request: %v", reqErr)
+	}
+	req.Header.Set("Content-Type", contentType)
+	for name, value := range headers {
+		req.Header.Set(name, value)
+	}
+	// The decoded envelope is irrelevant to callers of the raw variant.
+	status, _, respBody := doRequest(t, req)
+	return status, respBody
+}
+
+// submitMultipart encodes mp as multipart/form-data (text fields first, then file
+// parts), POSTs it to cfg.BaseURL+path and returns the HTTP status code, the
+// decoded job envelope, and the raw response body.
+func submitMultipart(t *testing.T, path string, mp *multipartCase, headers map[string]string) (int, AsyncJobResponse, []byte) {
+	t.Helper()
+	payload := new(bytes.Buffer)
+	form := multipart.NewWriter(payload)
+	for name, value := range mp.fields {
+		if err := form.WriteField(name, value); err != nil {
+			t.Fatalf("submitMultipart: write field %q: %v", name, err)
+		}
+	}
+	for name, fixture := range mp.files {
+		part, err := form.CreateFormFile(name, fixture.filename)
+		if err != nil {
+			t.Fatalf("submitMultipart: create form file %q: %v", name, err)
+		}
+		if _, err = part.Write(fixture.data); err != nil {
+			t.Fatalf("submitMultipart: write file %q: %v", name, err)
+		}
+	}
+	// Close writes the trailing boundary; the body is incomplete without it.
+	if err := form.Close(); err != nil {
+		t.Fatalf("submitMultipart: close writer: %v", err)
+	}
+
+	req, err := http.NewRequest(http.MethodPost, cfg.BaseURL+path, payload)
+	if err != nil {
+		t.Fatalf("submitMultipart: new request: %v", err)
+	}
+	// FormDataContentType carries the boundary generated by the writer.
+	req.Header.Set("Content-Type", form.FormDataContentType())
+	for name, value := range headers {
+		req.Header.Set(name, value)
+	}
+	return doRequest(t, req)
+}
+
+// submitCase submits the fixture ec: as JSON when it has no multipart section,
+// as multipart/form-data otherwise.
+func submitCase(t *testing.T, ec endpointCase, headers map[string]string) (int, AsyncJobResponse, []byte) {
+	t.Helper()
+	if ec.multipart == nil {
+		return submitJSON(t, ec.submitPath, ec.body, headers)
+	}
+	return submitMultipart(t, ec.submitPath, ec.multipart, headers)
+}
+
+// pollOnce issues one GET against cfg.BaseURL+pollPath with the given headers and
+// returns the HTTP status code, the decoded job envelope, and the raw body.
+func pollOnce(t *testing.T, pollPath string, headers map[string]string) (int, AsyncJobResponse, []byte) {
+	t.Helper()
+	target := cfg.BaseURL + pollPath
+	req, reqErr := http.NewRequest(http.MethodGet, target, nil)
+	if reqErr != nil {
+		t.Fatalf("pollOnce: new request: %v", reqErr)
+	}
+	for name, value := range headers {
+		req.Header.Set(name, value)
+	}
+	return doRequest(t, req)
+}
+
+// pollUntilTerminal polls pollPath every cfg.PollInterval until the job reports a
+// terminal status, and returns the HTTP status code and envelope of that poll.
+// The test is aborted when a non-terminal poll answers with anything but 202, or
+// when cfg.PollTimeout elapses first.
+func pollUntilTerminal(t *testing.T, pollPath string, headers map[string]string) (int, AsyncJobResponse) {
+	t.Helper()
+	giveUpAt := time.Now().Add(cfg.PollTimeout)
+	for time.Now().Before(giveUpAt) {
+		status, job, _ := pollOnce(t, pollPath, headers)
+		switch {
+		case job.isTerminal():
+			return status, job
+		case status != http.StatusAccepted:
+			t.Fatalf("unexpected HTTP %d while polling %s (status=%q)", status, pollPath, job.Status)
+		}
+		time.Sleep(cfg.PollInterval)
+	}
+	t.Fatalf("timed out after %s waiting for terminal status on %s", cfg.PollTimeout, pollPath)
+	// Unreachable: t.Fatalf stops the test goroutine; the return satisfies the compiler.
+	return 0, AsyncJobResponse{}
+}
+
+// --- Path / header helpers ---
+
+// jobPollPath joins a poll base path and a job ID with a single slash, yielding
+// base/jobID. Neither part is escaped or normalised.
+func jobPollPath(base, jobID string) string {
+	return fmt.Sprintf("%s/%s", base, jobID)
+}
+
+// vkHeaders builds the header map that carries vk in the x-bf-vk header.
+// An empty vk yields a nil map, which the submit and poll helpers accept as
+// "no extra headers".
+func vkHeaders(vk string) map[string]string {
+	var headers map[string]string
+	if vk != "" {
+		headers = map[string]string{"x-bf-vk": vk}
+	}
+	return headers
+}
+
+// withTTLHeader returns a copy of headers with x-bf-async-job-result-ttl set to
+// ttlSeconds rendered as a decimal integer. The input map is not modified.
+func withTTLHeader(headers map[string]string, ttlSeconds int) map[string]string {
+	return withRawHeader(headers, "x-bf-async-job-result-ttl", fmt.Sprintf("%d", ttlSeconds))
+}
+
+// withRawHeader returns a copy of headers with key set to value, replacing any
+// existing entry for key. The input map (which may be nil) is not modified.
+func withRawHeader(headers map[string]string, key, value string) map[string]string {
+	merged := make(map[string]string, len(headers)+1)
+	for name, existing := range headers {
+		merged[name] = existing
+	}
+	merged[key] = value
+	return merged
+}
+
+// doRequest sends req through the shared httpClient and returns the status code,
+// the body decoded as an AsyncJobResponse, and the raw body bytes.
+// Transport and read failures abort the test.
+func doRequest(t *testing.T, req *http.Request) (int, AsyncJobResponse, []byte) {
+	t.Helper()
+	resp, doErr := httpClient.Do(req)
+	if doErr != nil {
+		t.Fatalf("HTTP %s %s failed: %v", req.Method, req.URL, doErr)
+	}
+	defer resp.Body.Close()
+
+	raw, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		t.Fatalf("read response body: %v", readErr)
+	}
+
+	// Decoding is best-effort: bodies that are not a job envelope leave the
+	// struct zero or partially filled, and callers fall back to the raw bytes.
+	var envelope AsyncJobResponse
+	_ = json.Unmarshal(raw, &envelope)
+	return resp.StatusCode, envelope, raw
+}
+
+// chatCompletionCase picks the "chat_completions" fixture out of endpointCases.
+// It serves as the representative endpoint in auth and TTL tests where endpoint
+// variety is not the focus, and panics if the fixture is missing.
+func chatCompletionCase() endpointCase {
+	fixtures := endpointCases()
+	for i := range fixtures {
+		if fixtures[i].name == "chat_completions" {
+			return fixtures[i]
+		}
+	}
+	panic("chatCompletionCase: fixture not found")
+}
+
+// TestMain probes GET /health on the Bifrost gateway before running any test.
+// Set BIFROST_BASE_URL to override the default http://localhost:8080.
+//
+// The suite is skipped — a SKIP line is printed and the process exits 0 — in two
+// distinct situations, each with its own message:
+//   - the request itself failed (gateway not reachable), or
+//   - the gateway answered with a 5xx status (reachable but unhealthy).
+//
+// Any other status counts as "gateway is up" and the tests run.
+func TestMain(m *testing.M) {
+	resp, err := httpClient.Get(cfg.BaseURL + "/health")
+	if err != nil {
+		fmt.Printf("SKIP: Bifrost gateway not reachable at %s (err=%v)\n", cfg.BaseURL, err)
+		os.Exit(0)
+	}
+	// Close explicitly rather than with defer: os.Exit does not run deferred calls.
+	resp.Body.Close()
+	if resp.StatusCode >= 500 {
+		fmt.Printf("SKIP: Bifrost gateway at %s is unhealthy (GET /health returned %d)\n", cfg.BaseURL, resp.StatusCode)
+		os.Exit(0)
+	}
+	os.Exit(m.Run())
+}
--- a/tests/async/integration_route_test.go
+++ b/tests/async/integration_route_test.go
@@ -0,0 +1,174 @@
+package async
+
+import (
+	"encoding/json"
+	"maps"
+	"net/http"
+	"os"
+	"strings"
+	"testing"
+)
+
+// Integration route tests verify that x-bf-async and x-bf-async-id headers work on
+// provider integration routes. These routes apply a provider-specific response converter,
+// so the envelope differs from /v1/async/* endpoints:
+//
+//	Submit  (x-bf-async: true)     → HTTP 200 (not 202)
+//	Retrieve (x-bf-async-id: <id>) → HTTP 200 for any job state
+//
+// Optional env:
+//
+//	BIFROST_INTEGRATION_PATH  — override the default /openai/v1/responses
+//	BIFROST_INTEGRATION_MODEL — model string; defaults to ASYNC_RESPONSES_MODEL default
+//
+// Note: only routes with AsyncResponsesResponseConverter support x-bf-async.
+// AsyncChatResponseConverter is not implemented on any route — the Responses API
+// path (/openai/v1/responses) is the only integration route that supports async.
+
+// integrationPath returns the integration route under test: the value of
+// BIFROST_INTEGRATION_PATH when set and non-empty, /openai/v1/responses otherwise.
+func integrationPath() string {
+	const defaultPath = "/openai/v1/responses"
+	return envOr("BIFROST_INTEGRATION_PATH", defaultPath)
+}
+
+// integrationModel returns the model used on the integration route:
+// BIFROST_INTEGRATION_MODEL when set and non-empty, otherwise whatever modelFor
+// resolves for ASYNC_RESPONSES_MODEL.
+func integrationModel() string {
+	override := os.Getenv("BIFROST_INTEGRATION_MODEL")
+	if override == "" {
+		return modelFor("ASYNC_RESPONSES_MODEL")
+	}
+	return override
+}
+
+// assert4xx aborts the test unless code lies in the 400–499 range. Requiring a
+// client error (rather than "any non-2xx") makes 5xx regressions fail the test.
+func assert4xx(t *testing.T, code int, body []byte) {
+	t.Helper()
+	if code >= 400 && code < 500 {
+		return
+	}
+	t.Fatalf("expected 4xx, got %d: %s", code, body)
+}
+
+// integrationJobID pulls the top-level "id" string out of an integration route
+// response body. It returns "" when the body is not a JSON object, or when "id"
+// is absent or not a string.
+// All integration converters preserve the async job ID in the top-level "id" field.
+func integrationJobID(t *testing.T, body []byte) string {
+	t.Helper()
+	var envelope map[string]any
+	if json.Unmarshal(body, &envelope) != nil {
+		return ""
+	}
+	// A failed type assertion leaves id as the empty string.
+	id, _ := envelope["id"].(string)
+	return id
+}
+
+// pollIntegration retrieves a job through an integration route by POSTing an
+// empty JSON object with the x-bf-async-id header set to jobID, and returns the
+// HTTP status code and raw body. The caller's header map is copied, not modified.
+// Integration routes use the same POST method for both submit and retrieve.
+func pollIntegration(t *testing.T, path, jobID string, headers map[string]string) (int, []byte) {
+	t.Helper()
+	merged := make(map[string]string, len(headers)+1)
+	maps.Copy(merged, headers)
+	merged["x-bf-async-id"] = jobID
+	return submitRaw(t, path, []byte("{}"), "application/json", merged)
+}
+
+// integrationSubmitBody builds the smallest Responses API payload used on the
+// integration path: the configured model plus a short text input.
+func integrationSubmitBody() map[string]any {
+	payload := make(map[string]any, 2)
+	payload["model"] = integrationModel()
+	payload["input"] = "Say hello in one word."
+	return payload
+}
+
+// TestIntegration_AsyncCreate_Returns200WithJobID submits a Responses API request
+// via the integration route with the x-bf-async header and confirms the answer is
+// 200 OK with an id shaped like a UUID (8-4-4-4-12 groups; only group lengths are
+// checked, not the characters).
+// Integration routes return 200 (not 202) because the response passes through the
+// provider-specific converter before being sent.
+func TestIntegration_AsyncCreate_Returns200WithJobID(t *testing.T) {
+	uuidGroupLens := []int{8, 4, 4, 4, 12}
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			asyncHeaders := withRawHeader(mode.headers, "x-bf-async", "true")
+			status, _, raw := submitJSON(t, integrationPath(), integrationSubmitBody(), asyncHeaders)
+			if status != http.StatusOK {
+				t.Fatalf("expected 200 from integration async submit, got %d: %s", status, raw)
+			}
+			jobID := integrationJobID(t, raw)
+			if jobID == "" {
+				t.Fatalf("no job id in integration route response: %s", raw)
+			}
+			groups := strings.Split(jobID, "-")
+			looksLikeUUID := len(groups) == len(uuidGroupLens)
+			for i := 0; looksLikeUUID && i < len(uuidGroupLens); i++ {
+				looksLikeUUID = len(groups[i]) == uuidGroupLens[i]
+			}
+			if !looksLikeUUID {
+				t.Errorf("id %q does not look like a UUID", jobID)
+			}
+		})
+	}
+}
+
+// TestIntegration_AsyncRetrieve_Returns200 creates an async job on the integration
+// route and immediately retrieves it via the x-bf-async-id header, expecting 200 OK
+// on the retrieve as well. Runs once per test mode.
+func TestIntegration_AsyncRetrieve_Returns200(t *testing.T) {
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			asyncHeaders := withRawHeader(mode.headers, "x-bf-async", "true")
+			submitStatus, _, submitBody := submitJSON(t, integrationPath(), integrationSubmitBody(), asyncHeaders)
+			if submitStatus != http.StatusOK {
+				t.Fatalf("submit failed with %d: %s", submitStatus, submitBody)
+			}
+			jobID := integrationJobID(t, submitBody)
+			if jobID == "" {
+				t.Fatalf("no job id in submit response: %s", submitBody)
+			}
+
+			// Retrieve with the mode's plain headers; x-bf-async is a submit-only flag.
+			status, raw := pollIntegration(t, integrationPath(), jobID, mode.headers)
+			if status == http.StatusOK {
+				return
+			}
+			t.Errorf("expected 200 on integration retrieve, got %d: %s", status, raw)
+		})
+	}
+}
+
+// TestIntegration_AsyncRetrieve_NonExistentJob_Returns4xx retrieves the all-zero
+// UUID through the integration route and requires a 4xx client error in every
+// test mode.
+func TestIntegration_AsyncRetrieve_NonExistentJob_Returns4xx(t *testing.T) {
+	const fakeID = "00000000-0000-0000-0000-000000000000"
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			status, raw := pollIntegration(t, integrationPath(), fakeID, mode.headers)
+			assert4xx(t, status, raw)
+		})
+	}
+}
+
+// TestIntegration_AsyncCreate_StreamRejected submits a request that sets both
+// "stream": true and the x-bf-async header, and requires a 4xx answer —
+// streaming and async are mutually exclusive.
+func TestIntegration_AsyncCreate_StreamRejected(t *testing.T) {
+	payload := map[string]any{
+		"model":  integrationModel(),
+		"input":  "Hello",
+		"stream": true,
+	}
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			asyncHeaders := withRawHeader(mode.headers, "x-bf-async", "true")
+			status, _, raw := submitJSON(t, integrationPath(), payload, asyncHeaders)
+			assert4xx(t, status, raw)
+		})
+	}
+}
+
+// TestIntegration_VKScope_DifferentKey_Returns4xx submits an async job on the
+// integration route with BIFROST_VK and retrieves it with BIFROST_ALT_VK,
+// requiring a 4xx answer to confirm VK isolation on integration routes.
+//
+// The test is skipped unless both keys are configured AND differ from each other:
+// with identical values the retrieve would legitimately succeed and the test
+// would report a false isolation failure.
+func TestIntegration_VKScope_DifferentKey_Returns4xx(t *testing.T) {
+	if cfg.VK == "" || cfg.AltVK == "" {
+		t.Skip("both BIFROST_VK and BIFROST_ALT_VK must be set")
+	}
+	if cfg.VK == cfg.AltVK {
+		t.Skip("BIFROST_VK and BIFROST_ALT_VK are identical — cross-VK isolation cannot be exercised")
+	}
+	headers := withRawHeader(vkHeaders(cfg.VK), "x-bf-async", "true")
+	code, _, body := submitJSON(t, integrationPath(), integrationSubmitBody(), headers)
+	if code != http.StatusOK {
+		t.Fatalf("submit failed with %d: %s", code, body)
+	}
+	jobID := integrationJobID(t, body)
+	if jobID == "" {
+		t.Fatalf("no job id in submit response: %s", body)
+	}
+
+	pollCode, pollBody := pollIntegration(t, integrationPath(), jobID, vkHeaders(cfg.AltVK))
+	assert4xx(t, pollCode, pollBody)
+}
--- a/tests/async/lifecycle_test.go
+++ b/tests/async/lifecycle_test.go
@@ -0,0 +1,180 @@
+package async
+
+import (
+	"encoding/json"
+	"net/http"
+	"testing"
+)
+
+// TestLifecycle_AllEndpoints_ReachesTerminalState submits one job per entry in
+// endpointCases(), polls it with pollUntilTerminal, and validates the shape of the
+// final response. Both "completed" and "failed" are accepted outcomes — the test
+// asserts the async mechanism itself, not model availability — but any other
+// status handed back by the poll helper is reported as an error.
+// Runs once per mode returned by testModes().
+func TestLifecycle_AllEndpoints_ReachesTerminalState(t *testing.T) {
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			for _, ec := range endpointCases() {
+				t.Run(ec.name, func(t *testing.T) {
+					_, submitted, body := submitCase(t, ec, mode.headers)
+					if submitted.ID == "" {
+						t.Fatalf("submit returned no job id: %s", body)
+					}
+
+					pollPath := jobPollPath(ec.pollBase, submitted.ID)
+					code, job := pollUntilTerminal(t, pollPath, mode.headers)
+
+					if code != http.StatusOK {
+						t.Errorf("expected 200 for terminal job, got %d", code)
+					}
+					if job.ID != submitted.ID {
+						t.Errorf("polled id %q does not match submitted id %q", job.ID, submitted.ID)
+					}
+					if job.CompletedAt == nil {
+						t.Error("completed_at must be set on a terminal job")
+					}
+					if job.ExpiresAt == nil {
+						t.Error("expires_at must be set on a terminal job")
+					}
+					// Only compare the timestamps when both are present; the nil cases
+					// were already reported above.
+					if job.CompletedAt != nil && job.ExpiresAt != nil && !job.ExpiresAt.After(*job.CompletedAt) {
+						t.Error("expires_at must be after completed_at")
+					}
+
+					switch job.Status {
+					case "completed":
+						if len(job.Result) == 0 || string(job.Result) == "null" {
+							t.Error("completed job must have a non-null result")
+						}
+					case "failed":
+						if len(job.Error) == 0 || string(job.Error) == "null" {
+							t.Error("failed job must have a non-null error")
+						}
+						if job.StatusCode == 0 {
+							t.Error("failed job must carry a non-zero status_code")
+						}
+					default:
+						// Without this branch an unknown or non-terminal status would
+						// skip every payload check and could pass unnoticed.
+						t.Errorf("expected terminal status completed or failed, got %q", job.Status)
+					}
+				})
+			}
+		})
+	}
+}
+
+// TestLifecycle_Poll_NonExistentJob_Returns404 polls a job ID that was never created
+// and expects 404 in every mode returned by testModes().
+// Only the chat_completions case is exercised; per the original author's note, all
+// endpoints share the same retrieval path, so repeating the check for each endpoint
+// would add no coverage.
+func TestLifecycle_Poll_NonExistentJob_Returns404(t *testing.T) {
+	const unknownJobID = "00000000-0000-0000-0000-000000000000"
+	representative := chatCompletionCase()
+	for _, m := range testModes() {
+		t.Run(m.name, func(t *testing.T) {
+			target := jobPollPath(representative.pollBase, unknownJobID)
+			if status, _, _ := pollOnce(t, target, m.headers); status != http.StatusNotFound {
+				t.Errorf("expected 404 for non-existent job, got %d", status)
+			}
+		})
+	}
+}
+
+// TestLifecycle_CompletedJobResultShape verifies that the result JSON of a completed
+// job contains the expected non-empty top-level array for the endpoints that have a
+// registered shape check (chat_completions, embeddings, rerank). Jobs that end in any
+// status other than "completed" are skipped — the test asserts structure, not model
+// availability. Requests are sent with nil headers.
+func TestLifecycle_CompletedJobResultShape(t *testing.T) {
+	type shapeCheck struct {
+		name  string
+		check func(t *testing.T, result json.RawMessage)
+	}
+
+	// requireChoices expects a non-empty "choices" array.
+	requireChoices := func(t *testing.T, result json.RawMessage) {
+		var decoded struct {
+			Choices []json.RawMessage `json:"choices"`
+		}
+		err := json.Unmarshal(result, &decoded)
+		if err != nil {
+			t.Fatalf("unmarshal choices: %v", err)
+		}
+		if len(decoded.Choices) == 0 {
+			t.Error("completed chat job must have at least one choice")
+		}
+	}
+
+	// requireData expects a non-empty "data" array.
+	requireData := func(t *testing.T, result json.RawMessage) {
+		var decoded struct {
+			Data []json.RawMessage `json:"data"`
+		}
+		err := json.Unmarshal(result, &decoded)
+		if err != nil {
+			t.Fatalf("unmarshal data: %v", err)
+		}
+		if len(decoded.Data) == 0 {
+			t.Error("completed embeddings job must have at least one data entry")
+		}
+	}
+
+	// requireResults expects a non-empty "results" array.
+	requireResults := func(t *testing.T, result json.RawMessage) {
+		var decoded struct {
+			Results []json.RawMessage `json:"results"`
+		}
+		err := json.Unmarshal(result, &decoded)
+		if err != nil {
+			t.Fatalf("unmarshal results: %v", err)
+		}
+		if len(decoded.Results) == 0 {
+			t.Error("completed rerank job must have at least one result")
+		}
+	}
+
+	// Keyed by endpoint case name; endpoints without an entry are not checked.
+	shapeChecks := map[string]shapeCheck{
+		"chat_completions": {name: "choices[]", check: requireChoices},
+		"embeddings":       {name: "data[]", check: requireData},
+		"rerank":           {name: "results[]", check: requireResults},
+	}
+
+	for _, tc := range endpointCases() {
+		sc, registered := shapeChecks[tc.name]
+		if registered {
+			t.Run(tc.name+"/"+sc.name, func(t *testing.T) {
+				_, created, raw := submitCase(t, tc, nil)
+				if created.ID == "" {
+					t.Fatalf("submit returned no job id: %s", raw)
+				}
+				_, final := pollUntilTerminal(t, jobPollPath(tc.pollBase, created.ID), nil)
+				if final.Status != "completed" {
+					t.Skipf("job status=%q (not completed) — shape check skipped", final.Status)
+				}
+				sc.check(t, final.Result)
+			})
+		}
+	}
+}
+
+// TestLifecycle_Poll_WrongEndpointType_Returns404 creates a job through the first
+// endpoint case and polls it through the second case's poll base, expecting 404
+// because the job type does not match the path. Skipped when fewer than two
+// endpoint cases exist.
+func TestLifecycle_Poll_WrongEndpointType_Returns404(t *testing.T) {
+	all := endpointCases()
+	if len(all) < 2 {
+		t.Skip("need at least two endpoint cases")
+	}
+	// Submit on all[0], poll via all[1]'s poll base.
+	owner, mismatchedBase := all[0], all[1].pollBase
+
+	for _, m := range testModes() {
+		t.Run(m.name, func(t *testing.T) {
+			_, created, raw := submitCase(t, owner, m.headers)
+			if created.ID == "" {
+				t.Fatalf("submit returned no job id: %s", raw)
+			}
+
+			status, _, _ := pollOnce(t, jobPollPath(mismatchedBase, created.ID), m.headers)
+			if status != http.StatusNotFound {
+				t.Errorf("expected 404 when polling with wrong endpoint type, got %d", status)
+			}
+		})
+	}
+}
--- a/tests/async/submit_test.go
+++ b/tests/async/submit_test.go
@@ -0,0 +1,89 @@
+package async
+
+import (
+	"net/http"
+	"strings"
+	"testing"
+	"time"
+)
+
+// TestSubmit_AllEndpoints_Returns202 verifies that every case in endpointCases()
+// answers a submit with 202 Accepted and a well-formed job envelope: a UUID-shaped
+// id, status "pending", a recent created_at, and no completed_at / expires_at.
+// Runs once per mode returned by testModes().
+func TestSubmit_AllEndpoints_Returns202(t *testing.T) {
+	// UUID textual layout: 8-4-4-4-12 hexadecimal digits separated by hyphens.
+	uuidGroupLens := [...]int{8, 4, 4, 4, 12}
+	notHex := func(r rune) bool {
+		switch {
+		case r >= '0' && r <= '9', r >= 'a' && r <= 'f', r >= 'A' && r <= 'F':
+			return false
+		}
+		return true
+	}
+
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			for _, ec := range endpointCases() {
+				t.Run(ec.name, func(t *testing.T) {
+					code, job, body := submitCase(t, ec, mode.headers)
+
+					if code != http.StatusAccepted {
+						t.Fatalf("expected 202, got %d: %s", code, body)
+					}
+					if job.ID == "" {
+						t.Fatal("response missing id")
+					}
+
+					// Check both the group lengths and that every group is pure hex;
+					// a length-only check would accept ids such as "zzzzzzzz-zzzz-...".
+					parts := strings.Split(job.ID, "-")
+					looksLikeUUID := len(parts) == len(uuidGroupLens)
+					for i := 0; looksLikeUUID && i < len(parts); i++ {
+						looksLikeUUID = len(parts[i]) == uuidGroupLens[i] &&
+							strings.IndexFunc(parts[i], notHex) == -1
+					}
+					if !looksLikeUUID {
+						t.Errorf("id %q does not look like a UUID", job.ID)
+					}
+
+					if job.Status != "pending" {
+						t.Errorf("expected status=pending, got %q", job.Status)
+					}
+
+					// A zero timestamp is reported once; the staleness check only makes
+					// sense for a timestamp that was actually set.
+					switch {
+					case job.CreatedAt.IsZero():
+						t.Error("created_at is zero")
+					case time.Since(job.CreatedAt) > 30*time.Second:
+						t.Errorf("created_at %v appears stale (>30s ago)", job.CreatedAt)
+					}
+
+					if job.CompletedAt != nil {
+						t.Error("completed_at must be absent on a freshly submitted job")
+					}
+					if job.ExpiresAt != nil {
+						t.Error("expires_at must be absent on a freshly submitted job")
+					}
+				})
+			}
+		})
+	}
+}
+
+// TestSubmit_AllEndpoints_PollPathReturnsPending polls each job once, right after it
+// was submitted, and checks that the HTTP status agrees with the reported job status:
+// 202 for pending/processing, 200 for completed/failed. Any other job status is an
+// error. Runs once per mode returned by testModes().
+func TestSubmit_AllEndpoints_PollPathReturnsPending(t *testing.T) {
+	for _, m := range testModes() {
+		t.Run(m.name, func(t *testing.T) {
+			for _, tc := range endpointCases() {
+				t.Run(tc.name, func(t *testing.T) {
+					submitStatus, created, raw := submitCase(t, tc, m.headers)
+					if submitStatus != http.StatusAccepted {
+						t.Fatalf("expected submit 202, got %d: %s", submitStatus, raw)
+					}
+					if created.ID == "" {
+						t.Fatalf("submit returned no job id: %s", raw)
+					}
+
+					status, snapshot, _ := pollOnce(t, jobPollPath(tc.pollBase, created.ID), m.headers)
+					inFlight := snapshot.Status == "pending" || snapshot.Status == "processing"
+					finished := snapshot.Status == "completed" || snapshot.Status == "failed"
+
+					switch {
+					case inFlight:
+						if status != http.StatusAccepted {
+							t.Errorf("expected 202 for status %q, got %d", snapshot.Status, status)
+						}
+					case finished:
+						if status != http.StatusOK {
+							t.Errorf("expected 200 for terminal status %q, got %d", snapshot.Status, status)
+						}
+					default:
+						t.Errorf("unexpected status %q (HTTP %d)", snapshot.Status, status)
+					}
+				})
+			}
+		})
+	}
+}
--- a/tests/async/ttl_test.go
+++ b/tests/async/ttl_test.go
@@ -0,0 +1,157 @@
+package async
+
+import (
+	"net/http"
+	"testing"
+	"time"
+)
+
+// TTL tests use chat_completions as a representative endpoint and run in both
+// global and VK modes. They verify that expires_at is set correctly relative to
+// completed_at based on the TTL value in effect.
+
+// TestTTL_DefaultApplied submits a chat completion job without any TTL header and
+// asserts that expires_at lies 3600s (± 60s) after completed_at.
+func TestTTL_DefaultApplied(t *testing.T) {
+	for _, m := range testModes() {
+		t.Run(m.name, func(t *testing.T) {
+			tc := chatCompletionCase()
+			_, created, raw := submitCase(t, tc, m.headers)
+			if created.ID == "" {
+				t.Fatalf("submit returned no job id: %s", raw)
+			}
+			_, final := pollUntilTerminal(t, jobPollPath(tc.pollBase, created.ID), m.headers)
+			assertTTL(t, final, 3600, 60)
+		})
+	}
+}
+
+// TestTTL_CustomHeaderApplied submits with a TTL header of 120 seconds (built by
+// withTTLHeader) and asserts that expires_at lies 120s (± 30s) after completed_at.
+func TestTTL_CustomHeaderApplied(t *testing.T) {
+	const customTTL = 120
+	for _, m := range testModes() {
+		t.Run(m.name, func(t *testing.T) {
+			tc := chatCompletionCase()
+			submitHeaders := withTTLHeader(m.headers, customTTL)
+			_, created, raw := submitCase(t, tc, submitHeaders)
+			if created.ID == "" {
+				t.Fatalf("submit returned no job id: %s", raw)
+			}
+			// The TTL header is submit-only, so polling uses the plain mode headers.
+			target := jobPollPath(tc.pollBase, created.ID)
+			_, final := pollUntilTerminal(t, target, m.headers)
+			assertTTL(t, final, customTTL, 30)
+		})
+	}
+}
+
+// TestTTL_InvalidHeader_FallsBackToDefault sends the non-numeric value "not-a-number"
+// in x-bf-async-job-result-ttl and asserts the job still gets the 3600s (± 60s) TTL.
+func TestTTL_InvalidHeader_FallsBackToDefault(t *testing.T) {
+	for _, m := range testModes() {
+		t.Run(m.name, func(t *testing.T) {
+			tc := chatCompletionCase()
+			submitHeaders := withRawHeader(m.headers, "x-bf-async-job-result-ttl", "not-a-number")
+			_, created, raw := submitCase(t, tc, submitHeaders)
+			if created.ID == "" {
+				t.Fatalf("submit returned no job id: %s", raw)
+			}
+			target := jobPollPath(tc.pollBase, created.ID)
+			_, final := pollUntilTerminal(t, target, m.headers)
+			assertTTL(t, final, 3600, 60)
+		})
+	}
+}
+
+// TestTTL_ZeroHeader_FallsBackToDefault submits with a TTL header of 0 and asserts
+// the job gets the 3600s (± 60s) TTL instead, i.e. zero is treated as invalid.
+func TestTTL_ZeroHeader_FallsBackToDefault(t *testing.T) {
+	for _, m := range testModes() {
+		t.Run(m.name, func(t *testing.T) {
+			tc := chatCompletionCase()
+			submitHeaders := withTTLHeader(m.headers, 0)
+			_, created, raw := submitCase(t, tc, submitHeaders)
+			if created.ID == "" {
+				t.Fatalf("submit returned no job id: %s", raw)
+			}
+			target := jobPollPath(tc.pollBase, created.ID)
+			_, final := pollUntilTerminal(t, target, m.headers)
+			assertTTL(t, final, 3600, 60)
+		})
+	}
+}
+
+// TestTTL_NegativeHeader_FallsBackToDefault submits with a TTL header of -1 and
+// asserts the job gets the 3600s (± 60s) TTL instead.
+func TestTTL_NegativeHeader_FallsBackToDefault(t *testing.T) {
+	for _, m := range testModes() {
+		t.Run(m.name, func(t *testing.T) {
+			tc := chatCompletionCase()
+			submitHeaders := withTTLHeader(m.headers, -1)
+			_, created, raw := submitCase(t, tc, submitHeaders)
+			if created.ID == "" {
+				t.Fatalf("submit returned no job id: %s", raw)
+			}
+			target := jobPollPath(tc.pollBase, created.ID)
+			_, final := pollUntilTerminal(t, target, m.headers)
+			assertTTL(t, final, 3600, 60)
+		})
+	}
+}
+
+// TestTTL_ExpiredJob_Returns404 submits a job with a 10-second TTL, waits for it to
+// reach a terminal state, and then keeps polling every 250ms until the server answers
+// 404. The test fails if no 404 is seen within TTL+10 seconds of the terminal poll.
+func TestTTL_ExpiredJob_Returns404(t *testing.T) {
+	const shortTTL = 10 // seconds — must be larger than BIFROST_POLL_INTERVAL
+	for _, m := range testModes() {
+		t.Run(m.name, func(t *testing.T) {
+			tc := chatCompletionCase()
+			_, created, raw := submitCase(t, tc, withTTLHeader(m.headers, shortTTL))
+			if created.ID == "" {
+				t.Fatalf("submit returned no job id: %s", raw)
+			}
+
+			target := jobPollPath(tc.pollBase, created.ID)
+			pollUntilTerminal(t, target, m.headers)
+
+			// Generous deadline (TTL plus ten seconds of slack) to avoid flakiness.
+			giveUpAt := time.Now().Add((shortTTL + 10) * time.Second)
+			for status, _, _ := pollOnce(t, target, m.headers); status != http.StatusNotFound; status, _, _ = pollOnce(t, target, m.headers) {
+				if time.Now().After(giveUpAt) {
+					t.Fatalf("expected 404 after TTL expiry, last code=%d", status)
+				}
+				time.Sleep(250 * time.Millisecond)
+			}
+		})
+	}
+}
+
+// assertTTL checks that expires_at ≈ completed_at + wantTTLSeconds within toleranceSeconds.
+func assertTTL(t *testing.T, job AsyncJobResponse, wantTTLSeconds, toleranceSeconds int) {
+	t.Helper()
+	if job.CompletedAt == nil {
+		t.Fatal("completed_at is nil, cannot verify TTL")
+	}
+	if job.ExpiresAt == nil {
+		t.Fatal("expires_at is nil, cannot verify TTL")
+	}
+	actual := job.ExpiresAt.Sub(*job.CompletedAt)
+	want := time.Duration(wantTTLSeconds) * time.Second
+	tolerance := time.Duration(toleranceSeconds) * time.Second
+	diff := actual - want
+	if diff < 0 {
+		diff = -diff
+	}
+	if diff > tolerance {
+		t.Errorf("TTL mismatch: expires_at - completed_at = %v, want %v ± %v",
+			actual, want, tolerance)
+	}
+}
--- a/tests/async/validation_test.go
+++ b/tests/async/validation_test.go
@@ -0,0 +1,306 @@
+package async
+
+import (
+	"net/http"
+	"testing"
+)
+
+// streamEndpoints enumerates the async endpoints whose JSON body carries a "stream"
+// flag; each entry's body sets "stream": true so the request should be rejected.
+// Speech is covered by its own test because it uses stream_format instead.
+// image_edits and image_variations are multipart-only: their stream value is a form
+// field rather than a JSON body field, so they are not listed here.
+var streamEndpoints = []struct {
+	name       string
+	submitPath string
+	body       map[string]any
+}{
+	{
+		"text_completions",
+		"/v1/async/completions",
+		map[string]any{
+			"model":  modelFor("ASYNC_TEXT_COMPLETION_MODEL"),
+			"prompt": "Hello",
+			"stream": true,
+		},
+	},
+	{
+		"chat_completions",
+		"/v1/async/chat/completions",
+		map[string]any{
+			"model":    modelFor("ASYNC_CHAT_COMPLETION_MODEL"),
+			"messages": []map[string]any{{"role": "user", "content": "Hello"}},
+			"stream":   true,
+		},
+	},
+	{
+		"responses",
+		"/v1/async/responses",
+		map[string]any{
+			"model":  modelFor("ASYNC_RESPONSES_MODEL"),
+			"input":  "Hello",
+			"stream": true,
+		},
+	},
+	{
+		"image_generations",
+		"/v1/async/images/generations",
+		map[string]any{
+			"model":  modelFor("ASYNC_IMAGE_GEN_MODEL"),
+			"prompt": "A circle",
+			"stream": true,
+		},
+	},
+}
+
+// TestValidation_StreamRejected_Returns400 posts every streamEndpoints body (each
+// has stream=true) and expects 400 Bad Request. Runs once per mode returned by
+// testModes(); per the original author's note, the stream check happens before VK
+// resolution, so both modes should behave the same.
+func TestValidation_StreamRejected_Returns400(t *testing.T) {
+	for _, m := range testModes() {
+		t.Run(m.name, func(t *testing.T) {
+			for _, endpoint := range streamEndpoints {
+				t.Run(endpoint.name, func(t *testing.T) {
+					status, _, raw := submitJSON(t, endpoint.submitPath, endpoint.body, m.headers)
+					if status == http.StatusBadRequest {
+						return
+					}
+					t.Errorf("expected 400 for stream=true on %s, got %d: %s",
+						endpoint.submitPath, status, raw)
+				})
+			}
+		})
+	}
+}
+
+// TestValidation_SpeechStreamFormatRejected_Returns400 confirms that the speech
+// endpoint rejects stream_format=sse with 400.
+func TestValidation_SpeechStreamFormatRejected_Returns400(t *testing.T) {
+	body := map[string]any{
+		"model":         modelFor("ASYNC_SPEECH_MODEL"),
+		"input":         "Hello",
+		"voice":         "alloy",
+		"stream_format": "sse",
+	}
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			code, _, raw := submitJSON(t, "/v1/async/audio/speech", body, mode.headers)
+			if code != http.StatusBadRequest {
+				t.Errorf("expected 400 for stream_format=sse on speech, got %d: %s", code, raw)
+			}
+		})
+	}
+}
+
+// TestValidation_MalformedJSON_Returns400 verifies that sending malformed JSON to any
+// async JSON endpoint returns 400 before a job is created.
+func TestValidation_MalformedJSON_Returns400(t *testing.T) {
+	jsonEndpoints := []endpointCase{}
+	for _, ec := range endpointCases() {
+		if ec.multipart == nil {
+			jsonEndpoints = append(jsonEndpoints, ec)
+		}
+	}
+
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			for _, ec := range jsonEndpoints {
+				t.Run(ec.name, func(t *testing.T) {
+					code, body := submitRaw(t, ec.submitPath, []byte(`{invalid json`),
+						"application/json", mode.headers)
+					if code != http.StatusBadRequest {
+						t.Errorf("expected 400 for malformed JSON on %s, got %d: %s",
+							ec.submitPath, code, body)
+					}
+				})
+			}
+		})
+	}
+}
+
+// TestValidation_TranscriptionStreamRejected_Returns400 confirms that the transcription
+// endpoint rejects stream=true (sent as a multipart field) with 400.
+func TestValidation_TranscriptionStreamRejected_Returns400(t *testing.T) {
+	mp := &multipartCase{
+		fields: map[string]string{
+			"model":  modelFor("ASYNC_TRANSCRIPTION_MODEL"),
+			"stream": "true",
+		},
+		files: map[string]fileFixture{
+			"file": {filename: "sample.mp3", data: sampleAudio()},
+		},
+	}
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			code, _, body := submitMultipart(t, "/v1/async/audio/transcriptions", mp, mode.headers)
+			if code != http.StatusBadRequest {
+				t.Errorf("expected 400 for stream=true on transcription, got %d: %s", code, body)
+			}
+		})
+	}
+}
+
+// TestValidation_MissingModel_Returns400 verifies that submitting without a model field
+// is rejected with 400 across all JSON endpoints.
+func TestValidation_MissingModel_Returns400(t *testing.T) {
+	missingModelCases := []struct {
+		name string
+		path string
+		body map[string]any
+	}{
+		{
+			"chat_completions",
+			"/v1/async/chat/completions",
+			map[string]any{"messages": []map[string]any{{"role": "user", "content": "Hello"}}},
+		},
+		{
+			"text_completions",
+			"/v1/async/completions",
+			map[string]any{"prompt": "Hello"},
+		},
+		{
+			"embeddings",
+			"/v1/async/embeddings",
+			map[string]any{"input": "Hello"},
+		},
+		{
+			"responses",
+			"/v1/async/responses",
+			map[string]any{"input": "Hello"},
+		},
+		{
+			"speech",
+			"/v1/async/audio/speech",
+			map[string]any{"input": "Hello", "voice": "alloy"},
+		},
+		{
+			"rerank",
+			"/v1/async/rerank",
+			map[string]any{
+				"query":     "test",
+				"documents": []map[string]any{{"text": "test document"}},
+			},
+		},
+		{
+			"ocr",
+			"/v1/async/ocr",
+			map[string]any{
+				"document": map[string]any{
+					"type":      "image_url",
+					"image_url": envOr("ASYNC_OCR_IMAGE_URL", "https://pestworldcdn-dcf2a8gbggazaghf.z01.azurefd.net/media/561791/carpenter-ant4.jpg"),
+				},
+			},
+		},
+	}
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			for _, mc := range missingModelCases {
+				t.Run(mc.name, func(t *testing.T) {
+					code, _, body := submitJSON(t, mc.path, mc.body, mode.headers)
+					if code != http.StatusBadRequest {
+						t.Errorf("expected 400 for missing model on %s, got %d: %s", mc.path, code, body)
+					}
+				})
+			}
+		})
+	}
+}
+
+// TestValidation_ImageEditStreamRejected_Returns400 confirms that the image edit endpoint
+// rejects stream=true (sent as a multipart form field) with 400. This requires a complete
+// valid multipart body because stream validation runs after successful form parsing.
+func TestValidation_ImageEditStreamRejected_Returns400(t *testing.T) {
+	mp := &multipartCase{
+		fields: map[string]string{
+			"model":  modelFor("ASYNC_IMAGE_EDIT_MODEL"),
+			"prompt": "Make it blue",
+			"stream": "true",
+		},
+		files: map[string]fileFixture{
+			"image": {filename: "image.png", data: samplePNG()},
+		},
+	}
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			code, _, body := submitMultipart(t, "/v1/async/images/edits", mp, mode.headers)
+			if code != http.StatusBadRequest {
+				t.Errorf("expected 400 for stream=true on image edits, got %d: %s", code, body)
+			}
+		})
+	}
+}
+
+// TestValidation_Transcription_MissingFile_Returns400 verifies that a transcription request
+// without the required audio file is rejected with 400 at the multipart parse stage.
+func TestValidation_Transcription_MissingFile_Returns400(t *testing.T) {
+	mp := &multipartCase{
+		fields: map[string]string{
+			"model": modelFor("ASYNC_TRANSCRIPTION_MODEL"),
+		},
+		// no "file" entry
+	}
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			code, _, body := submitMultipart(t, "/v1/async/audio/transcriptions", mp, mode.headers)
+			if code != http.StatusBadRequest {
+				t.Errorf("expected 400 for missing audio file on transcription, got %d: %s", code, body)
+			}
+		})
+	}
+}
+
+// TestValidation_ImageEdit_MissingImage_Returns400 verifies that an image edit request
+// without the required image file is rejected with 400.
+func TestValidation_ImageEdit_MissingImage_Returns400(t *testing.T) {
+	mp := &multipartCase{
+		fields: map[string]string{
+			"model":  modelFor("ASYNC_IMAGE_EDIT_MODEL"),
+			"prompt": "Make it blue",
+		},
+		// no "image" file entry
+	}
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			code, _, body := submitMultipart(t, "/v1/async/images/edits", mp, mode.headers)
+			if code != http.StatusBadRequest {
+				t.Errorf("expected 400 for missing image file on image edits, got %d: %s", code, body)
+			}
+		})
+	}
+}
+
+// TestValidation_ImageVariation_MissingImage_Returns400 verifies that an image variation
+// request without the required image file is rejected with 400.
+func TestValidation_ImageVariation_MissingImage_Returns400(t *testing.T) {
+	mp := &multipartCase{
+		fields: map[string]string{
+			"model": modelFor("ASYNC_IMAGE_VARIATION_MODEL"),
+		},
+		// no "image" file entry
+	}
+	for _, mode := range testModes() {
+		t.Run(mode.name, func(t *testing.T) {
+			code, _, body := submitMultipart(t, "/v1/async/images/variations", mp, mode.headers)
+			if code != http.StatusBadRequest {
+				t.Errorf("expected 400 for missing image file on image variations, got %d: %s", code, body)
+			}
+		})
+	}
+}
+
+// TestHTTP_WrongMethod_Rejected verifies that POST on a poll-only path does not return
+// a success status code.  The converse (GET on a submit path) is not checked here
+// because the server's UI layer intercepts bare GET requests on /v1/async/* paths
+// before the async router is reached.
+func TestHTTP_WrongMethod_Rejected(t *testing.T) {
+	req, err := http.NewRequest(http.MethodPost, cfg.BaseURL+"/v1/async/chat/completions/00000000-0000-0000-0000-000000000000", nil)
+	if err != nil {
+		t.Fatalf("build request: %v", err)
+	}
+	resp, err := httpClient.Do(req)
+	if err != nil {
+		t.Fatalf("POST /v1/async/chat/completions/{id} failed: %v", err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusNotFound && resp.StatusCode != http.StatusMethodNotAllowed {
+		t.Errorf("POST on poll path returned %d, expected 404 or 405", resp.StatusCode)
+	}
+}