90 lines
3.7 KiB
Go
90 lines
3.7 KiB
Go
package schemas
|
|
|
|
// OCRDocumentType specifies the type of document input for an OCR request.
// Its underlying type is string, so values encode to JSON as plain strings.
type OCRDocumentType string

const (
	// OCRDocumentTypeDocumentURL represents a document URL input (e.g., PDF URL or base64 data URL).
	// Its wire value is "document_url".
	OCRDocumentTypeDocumentURL OCRDocumentType = "document_url"
	// OCRDocumentTypeImageURL represents an image URL input.
	// Its wire value is "image_url".
	OCRDocumentTypeImageURL OCRDocumentType = "image_url"
)
|
|
|
|
// OCRDocument represents the document input for an OCR request.
|
|
type OCRDocument struct {
|
|
Type OCRDocumentType `json:"type"`
|
|
DocumentURL *string `json:"document_url,omitempty"`
|
|
ImageURL *string `json:"image_url,omitempty"`
|
|
}
|
|
|
|
// OCRParameters contains optional parameters for an OCR request.
//
// Every JSON-visible field is tagged omitempty, so a zero-value OCRParameters
// encodes to an empty JSON object. Pointer fields distinguish "not set" (nil)
// from an explicit zero value.
// NOTE(review): the field set looks like it mirrors a provider OCR API; the
// exact meaning and accepted values of each option are not visible in this
// file — confirm against the provider documentation.
type OCRParameters struct {
	// IncludeImageBase64 is encoded under "include_image_base64"; omitted when nil.
	IncludeImageBase64 *bool `json:"include_image_base64,omitempty"`
	// Pages is encoded under "pages"; omitted when nil or empty.
	// NOTE(review): whether page indexes are 0- or 1-based is not shown here.
	Pages []int `json:"pages,omitempty"`
	// ImageLimit is encoded under "image_limit"; omitted when nil.
	ImageLimit *int `json:"image_limit,omitempty"`
	// ImageMinSize is encoded under "image_min_size"; omitted when nil.
	ImageMinSize *int `json:"image_min_size,omitempty"`
	// TableFormat is encoded under "table_format"; omitted when nil.
	TableFormat *string `json:"table_format,omitempty"`
	// ExtractHeader is encoded under "extract_header"; omitted when nil.
	ExtractHeader *bool `json:"extract_header,omitempty"`
	// ExtractFooter is encoded under "extract_footer"; omitted when nil.
	ExtractFooter *bool `json:"extract_footer,omitempty"`
	// BBoxAnnotationFormat is encoded under "bbox_annotation_format"; omitted when nil.
	BBoxAnnotationFormat *string `json:"bbox_annotation_format,omitempty"`
	// DocumentAnnotationFormat is encoded under "document_annotation_format"; omitted when nil.
	DocumentAnnotationFormat *string `json:"document_annotation_format,omitempty"`
	// DocumentAnnotationPrompt is encoded under "document_annotation_prompt"; omitted when nil.
	DocumentAnnotationPrompt *string `json:"document_annotation_prompt,omitempty"`
	// ExtraParams is tagged json:"-", so encoding/json never reads or writes it.
	// NOTE(review): presumably holds provider-specific options that are merged
	// into the outgoing request elsewhere — confirm.
	ExtraParams map[string]interface{} `json:"-"`
}
|
|
|
|
// BifrostOCRRequest represents a request to perform OCR on a document.
|
|
type BifrostOCRRequest struct {
|
|
Provider ModelProvider `json:"provider"`
|
|
Model string `json:"model"`
|
|
ID *string `json:"id,omitempty"`
|
|
Document OCRDocument `json:"document"`
|
|
Params *OCRParameters `json:"params,omitempty"`
|
|
Fallbacks []Fallback `json:"fallbacks,omitempty"`
|
|
RawRequestBody []byte `json:"-"`
|
|
}
|
|
|
|
// GetRawRequestBody returns the raw request body for the OCR request.
|
|
func (r *BifrostOCRRequest) GetRawRequestBody() []byte {
|
|
return r.RawRequestBody
|
|
}
|
|
|
|
// OCRPageImage represents an extracted image from an OCR page.
//
// The four coordinate fields are always present in the JSON encoding.
// NOTE(review): they presumably describe the image's bounding box on the
// page; the unit and origin are not visible in this file — confirm against
// the provider documentation.
type OCRPageImage struct {
	// ID is encoded under "id".
	ID string `json:"id"`
	// TopLeftX is encoded under "top_left_x".
	TopLeftX float64 `json:"top_left_x"`
	// TopLeftY is encoded under "top_left_y".
	TopLeftY float64 `json:"top_left_y"`
	// BottomRightX is encoded under "bottom_right_x".
	BottomRightX float64 `json:"bottom_right_x"`
	// BottomRightY is encoded under "bottom_right_y".
	BottomRightY float64 `json:"bottom_right_y"`
	// ImageBase64 is encoded under "image_base64"; omitted when nil.
	ImageBase64 *string `json:"image_base64,omitempty"`
}
|
|
|
|
// OCRPageDimensions represents the dimensions of an OCR page.
//
// All three fields are always present in the JSON encoding.
// NOTE(review): Height and Width are presumably in pixels at the given DPI —
// the unit is not visible in this file; confirm.
type OCRPageDimensions struct {
	// DPI is encoded under "dpi".
	DPI int `json:"dpi"`
	// Height is encoded under "height".
	Height int `json:"height"`
	// Width is encoded under "width".
	Width int `json:"width"`
}
|
|
|
|
// OCRPage represents a single processed page from an OCR response.
|
|
type OCRPage struct {
|
|
Index int `json:"index"`
|
|
Markdown string `json:"markdown"`
|
|
Images []OCRPageImage `json:"images,omitempty"`
|
|
Dimensions *OCRPageDimensions `json:"dimensions,omitempty"`
|
|
}
|
|
|
|
// OCRUsageInfo represents usage information from an OCR response.
//
// Both counters are always present in the JSON encoding, including when zero.
type OCRUsageInfo struct {
	// PagesProcessed is encoded under "pages_processed".
	PagesProcessed int `json:"pages_processed"`
	// DocSizeBytes is encoded under "doc_size_bytes".
	DocSizeBytes int `json:"doc_size_bytes"`
}
|
|
|
|
// BifrostOCRResponse represents the response from an OCR request.
|
|
type BifrostOCRResponse struct {
|
|
Model string `json:"model"`
|
|
Pages []OCRPage `json:"pages"`
|
|
UsageInfo *OCRUsageInfo `json:"usage_info,omitempty"`
|
|
DocumentAnnotation *string `json:"document_annotation,omitempty"`
|
|
ExtraFields BifrostResponseExtraFields `json:"extra_fields"`
|
|
}
|