first commit
This commit is contained in:
89
core/schemas/ocr.go
Normal file
89
core/schemas/ocr.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package schemas
|
||||
|
||||
// OCRDocumentType specifies the type of document input for an OCR request.
// It is a string-backed type, so its values serialize as plain JSON strings.
type OCRDocumentType string

// Values of OCRDocumentType declared by this package. The string literals are
// the values carried in the "type" field of OCRDocument.
const (
	// OCRDocumentTypeDocumentURL represents a document URL input (e.g., PDF URL or base64 data URL).
	OCRDocumentTypeDocumentURL OCRDocumentType = "document_url"

	// OCRDocumentTypeImageURL represents an image URL input.
	OCRDocumentTypeImageURL OCRDocumentType = "image_url"
)
|
||||
|
||||
// OCRDocument represents the document input for an OCR request.
|
||||
type OCRDocument struct {
|
||||
Type OCRDocumentType `json:"type"`
|
||||
DocumentURL *string `json:"document_url,omitempty"`
|
||||
ImageURL *string `json:"image_url,omitempty"`
|
||||
}
|
||||
|
||||
// OCRParameters contains optional parameters for an OCR request.
//
// Every JSON-visible field is tagged omitempty, so a nil pointer or empty
// slice is left out of the encoded request. Pointer types let callers
// distinguish "not set" from an explicit false/zero/empty value. The exact
// meaning of each option is defined by the OCR provider that receives it;
// the per-field notes below state only what this declaration shows.
type OCRParameters struct {
	// IncludeImageBase64 is serialized as "include_image_base64".
	IncludeImageBase64 *bool `json:"include_image_base64,omitempty"`

	// Pages is serialized as "pages".
	// NOTE(review): presumably the page numbers to process; whether they are
	// zero- or one-based is not visible here.
	Pages []int `json:"pages,omitempty"`

	// ImageLimit is serialized as "image_limit".
	ImageLimit *int `json:"image_limit,omitempty"`

	// ImageMinSize is serialized as "image_min_size".
	ImageMinSize *int `json:"image_min_size,omitempty"`

	// TableFormat is serialized as "table_format".
	TableFormat *string `json:"table_format,omitempty"`

	// ExtractHeader is serialized as "extract_header".
	ExtractHeader *bool `json:"extract_header,omitempty"`

	// ExtractFooter is serialized as "extract_footer".
	ExtractFooter *bool `json:"extract_footer,omitempty"`

	// BBoxAnnotationFormat is serialized as "bbox_annotation_format".
	BBoxAnnotationFormat *string `json:"bbox_annotation_format,omitempty"`

	// DocumentAnnotationFormat is serialized as "document_annotation_format".
	DocumentAnnotationFormat *string `json:"document_annotation_format,omitempty"`

	// DocumentAnnotationPrompt is serialized as "document_annotation_prompt".
	DocumentAnnotationPrompt *string `json:"document_annotation_prompt,omitempty"`

	// ExtraParams is tagged json:"-", so encoding/json neither writes it nor
	// reads it when this struct is marshalled or unmarshalled.
	// NOTE(review): presumably forwarded to the provider by other code;
	// confirm where it is consumed.
	ExtraParams map[string]interface{} `json:"-"`
}
|
||||
|
||||
// BifrostOCRRequest represents a request to perform OCR on a document.
|
||||
type BifrostOCRRequest struct {
|
||||
Provider ModelProvider `json:"provider"`
|
||||
Model string `json:"model"`
|
||||
ID *string `json:"id,omitempty"`
|
||||
Document OCRDocument `json:"document"`
|
||||
Params *OCRParameters `json:"params,omitempty"`
|
||||
Fallbacks []Fallback `json:"fallbacks,omitempty"`
|
||||
RawRequestBody []byte `json:"-"`
|
||||
}
|
||||
|
||||
// GetRawRequestBody returns the raw request body for the OCR request.
|
||||
func (r *BifrostOCRRequest) GetRawRequestBody() []byte {
|
||||
return r.RawRequestBody
|
||||
}
|
||||
|
||||
// OCRPageImage represents an extracted image from an OCR page.
//
// The four coordinate fields give the top-left and bottom-right corners of
// the image region. NOTE(review): the coordinate units and origin are not
// visible in this file; confirm against the provider's documentation.
type OCRPageImage struct {
	// ID identifies the image; serialized as "id".
	ID string `json:"id"`

	// TopLeftX is serialized as "top_left_x".
	TopLeftX float64 `json:"top_left_x"`

	// TopLeftY is serialized as "top_left_y".
	TopLeftY float64 `json:"top_left_y"`

	// BottomRightX is serialized as "bottom_right_x".
	BottomRightX float64 `json:"bottom_right_x"`

	// BottomRightY is serialized as "bottom_right_y".
	BottomRightY float64 `json:"bottom_right_y"`

	// ImageBase64 is serialized as "image_base64" and omitted when nil.
	ImageBase64 *string `json:"image_base64,omitempty"`
}
|
||||
|
||||
// OCRPageDimensions represents the dimensions of an OCR page.
//
// All three fields are plain ints without omitempty, so they are always
// written to JSON, including when zero.
type OCRPageDimensions struct {
	// DPI is serialized as "dpi".
	DPI int `json:"dpi"`

	// Height is serialized as "height".
	// NOTE(review): unit (presumably pixels) is not stated in this file.
	Height int `json:"height"`

	// Width is serialized as "width".
	// NOTE(review): unit (presumably pixels) is not stated in this file.
	Width int `json:"width"`
}
|
||||
|
||||
// OCRPage represents a single processed page from an OCR response.
|
||||
type OCRPage struct {
|
||||
Index int `json:"index"`
|
||||
Markdown string `json:"markdown"`
|
||||
Images []OCRPageImage `json:"images,omitempty"`
|
||||
Dimensions *OCRPageDimensions `json:"dimensions,omitempty"`
|
||||
}
|
||||
|
||||
// OCRUsageInfo represents usage information from an OCR response.
//
// Both counters are plain ints without omitempty, so they are always written
// to JSON, including when zero.
type OCRUsageInfo struct {
	// PagesProcessed is serialized as "pages_processed".
	PagesProcessed int `json:"pages_processed"`

	// DocSizeBytes is serialized as "doc_size_bytes"; going by its name, the
	// size of the processed document in bytes.
	DocSizeBytes int `json:"doc_size_bytes"`
}
|
||||
|
||||
// BifrostOCRResponse represents the response from an OCR request.
|
||||
type BifrostOCRResponse struct {
|
||||
Model string `json:"model"`
|
||||
Pages []OCRPage `json:"pages"`
|
||||
UsageInfo *OCRUsageInfo `json:"usage_info,omitempty"`
|
||||
DocumentAnnotation *string `json:"document_annotation,omitempty"`
|
||||
ExtraFields BifrostResponseExtraFields `json:"extra_fields"`
|
||||
}
|
||||
Reference in New Issue
Block a user