Bitparse

Parse Endpoint

Full reference for the POST /parse endpoint.

The /parse endpoint accepts a document file and returns a JSON response containing structured, XML-tagged output for every page.

Request

POST https://api.bitparse.ai/parse

Headers

| Header | Required | Description |
| --- | --- | --- |
| X-API-Key | Yes | Your API key (starts with bp_) |
| Content-Type | Auto | Set automatically to multipart/form-data by your HTTP client |

Body

Send the file as multipart/form-data with the field name file.

| Field | Type | Description |
| --- | --- | --- |
| file | File | The document to parse. PDF, PNG, or JPEG. |

Limits

| Limit | Value |
| --- | --- |
| Max file size | 10 MB |
| Max pages per upload | 2,000 |
| Rate limit | 1 request/second per user |
| Timeout | Up to 25 minutes for large PDFs |

File type is detected from the file's magic bytes, not its extension. For example, uploading a .txt file that has been renamed to .pdf returns a 400 error.

Code Examples

# Upload invoice.pdf as the multipart form field "file".
curl --request POST https://api.bitparse.ai/parse \
  --header "X-API-Key: bp_YOUR_API_KEY" \
  --form "file=@invoice.pdf"
import requests

# Open the document in a context manager so the file handle is closed as soon
# as the upload has finished, even if the request raises.
with open("invoice.pdf", "rb") as document:
    resp = requests.post(
        "https://api.bitparse.ai/parse",
        headers={"X-API-Key": "bp_YOUR_API_KEY"},
        files={"file": document},
        # requests never times out by default; large PDFs can take up to
        # 25 minutes to process, so allow that long before giving up.
        timeout=25 * 60,
    )

# Fail fast on 4xx/5xx instead of trying to read pages from an error response.
resp.raise_for_status()

data = resp.json()
for page in data["pages"]:
    print(f"Page {page['page_number']}:")
    print(page["text"])
// Run as an ES module (a .mjs file or "type": "module") on Node 18+.
// fetch, FormData and Blob are built-in globals there; the "form-data" npm
// package is not needed and is not serialised correctly by the built-in fetch.
import { readFile } from "node:fs/promises";

const fileBytes = await readFile("invoice.pdf");

const form = new FormData();
// The third argument is the filename sent in the multipart part.
form.append("file", new Blob([fileBytes]), "invoice.pdf");

// Do not set Content-Type manually: fetch adds multipart/form-data together
// with the boundary it generated for this form.
const resp = await fetch("https://api.bitparse.ai/parse", {
  method: "POST",
  headers: { "X-API-Key": "bp_YOUR_API_KEY" },
  body: form,
});

// fetch only rejects on network failures, so HTTP errors must be checked here.
if (!resp.ok) {
  throw new Error(`Parse request failed: ${resp.status} ${await resp.text()}`);
}

const data = await resp.json();
for (const page of data.pages) {
  console.log(`Page ${page.page_number}:`);
  console.log(page.text);
}
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

// run uploads invoice.pdf to the /parse endpoint as the multipart field
// "file" and prints the decoded JSON response. It returns the first error
// encountered, including any non-200 HTTP status.
func run() error {
	file, err := os.Open("invoice.pdf")
	if err != nil {
		return fmt.Errorf("open invoice.pdf: %w", err)
	}
	defer file.Close()

	var body bytes.Buffer
	writer := multipart.NewWriter(&body)
	part, err := writer.CreateFormFile("file", "invoice.pdf")
	if err != nil {
		return fmt.Errorf("create form file: %w", err)
	}
	if _, err := io.Copy(part, file); err != nil {
		return fmt.Errorf("copy file into request body: %w", err)
	}
	// Close writes the terminating boundary; it must happen before the
	// request is sent or the multipart body is incomplete.
	if err := writer.Close(); err != nil {
		return fmt.Errorf("finalize multipart body: %w", err)
	}

	req, err := http.NewRequest(http.MethodPost, "https://api.bitparse.ai/parse", &body)
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("X-API-Key", "bp_YOUR_API_KEY")
	// The content type carries the boundary generated by the multipart writer.
	req.Header.Set("Content-Type", writer.FormDataContentType())

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil on error, so return before touching resp.Body.
		return fmt.Errorf("send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best-effort read of the error body for the message.
		msg, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("parse request failed: %s: %s", resp.Status, msg)
	}

	var result map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return fmt.Errorf("decode response: %w", err)
	}
	fmt.Println(result)
	return nil
}

func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

Response

A successful response returns 200 OK with a JSON body:

{
  "pages": [
    {
      "page_number": 1,
      "text": "<title id=\"page1_elem0\">Invoice #1042</title>\n\n<text id=\"page1_elem1\">Billed to: Acme Corp...</text>\n\n<table id=\"page1_elem2\">\n| Item       | Amount  |\n|------------|--------|\n| Consulting | $5,000 |\n| Support    | $1,200 |\n</table>",
      "elements": [
        {"id": "page1_elem0", "type": "title", "content": "Invoice #1042", "image_data": ""},
        {"id": "page1_elem1", "type": "text", "content": "Billed to: Acme Corp...", "image_data": ""},
        {"id": "page1_elem2", "type": "table", "content": "| Item       | Amount  |\n|------------|--------|\n| Consulting | $5,000 |\n| Support    | $1,200 |", "image_data": ""}
      ]
    }
  ],
  "total_pages": 1,
  "processing_time_ms": 2341,
  "credits_used": 1,
  "credits_remaining": 499
}

Errors

| Status | Cause |
| --- | --- |
| 400 Bad Request | Invalid file type, file too large, or malformed multipart body |
| 401 Unauthorized | Missing or invalid API key |
| 402 Payment Required | Insufficient credits |
| 429 Too Many Requests | Rate limit exceeded |
| 500 Internal Server Error | Processing failure |

See the full Errors reference for example response bodies.

On this page