Skip to content

PDF Page Rendering

Added in v4.6.2

Render individual PDF pages as PNG images. Unlike the extraction pipeline (which parses text, tables, and metadata), this API produces rendered page images as PNG-encoded bytes for thumbnails, vision model input, or custom OCR pipelines.

Two Approaches

API | When to use
render_pdf_page | You know which page you need, or only need a few pages
PdfPageIterator | Process every page sequentially without loading all images into memory

Single Page

Python
from kreuzberg import render_pdf_page

# Render a single page (zero-based index)
png_bytes = render_pdf_page("document.pdf", page_index=0, dpi=150)

# Write to disk
with open("first_page.png", "wb") as f:
    f.write(png_bytes)
TypeScript
import { renderPdfPageSync } from "@kreuzberg/node";
import { writeFileSync } from "node:fs";

// Render a single page (zero-based index)
const pngBytes = renderPdfPageSync("document.pdf", 0, 150);

writeFileSync("first_page.png", pngBytes);
Rust
use kreuzberg::pdf::render_pdf_page_to_png;

let pdf_bytes = std::fs::read("document.pdf")?;

// Render a single page (zero-based index)
let png = render_pdf_page_to_png(&pdf_bytes, 0, Some(150), None)?;

std::fs::write("first_page.png", &png)?;
Go
package main

import (
    "fmt"
    "log"
    "os"

    "github.com/kreuzberg-dev/kreuzberg/packages/go/v4"
)

func main() {
    // Render only the first page: open the page iterator at 150 DPI and
    // take a single item from it.
    iter, err := kreuzberg.NewPdfPageIterator("document.pdf", 150)
    if err != nil {
        log.Fatalf("failed to open PDF: %v", err)
    }
    defer iter.Close()

    pageIndex, png, ok, err := iter.Next()
    if err != nil {
        log.Fatalf("render error: %v", err)
    }
    // ok reports whether a page was returned; nothing is written otherwise.
    if ok {
        name := fmt.Sprintf("page_%d.png", pageIndex)
        // Surface write failures (missing directory, permissions, full disk)
        // instead of silently dropping the rendered page.
        if err := os.WriteFile(name, png, 0644); err != nil {
            log.Fatalf("failed to write %s: %v", name, err)
        }
    }
}
Java
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.KreuzbergException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

try (var iter = Kreuzberg.PdfPageIterator.open(Path.of("document.pdf"), 150)) {
    // Render a single page (first page)
    if (iter.hasNext()) {
        Kreuzberg.PageResult page = iter.next();
        Files.write(Path.of("first_page.png"), page.data());
    }
}
C#
using Kreuzberg;

// Render a single page using the iterator
using var iter = PdfPageIterator.Open("document.pdf", dpi: 150);
foreach (var page in iter)
{
    File.WriteAllBytes("first_page.png", page.Data);
    break; // Only need the first page
}
Ruby
require 'kreuzberg'

# Render a single page (zero-based index)
png = Kreuzberg.render_pdf_page('document.pdf', 0, dpi: 150)

File.binwrite('first_page.png', png)
PHP
<?php

declare(strict_types=1);

use function Kreuzberg\render_pdf_page;

// Render a single page (zero-based index)
$png = render_pdf_page('document.pdf', 0, 150);

file_put_contents('first_page.png', $png);
R
library(kreuzberg)

# Render a single page (zero-based index)
png <- render_pdf_page("document.pdf", 0L, dpi = 150L)

writeBin(png, "first_page.png")
Elixir
# Render a single page (zero-based index)
{:ok, png} = Kreuzberg.render_pdf_page("document.pdf", 0, dpi: 150)

File.write!("first_page.png", png)
C
#include "kreuzberg.h"
#include <stdio.h>

int main(void) {
    /* Render a single page (zero-based index) at 150 DPI. */
    CRenderPageResult *page = kreuzberg_render_pdf_page("document.pdf", 0, 150);
    if (!page) {
        fprintf(stderr, "Error: %s\n", kreuzberg_last_error());
        return 1;
    }

    /* Track failure in a status code so the render result is always freed. */
    int status = 0;

    FILE *f = fopen("first_page.png", "wb");
    if (!f) {
        /* fopen returns NULL on failure; writing to it would be undefined. */
        perror("first_page.png");
        status = 1;
    } else {
        if (fwrite(page->data, 1, page->data_len, f) != page->data_len) {
            fprintf(stderr, "Error: short write to first_page.png\n");
            status = 1;
        }
        /* fclose flushes buffered data, so its result matters too. */
        if (fclose(f) != 0) {
            perror("first_page.png");
            status = 1;
        }
    }

    kreuzberg_free_render_page_result(page);
    return status;
}

Page Iterator

Renders one page at a time, releasing each page's memory before advancing. Peak memory stays proportional to one page regardless of document length.

Python
from kreuzberg import render_pdf_page

# Iterate all pages by index (memory-efficient, one page at a time).
# NOTE: total_pages is not defined in this snippet; set it to the number of
# pages in document.pdf before running the loop.
for page_index in range(total_pages):
    png_bytes = render_pdf_page("document.pdf", page_index=page_index, dpi=150)
    print(f"Page {page_index}: {len(png_bytes)} bytes")
TypeScript
import { PdfPageIterator } from "@kreuzberg/node";
import { writeFileSync } from "node:fs";

// Iterate all pages (memory-efficient, one page at a time)
const iter = new PdfPageIterator("document.pdf", 150);
try {
    let result;
    // next() returns null once there are no more pages.
    while ((result = iter.next()) !== null) {
        const { pageIndex, data } = result;
        console.log(`Page ${pageIndex}: ${data.length} bytes`);
        writeFileSync(`page_${pageIndex}.png`, data);
    }
} finally {
    // Close the iterator even if rendering or writing a page throws.
    iter.close();
}
Rust
use kreuzberg::pdf::PdfPageIterator;

// Iterate all pages (memory-efficient, one page at a time)
for result in PdfPageIterator::from_file("document.pdf", Some(150), None)? {
    let (page_index, png_bytes) = result?;
    println!("Page {}: {} bytes", page_index, png_bytes.len());
}
Go
package main

import (
    "fmt"
    "log"
    "os"

    "github.com/kreuzberg-dev/kreuzberg/packages/go/v4"
)

func main() {
    // Iterate all pages (memory-efficient, one page at a time)
    iter, err := kreuzberg.NewPdfPageIterator("document.pdf", 150)
    if err != nil {
        log.Fatalf("failed to create iterator: %v", err)
    }
    defer iter.Close()

    for {
        pageIndex, png, ok, err := iter.Next()
        if err != nil {
            log.Fatalf("render error: %v", err)
        }
        // ok is false once the iterator has no further page to return.
        if !ok {
            break
        }
        fmt.Printf("Page %d: %d bytes\n", pageIndex, len(png))

        name := fmt.Sprintf("page_%d.png", pageIndex)
        // Stop on the first write failure rather than silently skipping pages.
        if err := os.WriteFile(name, png, 0644); err != nil {
            log.Fatalf("failed to write %s: %v", name, err)
        }
    }
}
Java
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.KreuzbergException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

// Iterate all pages (memory-efficient, one page at a time)
try (var iter = Kreuzberg.PdfPageIterator.open(Path.of("document.pdf"), 150)) {
    while (iter.hasNext()) {
        Kreuzberg.PageResult page = iter.next();
        System.out.printf("Page %d: %d bytes%n", page.pageIndex(), page.data().length);
        Files.write(Path.of("page_" + page.pageIndex() + ".png"), page.data());
    }
}
C#
using Kreuzberg;

// Iterate all pages (memory-efficient, one page at a time)
using var iter = PdfPageIterator.Open("document.pdf", dpi: 150);
foreach (var page in iter)
{
    Console.WriteLine($"Page {page.PageIndex}: {page.Data.Length} bytes");
    File.WriteAllBytes($"page_{page.PageIndex}.png", page.Data);
}
C
#include "kreuzberg.h"
#include <stdio.h>

int main(void) {
    /* Iterate all pages by index; only one rendered page is held at a time. */
    for (size_t i = 0; ; i++) {
        CRenderPageResult *page = kreuzberg_render_pdf_page("document.pdf", i, 150);
        if (!page) {
            break; /* No more pages or error */
        }

        printf("Page %zu: %zu bytes\n", i, page->data_len);

        char filename[64];
        snprintf(filename, sizeof(filename), "page_%zu.png", i);

        FILE *f = fopen(filename, "wb");
        if (!f) {
            /* fopen returns NULL on failure; release the page before exiting. */
            perror(filename);
            kreuzberg_free_render_page_result(page);
            return 1;
        }

        /* Capture the expected length before the page is freed. */
        size_t expected = page->data_len;
        size_t written = fwrite(page->data, 1, expected, f);
        /* fclose flushes buffered data, so a failure here also loses output. */
        int close_failed = (fclose(f) != 0);

        kreuzberg_free_render_page_result(page);

        if (written != expected || close_failed) {
            fprintf(stderr, "Error: failed to write %s\n", filename);
            return 1;
        }
    }

    return 0;
}

Iterator availability

PdfPageIterator is available in Python, TypeScript, Rust, Go, Java, C#, and C. Ruby, PHP, R, and Elixir provide render_pdf_page only — iterate pages with a loop over page indices.

DPI Configuration

DPI | Pixel size (US Letter) | Use case
72 | 612 x 792 | Thumbnails, quick previews
150 (default) | 1275 x 1650 | General-purpose, screen display
300 | 2550 x 3300 | OCR input, print quality

DPI for OCR

Use 300 DPI when rendering pages for OCR or vision models. The default 150 DPI may reduce recognition accuracy on small text.

Examples

Thumbnails

Python
from kreuzberg import render_pdf_page

thumbnail = render_pdf_page("report.pdf", page_index=0, dpi=72)
with open("thumbnail.png", "wb") as f:
    f.write(thumbnail)

Vision Model Input

Python
import base64
from kreuzberg import render_pdf_page

png = render_pdf_page("chart.pdf", page_index=2, dpi=300)
b64 = base64.b64encode(png).decode()

Batch-Render All Pages

Python
from pathlib import Path
from kreuzberg import render_pdf_page

# Write every rendered page into the "pages" directory, creating it if needed.
output_dir = Path("pages")
output_dir.mkdir(exist_ok=True)

# NOTE: total_pages is not defined in this snippet; set it to the number of
# pages in document.pdf before running the loop.
for i in range(total_pages):
    png = render_pdf_page("document.pdf", page_index=i, dpi=150)
    (output_dir / f"page_{i}.png").write_bytes(png)