PDF Page Rendering¶
Added in v4.6.2
Render individual PDF pages as PNG images. Unlike the extraction pipeline (which parses text, tables, metadata), this API produces raw pixel data for thumbnails, vision model input, or custom OCR pipelines.
Two Approaches¶
| API | When to use |
|---|---|
| `render_pdf_page` | You know which page you need, or only need a few pages |
| `PdfPageIterator` | Process every page sequentially without loading all images into memory |
Single Page¶
Go
package main
import (
"fmt"
"log"
"os"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v4"
)
// main renders the first page of document.pdf and saves it as a PNG file.
func main() {
	// Render a single page by taking only the first item from the page iterator.
	iter, err := kreuzberg.NewPdfPageIterator("document.pdf", 150)
	if err != nil {
		log.Fatalf("failed to open PDF: %v", err)
	}
	defer iter.Close()

	pageIndex, png, ok, err := iter.Next()
	if err != nil {
		log.Fatalf("render error: %v", err)
	}
	if !ok {
		// The iterator produced no page, so there is nothing to write.
		return
	}

	// Report write failures instead of silently dropping them.
	name := fmt.Sprintf("page_%d.png", pageIndex)
	if err := os.WriteFile(name, png, 0644); err != nil {
		log.Fatalf("failed to write %s: %v", name, err)
	}
}
Java
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.KreuzbergException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
try (var iter = Kreuzberg.PdfPageIterator.open(Path.of("document.pdf"), 150)) {
// Render a single page (first page)
if (iter.hasNext()) {
Kreuzberg.PageResult page = iter.next();
Files.write(Path.of("first_page.png"), page.data());
}
}
C
#include "kreuzberg.h"
#include <stdio.h>
/* Renders the first page of document.pdf and writes it to first_page.png.
 * Returns 0 on success and 1 if rendering or writing the file fails. */
int main(void) {
    /* Render a single page (zero-based index) */
    CRenderPageResult *page = kreuzberg_render_pdf_page("document.pdf", 0, 150);
    if (!page) {
        fprintf(stderr, "Error: %s\n", kreuzberg_last_error());
        return 1;
    }

    int status = 0;
    FILE *f = fopen("first_page.png", "wb");
    if (!f) {
        /* fopen can fail (permissions, missing directory); never write through NULL. */
        perror("first_page.png");
        status = 1;
    } else {
        if (fwrite(page->data, 1, page->data_len, f) != page->data_len) {
            fprintf(stderr, "Error: short write to first_page.png\n");
            status = 1;
        }
        /* fclose flushes buffered data, so its result matters as well. */
        if (fclose(f) != 0) {
            perror("first_page.png");
            status = 1;
        }
    }

    /* Release the render result on every path, including write failures. */
    kreuzberg_free_render_page_result(page);
    return status;
}
Page Iterator¶
Renders one page at a time, releasing each page's memory before advancing. Peak memory stays proportional to one page regardless of document length.
Python
from kreuzberg import render_pdf_page
# Iterate all pages by index (memory-efficient, one page at a time)
# NOTE: total_pages is not defined in this snippet; set it to the document's
# page count before running the loop.
for page_index in range(total_pages):
    png_bytes = render_pdf_page("document.pdf", page_index=page_index, dpi=150)
    print(f"Page {page_index}: {len(png_bytes)} bytes")
TypeScript
import { PdfPageIterator } from "@kreuzberg/node";
import { writeFileSync } from "node:fs";
// Iterate all pages (memory-efficient, one page at a time)
const iter = new PdfPageIterator("document.pdf", 150);
let result;
try {
  // next() returns null once there are no more pages.
  while ((result = iter.next()) !== null) {
    const { pageIndex, data } = result;
    console.log(`Page ${pageIndex}: ${data.length} bytes`);
    writeFileSync(`page_${pageIndex}.png`, data);
  }
} finally {
  // Close the iterator even if rendering or writing a page throws.
  iter.close();
}
Go
package main
import (
"fmt"
"log"
"os"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v4"
)
// main renders every page of document.pdf and saves each one as page_<index>.png.
func main() {
	// Iterate all pages (memory-efficient, one page at a time)
	iter, err := kreuzberg.NewPdfPageIterator("document.pdf", 150)
	if err != nil {
		log.Fatalf("failed to create iterator: %v", err)
	}
	defer iter.Close()

	for {
		pageIndex, png, ok, err := iter.Next()
		if err != nil {
			log.Fatalf("render error: %v", err)
		}
		if !ok {
			// The iterator has no more pages to yield.
			break
		}
		fmt.Printf("Page %d: %d bytes\n", pageIndex, len(png))

		// Report write failures instead of silently dropping them.
		name := fmt.Sprintf("page_%d.png", pageIndex)
		if err := os.WriteFile(name, png, 0644); err != nil {
			log.Fatalf("failed to write %s: %v", name, err)
		}
	}
}
Java
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.KreuzbergException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
// Iterate all pages (memory-efficient, one page at a time)
try (var iter = Kreuzberg.PdfPageIterator.open(Path.of("document.pdf"), 150)) {
while (iter.hasNext()) {
Kreuzberg.PageResult page = iter.next();
System.out.printf("Page %d: %d bytes%n", page.pageIndex(), page.data().length);
Files.write(Path.of("page_" + page.pageIndex() + ".png"), page.data());
}
}
C#
using Kreuzberg;
// Iterate all pages (memory-efficient, one page at a time);
// the using declaration disposes the iterator when it goes out of scope.
using var pages = PdfPageIterator.Open("document.pdf", dpi: 150);
foreach (var rendered in pages)
{
    var index = rendered.PageIndex;
    var png = rendered.Data;
    Console.WriteLine($"Page {index}: {png.Length} bytes");
    File.WriteAllBytes($"page_{index}.png", png);
}
C
#include "kreuzberg.h"
#include <stdio.h>
/* Renders the pages of document.pdf one at a time and writes each to
 * page_<index>.png. Returns 0 when the loop ends because no page was
 * returned, and 1 if an output file cannot be written. */
int main(void) {
    /* Iterate all pages (memory-efficient, one page at a time) */
    for (size_t i = 0; ; i++) {
        CRenderPageResult *page = kreuzberg_render_pdf_page("document.pdf", i, 150);
        if (!page) {
            break; /* No more pages or error */
        }
        printf("Page %zu: %zu bytes\n", i, page->data_len);

        char filename[64];
        snprintf(filename, sizeof(filename), "page_%zu.png", i);

        FILE *f = fopen(filename, "wb");
        if (!f) {
            /* fopen can fail; free the page before bailing out so it does not leak. */
            perror(filename);
            kreuzberg_free_render_page_result(page);
            return 1;
        }

        /* Remember the expected size: page is freed before the result is checked. */
        size_t expected = page->data_len;
        size_t written = fwrite(page->data, 1, expected, f);
        int close_failed = (fclose(f) != 0);
        kreuzberg_free_render_page_result(page);

        if (written != expected || close_failed) {
            fprintf(stderr, "Error: failed to write %s\n", filename);
            return 1;
        }
    }
    return 0;
}
Iterator availability
PdfPageIterator is available in Python, TypeScript, Rust, Go, Java, C#, and C. Ruby, PHP, R, and Elixir provide render_pdf_page only — iterate pages with a loop over page indices.
DPI Configuration¶
| DPI | Pixel size (US Letter) | Use case |
|---|---|---|
| 72 | 612 x 792 | Thumbnails, quick previews |
| 150 (default) | 1275 x 1650 | General-purpose, screen display |
| 300 | 2550 x 3300 | OCR input, print quality |
DPI for OCR
Use 300 DPI when rendering pages for OCR or vision models. The default 150 DPI may reduce recognition accuracy on small text.
Examples¶
Thumbnails¶
Python
from kreuzberg import render_pdf_page
# Render the first page at 72 DPI and save the resulting bytes as thumbnail.png.
thumbnail_png = render_pdf_page("report.pdf", page_index=0, dpi=72)
with open("thumbnail.png", "wb") as out_file:
    out_file.write(thumbnail_png)
Vision Model Input¶
Python
import base64
from kreuzberg import render_pdf_page
# Render page index 2 at 300 DPI, then base64-encode the bytes as text.
page_png = render_pdf_page("chart.pdf", page_index=2, dpi=300)
encoded = base64.b64encode(page_png)
b64 = encoded.decode()