Skip to content

Commit

Permalink
docs: memory
Browse files Browse the repository at this point in the history
  • Loading branch information
arshad-yaseen committed Jan 9, 2025
1 parent 6c853fd commit 4b8b27a
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 73 deletions.
30 changes: 29 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ A lightweight, browser-based library for converting PDF files to images with eas
- 📦 Multiple output formats (PNG/JPEG) and types (base64, buffer, blob, dataURL)
- ⚡ Convert specific pages or page ranges
- 🛡️ Robust error handling and TypeScript support
- 🧠 Memory efficient with batch processing and cleanup
- 🧠 Highly memory-efficient, thanks to batch processing and cleanup

[Demo](https://pdf-to-images-browser.arshadyaseen.com/)

Expand All @@ -27,10 +27,16 @@ A lightweight, browser-based library for converting PDF files to images with eas
- [API Reference](#api-reference)
- [Main Function](#main-function)
- [Parameters](#parameters)
- [Converting Specific Pages](#converting-specific-pages)
- [Using Different Output Formats](#using-different-output-formats)
- [Using Batch Processing](#using-batch-processing)
- [Error Handling](#error-handling)
- [Browser Compatibility](#browser-compatibility)
- [Server-side usage](#server-side-usage)
- [Next.js Usage](#nextjs-usage)
- [Performance Limits and Best Practices](#performance-limits-and-best-practices)
- [Browser Limitations](#browser-limitations)
- [Best Practices](#best-practices)
- [Contributing](#contributing)
- [License](#license)

Expand Down Expand Up @@ -254,6 +260,28 @@ export default function Page() {
}
```

## Performance Limits and Best Practices

### Browser Limitations

The PDF to Images conversion process is constrained by browser limitations and available system resources. Here are the recommended limits for optimal performance:

| Resource | Recommended Limit | Maximum Limit | Notes |
| --------------- | ----------------- | ------------- | ------------------------------------ |
| File Size | 100MB | 200MB | Browser memory constraints |
| Page Count | 100 pages | 200-300 pages | Depends on content complexity |
| Page Dimensions | 5000x5000px | 8192x8192px | Browser canvas limits |
| Scale Factor | 2.0 | 4.0 | Memory usage increases quadratically |

### Best Practices

To ensure efficient memory management and smooth PDF conversion:

1. Process large PDFs in batches using the `batchSize` option (defaults to 3 pages per batch)
2. Add delays between batches with `batchDelay` (defaults to 100ms) to prevent UI freezing
3. Use the `onProgress` callback to handle each batch of images as it completes, and release any image data you no longer need
4. Keep scale factors reasonable (1.0-2.0) to limit memory usage

## Contributing

Contributions are welcome! Please read our [Contributing Guide](CONTRIBUTING.md) for details on our code of conduct and the process for submitting pull requests.
Expand Down
127 changes: 68 additions & 59 deletions src/core.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import {getDocument} from 'pdfjs-dist';
import type {DocumentInitParameters} from 'pdfjs-dist/types/src/display/api';
import type {
DocumentInitParameters,
PDFDocumentProxy,
} from 'pdfjs-dist/types/src/display/api';

import {InvalidPagesOptionError} from './errors';
import type {PDFSource, PDFToImagesOptions, PDFToImagesResult} from './types';
Expand Down Expand Up @@ -30,71 +33,77 @@ async function processPDF(
documentParams: DocumentInitParameters,
options: PDFToImagesOptions,
): Promise<(string | Blob | ArrayBuffer)[]> {
  /*
   * Loads the PDF described by `documentParams`, renders the pages selected by
   * `options.pages` (defaults to 'all') in batches, and resolves with one
   * image per selected page, in selection order.
   *
   * - `options.batchSize` (default 3): pages rendered concurrently per batch.
   * - `options.batchDelay` (default 100): milliseconds to wait between batches.
   * - `options.onProgress`: invoked after every batch with the number of
   *   completed pages, the total, and that batch's images.
   *
   * Throws InvalidPagesOptionError when `options.pages` has an unsupported
   * shape. Errors from loading or rendering propagate to the caller; the PDF
   * document is destroyed in either case.
   */
  const {batchSize = 3, batchDelay = 100, onProgress} = options;

  // A zero, negative or NaN batch size would keep the loop below from ever
  // advancing, so fall back to one page per batch in those cases.
  const pagesPerBatch = batchSize >= 1 ? Math.floor(batchSize) : 1;

  let pdfDoc: PDFDocumentProxy | null = null;
  const allImages: (string | Blob | ArrayBuffer)[] = [];

  try {
    const loadedDoc = await getDocument(documentParams).promise;
    pdfDoc = loadedDoc;

    const numPages = loadedDoc.numPages;
    const pages = options.pages || 'all';
    let pageNumbers: number[];

    if (pages === 'all') {
      pageNumbers = Array.from({length: numPages}, (_, i) => i + 1);
    } else if (pages === 'first') {
      pageNumbers = [1];
    } else if (pages === 'last') {
      pageNumbers = [numPages];
    } else if (typeof pages === 'number') {
      pageNumbers = [Math.max(pages, 1)];
    } else if (Array.isArray(pages)) {
      // Copy the list so the caller's `options.pages` array is never aliased
      // (and therefore never mutated) by this function.
      pageNumbers = pages.length ? [...pages] : [1];
    } else if (typeof pages === 'object') {
      const start = pages.start ?? 1;
      const end = pages.end ?? numPages;
      pageNumbers = generatePDFPageRange(start, end);
    } else {
      throw new InvalidPagesOptionError();
    }

    const totalPages = pageNumbers.length;

    // Process pages in batches
    for (let i = 0; i < totalPages; i += pagesPerBatch) {
      const batchPageNumbers = pageNumbers.slice(i, i + pagesPerBatch);

      const batchPromises = batchPageNumbers.map(pageNumber =>
        renderPDFPageToImage(loadedDoc, pageNumber, options),
      );

      try {
        // Process batch concurrently
        const batchResults = await Promise.all(batchPromises);
        allImages.push(...batchResults);

        if (onProgress) {
          onProgress({
            completed: Math.min(i + pagesPerBatch, totalPages),
            total: totalPages,
            batch: batchResults,
          });
        }

        // Prevent UI blocking between batches
        if (i + pagesPerBatch < totalPages) {
          await new Promise(resolve => setTimeout(resolve, batchDelay));
        }
      } finally {
        // Drop references to the batch's promises and yield once so the
        // runtime gets a chance to collect them (this is only a hint).
        batchPromises.length = 0;
        await new Promise(resolve => setTimeout(resolve, 0));
      }
    }

    // `allImages` must NOT be truncated during cleanup: the caller receives
    // this very array, so emptying it in `finally` would return no images.
    return allImages;
  } finally {
    // Release the document whether rendering succeeded or failed.
    if (pdfDoc) {
      await pdfDoc.destroy();
    }
  }
}
6 changes: 6 additions & 0 deletions src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,9 @@ export class CanvasRenderingError extends PDFToImagesError {
super('Canvas toBlob failed');
}
}

/**
 * Error thrown when a page render is requested but no PDF document
 * instance is available (the document reference is null).
 */
export class PDFDocumentNotInitializedError extends PDFToImagesError {
  constructor() {
    const message = 'PDF document is not initialized';
    super(message);
  }
}
34 changes: 21 additions & 13 deletions src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ import type {
} from 'pdfjs-dist/types/src/display/api';

import {DEFAULT_PDF_TO_IMAGES_OPTIONS} from './constants';
import {CanvasRenderingError, InvalidOutputOptionError} from './errors';
import {
CanvasRenderingError,
InvalidOutputOptionError,
PDFDocumentNotInitializedError,
} from './errors';
import type {PDFSource, PDFToImagesOptions} from './types';

export function extractBase64FromDataURL(dataURL: string): string {
Expand Down Expand Up @@ -58,10 +62,14 @@ export function configurePDFToImagesParameters(
}

export async function renderPDFPageToImage(
pdfDoc: PDFDocumentProxy,
pdfDoc: PDFDocumentProxy | null,
pageNumber: number,
options: PDFToImagesOptions,
): Promise<string | Blob | ArrayBuffer> {
if (!pdfDoc) {
throw new PDFDocumentNotInitializedError();
}

const {scale = 1.0, format = 'png', output = 'base64'} = options;

const page = await pdfDoc.getPage(pageNumber);
Expand All @@ -83,22 +91,22 @@ export async function renderPDFPageToImage(
enableWebGL: true, // Enable WebGL rendering if available
};

await page.render(renderContext).promise;
try {
await page.render(renderContext).promise;

// Convert to desired format
const mimeType = format === 'jpg' ? 'image/jpeg' : 'image/png';
const result = await processCanvasOutput(canvas, mimeType, output);
// Convert to desired format
const mimeType = format === 'jpg' ? 'image/jpeg' : 'image/png';
const result = await processCanvasOutput(canvas, mimeType, output);

// Clean up
canvas.width = 0;
canvas.height = 0;
return result;
} finally {
canvas.width = 0;
canvas.height = 0;
page.cleanup();

// Help browser GC the canvas
if (typeof window !== 'undefined') {
// Yield to GC
await new Promise(resolve => setTimeout(resolve, 0));
}

return result;
}

export async function processCanvasOutput(
Expand Down

0 comments on commit 4b8b27a

Please sign in to comment.