Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
bebechien authored Feb 21, 2025
2 parents 53ad12e + 6e44b97 commit 8e25ef7
Show file tree
Hide file tree
Showing 11 changed files with 679 additions and 113 deletions.
41 changes: 41 additions & 0 deletions Demos/PaliGemma2-on-Web/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
### Developed by [Nitin Tiwari](https://linkedin.com/in/tiwari-nitin).

# Inference PaliGemma 2 on the browser with ONNX & Transformers.js
This project runs inference with the paligemma2-3b-mix-224 model in the browser, using its converted ONNX weights and Hugging Face Transformers.js.

## PaliGemma 2 to ONNX Conversion:
![PaliGemma 2 to ONNX conversion pipeline](assets/paligemma2-onnx-pipeline.png)


## Steps to run:

1. Clone the repository on your local machine.
2. Navigate to `gemma-cookbook/Demos/PaliGemma2-on-Web` directory.
3. Run `npm install` to install the Node.js packages.
4. Run `node server.js` to start the server.
5. Open `localhost:3000` in your web browser and start running inference with PaliGemma 2.

> [!NOTE]
> On the first run, it takes around 10-15 minutes to load the model weights.
## Results:
![PaliGemma 2 ONNX demo output](assets/paligemma2-onnx-output.gif)


## Resources & References

1. [Google DeepMind PaliGemma 2](https://developers.googleblog.com/en/introducing-paligemma-2-mix/)
2. Colab Notebooks:
<table>
<tr>
<td><b>Convert and quantize PaliGemma 2 to ONNX</b></td>
<td><a target="_blank" href="https://colab.research.google.com/github/NSTiwari/PaliGemma2-ONNX-Transformers.js/blob/main/Convert_PaliGemma2_to_ONNX.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td>
</tr>
<tr>
<td><b>Inference PaliGemma 2 with Transformers.js</b></td>
<td><a target="_blank" href="https://colab.research.google.com/github/NSTiwari/PaliGemma2-ONNX-Transformers.js/blob/main/Inference_PaliGemma2_with_Transformers_js.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td>
</tr>
</table>

3. [**Medium Blog**](https://medium.com/@tiwarinitin1999/inference-paligemma-2-with-transformers-js-5545986ac14a) for step-by-step implementation.
4. [ONNX Community](https://huggingface.co/onnx-community)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
28 changes: 28 additions & 0 deletions Demos/PaliGemma2-on-Web/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"name": "paligemma2-onnx-transformers.js",
"version": "1.0.0",
"main": "server.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "node server.js"
},
"keywords": [],
"author": "Nitin Tiwari",
"license": "MIT",
"description": "Inference PaliGemma 2 on the browser using ONNX weights, and Transformers.js.",
"repository": {
"type": "git",
"url": "git+https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js.git"
},
"bugs": {
"url": "https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js/issues"
},
"homepage": "https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js#readme",
"dependencies": {
"@huggingface/transformers": "^3.3.3",
"canvas": "^3.1.0",
"express": "^4.21.2",
"server.js": "^1.0.0"
}
}
41 changes: 41 additions & 0 deletions Demos/PaliGemma2-on-Web/public/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<!DOCTYPE html>
<html lang="en">

<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Inference PaliGemma 2 with 🤗 Transformers.js</title>
<link rel="stylesheet" href="style.css">
</head>

<body>
<div class="container">
<header>
<h1>Inference PaliGemma 2 with 🤗 Transformers.js</h1>
</header>

<!-- Preview area: script.js shows the uploaded image in #originalImage and
     draws the scaled preview / bounding box on #processedCanvas. Both start
     out hidden or empty until a file is chosen. -->
<section class="image-section">
<div class="image-container">
<img id="originalImage" src="" alt="Original Image" style="display: none;">
<canvas id="processedCanvas" width="0" height="0"></canvas>
</div>
</section>

<!-- Inputs: the visible label triggers the hidden file input; the button is
     disabled here and enabled by script.js once a file has been read. -->
<section class="input-section">
<div class="file-upload">
<label for="imageUpload">Choose File</label>
<input type="file" id="imageUpload" accept="image/*" style="display:none;" />
</div>
<input type="text" id="promptInput" placeholder="Enter your prompt (eg: detect car)" />
<button id="processButton" disabled>Analyze Image</button>
</section>

<!-- Status / answer text; script.js toggles its visibility and content. -->
<section class="status-section">
<p id="responseText" style="display: none;"></p>
</section>
</div>

<!-- Loaded at the end of the body; script.js waits for DOMContentLoaded
     before looking up the elements above by id. -->
<script src="script.js"></script>
</body>

</html>
186 changes: 186 additions & 0 deletions Demos/PaliGemma2-on-Web/public/script.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
document.addEventListener('DOMContentLoaded', () => {
// Look up the page elements this script drives (ids come from index.html).
const imageUpload = document.getElementById('imageUpload');
const processButton = document.getElementById('processButton');
const originalImage = document.getElementById('originalImage');
const processedCanvas = document.getElementById('processedCanvas');
const promptInput = document.getElementById('promptInput');
// Element that shows the "Analyzing..." status and the server's answer.
const responseTextDiv = document.getElementById('responseText');
// 2D context used to draw the image preview and the bounding box.
const ctx = processedCanvas.getContext('2d');

// State shared between the upload handler (writer) and the click handler (reader).
// imageBase64: the part of the uploaded file's data URL after the comma; sent to the server as `image`.
let imageBase64 = '';
// originalImageURL: the full data URL of the uploaded file.
let originalImageURL = '';
// originalImageObj: Image built from the data URL; its width/height are sent as originalWidth/originalHeight.
let originalImageObj;

// Initially hide the original image
originalImage.style.display = 'none';

// Handle image upload: read the chosen file as a data URL, preview it on the
// canvas scaled to fit the display area, and reset the prompt/response UI.
imageUpload.addEventListener('change', (event) => {
  const selectedFile = event.target.files[0];
  if (!selectedFile) {
    // Nothing was chosen (e.g. the dialog was cancelled); keep the current state.
    return;
  }

  const fileReader = new FileReader();
  fileReader.onload = (loadEvent) => {
    const dataUrl = loadEvent.target.result;

    // Keep both the part after the comma (sent to the server) and the full URL.
    imageBase64 = dataUrl.split(',')[1];
    originalImageURL = dataUrl;
    originalImage.src = originalImageURL;

    originalImageObj = new Image();
    originalImageObj.onload = () => {
      // Maximum dimensions for the display area
      const maxWidth = 600;
      const maxHeight = 400;

      // Fit to the maximum width first; if that makes the image too tall,
      // fit to the maximum height instead. Aspect ratio is kept either way.
      const aspectRatio = originalImageObj.width / originalImageObj.height;
      const heightAtMaxWidth = maxWidth / aspectRatio;
      const tooTall = heightAtMaxWidth > maxHeight;
      const displayWidth = tooTall ? maxHeight * aspectRatio : maxWidth;
      const displayHeight = tooTall ? maxHeight : heightAtMaxWidth;

      processedCanvas.width = displayWidth;
      processedCanvas.height = displayHeight;
      ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
      // Draw the original image on the canvas
      ctx.drawImage(originalImageObj, 0, 0, displayWidth, displayHeight);

      // Reveal the <img> preview only once the image has loaded.
      originalImage.style.display = 'block';
    };
    originalImageObj.src = originalImageURL;

    // A file is available now: allow analysis and clear any earlier answer/prompt.
    processButton.disabled = false;
    responseTextDiv.style.display = 'none';
    responseTextDiv.innerHTML = '';
    promptInput.value = '';
  };
  fileReader.readAsDataURL(selectedFile);
});

// Handle process button click: send the uploaded image and the prompt to the
// server, then render either the bounding box (detect prompts) or the text answer.

// Last non-zero canvas size seen when a request started. The canvas is collapsed
// to 0x0 after text-only answers, so without this the next request would send
// targetWidth/targetHeight of 0 and draw on an empty canvas.
let lastCanvasSize = null;

// Show `text` in the response element. textContent is used instead of innerHTML
// so that server/model output is never interpreted as markup.
const showStatus = (text) => {
  responseTextDiv.textContent = text == null ? '' : String(text);
  responseTextDiv.style.display = 'block';
};

// Collapse the canvas (resizing also clears it); used when there is nothing to draw.
const collapseCanvas = () => {
  processedCanvas.width = 0;
  processedCanvas.height = 0;
};

// Remove the stale "Analyzing..." status before telling the user the request failed.
const reportFailure = () => {
  responseTextDiv.textContent = '';
  responseTextDiv.style.display = 'none';
  alert('Error processing image.');
};

processButton.addEventListener('click', async () => {
  if (!imageBase64) {
    alert("Please upload an image first.");
    return;
  }

  // Remember the current canvas size, or restore it if a previous text-only
  // answer collapsed the canvas.
  if (processedCanvas.width > 0 && processedCanvas.height > 0) {
    lastCanvasSize = { width: processedCanvas.width, height: processedCanvas.height };
  } else if (lastCanvasSize) {
    processedCanvas.width = lastCanvasSize.width;
    processedCanvas.height = lastCanvasSize.height;
  }

  // Clear off previous results.
  ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
  showStatus('Analyzing...');

  // Prefix the prompt with the <image> token. For detection prompts only the
  // text after "detect" is kept as the label.
  let prompt = promptInput.value || "";
  if (prompt.toLowerCase().includes("detect")) {
    const labelMatch = prompt.match(/detect\s+(.*)/i);
    const label = labelMatch ? labelMatch[1] : 'Unknown';
    prompt = `<image>detect ${label}`;
  } else {
    prompt = `<image>${prompt}`;
  }
  const isDetection = prompt.includes("<image>detect");

  // Block repeated clicks while the request is in flight.
  processButton.disabled = true;
  try {
    const response = await fetch('/process-image', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        image: imageBase64,
        prompt,
        targetWidth: processedCanvas.width,
        targetHeight: processedCanvas.height,
        originalWidth: originalImageObj.width,
        originalHeight: originalImageObj.height,
      }),
    });

    if (!response.ok) {
      reportFailure();
      return;
    }

    const data = await response.json();

    if (data.success && isDetection) {
      // Only draw when the server actually returned a box.
      if (data.boundingBox) {
        drawBoundingBox(data.boundingBox, ctx);
      }
      showStatus(`Response: ${data.message}`);
    } else if (data.success) {
      collapseCanvas();
      showStatus(data.message);
    } else {
      collapseCanvas();
      showStatus(`Response: ${data.message}`);
    }
  } catch (error) {
    console.error('Error:', error);
    reportFailure();
  } finally {
    // Always re-enable the button, whether the request succeeded or failed.
    processButton.disabled = false;
  }
});


/**
 * Draw a bounding box, and a capitalised label strip when a label is given,
 * on a 2D canvas context.
 *
 * @param {{x1: number, y1: number, x2: number, y2: number, label?: string}} boundingBox
 *   Corner coordinates of the box in canvas pixels, plus the optional label text.
 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
 */
function drawBoundingBox(boundingBox, ctx) {
  const { x1, y1, x2, y2, label } = boundingBox;

  // One random color is shared by the box outline and the label background.
  const randomColor = getRandomColor();

  ctx.strokeStyle = randomColor;
  ctx.lineWidth = 5;
  ctx.strokeRect(x1, y1, x2 - x1, y2 - y1);

  // Without a usable label there is nothing more to draw.
  if (typeof label !== 'string' || label.length === 0) {
    return;
  }
  const labelText = label.charAt(0).toUpperCase() + label.slice(1);

  // The font must be set BEFORE measuring, otherwise the width is computed
  // for whatever font the context currently has rather than the one drawn.
  ctx.font = "bold 20px Arial";
  const labelPadding = 10;
  const labelHeight = 30;
  const labelWidth = ctx.measureText(labelText).width + 2 * labelPadding;

  // Place the strip above the box, but keep it on the canvas when the box
  // touches the top edge.
  const labelY = Math.max(0, y1 - labelHeight);

  // Draw background for the label (same random color as bounding box)
  ctx.fillStyle = randomColor;
  ctx.fillRect(x1, labelY, labelWidth, labelHeight);

  // Set the text color to white
  ctx.fillStyle = "white";
  ctx.fillText(labelText, x1 + labelPadding, labelY + labelHeight - labelPadding);
}

/**
 * Build a random CSS color string.
 *
 * @returns {string} A color of the form `rgb(r,g,b)` where each channel is an
 *   integer from 0 to 255.
 */
function getRandomColor() {
  // Channels are drawn in red, green, blue order, one Math.random() call each.
  const channels = Array.from({ length: 3 }, () => Math.floor(Math.random() * 256));
  return `rgb(${channels.join(',')})`;
}

/**
 * Escape the characters that are significant in HTML so the text can be
 * inserted into markup safely.
 *
 * @param {*} unsafe - Value to escape. Non-strings are converted with String();
 *   null and undefined yield an empty string instead of throwing.
 * @returns {string} The text with & < > " ' replaced by HTML entities.
 */
function escapeHtml(unsafe) {
  if (unsafe === null || unsafe === undefined) {
    return '';
  }
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#039;',
  };
  // Single pass: each special character is replaced exactly once, so the
  // ampersands introduced by the entities are never re-escaped.
  return String(unsafe).replace(/[&<>"']/g, (ch) => entities[ch]);
}

});
Loading

0 comments on commit 8e25ef7

Please sign in to comment.