forked from google-gemini/gemma-cookbook
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
679 additions
and
113 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
### Developed by [Nitin Tiwari](https://linkedin.com/in/tiwari-nitin). | ||
|
||
# Inference PaliGemma 2 on the browser with ONNX & Transformers.js | ||
This project runs inference with the paligemma2-3b-mix-224 model in the browser, using its converted ONNX weights and Hugging Face Transformers.js. | ||
|
||
## PaliGemma 2 to ONNX Conversion: | ||
 | ||
|
||
|
||
## Steps to run: | ||
|
||
1. Clone the repository on your local machine. | ||
2. Navigate to `gemma-cookbook/Demos/PaliGemma2-on-Web` directory. | ||
3. Run `npm install` to install the Node.js packages. | ||
4. Run `node server.js` to start the server. | ||
5. Open `http://localhost:3000` in your web browser and start running inference with PaliGemma 2. | ||
|
||
> [!NOTE] | ||
> On the first run, loading the model weights takes around 10-15 minutes. | ||
## Results: | ||
 | ||
|
||
|
||
## Resources & References | ||
|
||
1. [Google DeepMind PaliGemma 2](https://developers.googleblog.com/en/introducing-paligemma-2-mix/) | ||
2. Colab Notebooks: | ||
<table> | ||
<tr> | ||
<td><b>Convert and quantize PaliGemma 2 to ONNX</b></td> | ||
<td><a target="_blank" href="https://colab.research.google.com/github/NSTiwari/PaliGemma2-ONNX-Transformers.js/blob/main/Convert_PaliGemma2_to_ONNX.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td> | ||
</tr> | ||
<tr> | ||
<td><b>Inference PaliGemma 2 with Transformers.js</b></td> | ||
<td><a target="_blank" href="https://colab.research.google.com/github/NSTiwari/PaliGemma2-ONNX-Transformers.js/blob/main/Inference_PaliGemma2_with_Transformers_js.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td> | ||
</tr> | ||
</table> | ||
|
||
3. [**Medium Blog**](https://medium.com/@tiwarinitin1999/inference-paligemma-2-with-transformers-js-5545986ac14a) for step-by-step implementation. | ||
4. [ONNX Community](https://huggingface.co/onnx-community) |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
{ | ||
"name": "paligemma2-onnx-transformers.js", | ||
"version": "1.0.0", | ||
"main": "server.js", | ||
"type": "module", | ||
"scripts": { | ||
"test": "echo \"Error: no test specified\" && exit 1", | ||
"start": "node server.js" | ||
}, | ||
"keywords": [], | ||
"author": "Nitin Tiwari", | ||
"license": "MIT", | ||
"description": "Run PaliGemma 2 inference in the browser using ONNX weights and Transformers.js.", | ||
"repository": { | ||
"type": "git", | ||
"url": "git+https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js.git" | ||
}, | ||
"bugs": { | ||
"url": "https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js/issues" | ||
}, | ||
"homepage": "https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js#readme", | ||
"dependencies": { | ||
"@huggingface/transformers": "^3.3.3", | ||
"canvas": "^3.1.0", | ||
"express": "^4.21.2", | ||
"server.js": "^1.0.0" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Inference PaliGemma 2 with 🤗 Transformers.js</title>
    <link rel="stylesheet" href="style.css">
</head>

<body>
    <div class="container">
        <header>
            <h1>Inference PaliGemma 2 with 🤗 Transformers.js</h1>
        </header>

        <!-- Preview area: script.js fills #originalImage and draws on #processedCanvas. -->
        <section class="image-section">
            <div class="image-container">
                <img id="originalImage" src="" alt="Original Image" style="display: none;">
                <canvas id="processedCanvas" width="0" height="0"></canvas>
            </div>
        </section>

        <!-- Inputs: the label stands in for the hidden file input; the button stays
             disabled until script.js has read an uploaded image. -->
        <section class="input-section">
            <div class="file-upload">
                <label for="imageUpload">Choose File</label>
                <input type="file" id="imageUpload" accept="image/*" style="display:none;" />
            </div>
            <input type="text" id="promptInput" placeholder="Enter your prompt (eg: detect car)" />
            <button id="processButton" disabled>Analyze Image</button>
        </section>

        <!-- Status/result line, shown by script.js once a request starts. -->
        <section class="status-section">
            <p id="responseText" style="display: none;"></p>
        </section>
    </div>

    <script src="script.js"></script>
</body>

</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
document.addEventListener('DOMContentLoaded', () => { | ||
// DOM nodes used by the handlers below (ids are defined in index.html).
const imageUpload = document.getElementById('imageUpload');
const processButton = document.getElementById('processButton');
const originalImage = document.getElementById('originalImage');
const processedCanvas = document.getElementById('processedCanvas');
const promptInput = document.getElementById('promptInput');
const responseTextDiv = document.getElementById('responseText');
const ctx = processedCanvas.getContext('2d');

// State shared between the upload handler (writer) and the click handler (reader).
let imageBase64 = '';       // data-URL payload after the first comma; empty until an upload is read
let originalImageURL = '';  // full data URL of the uploaded file
let originalImageObj;       // Image built from the data URL; its width/height are sent to the server

// Initially hide the original image
originalImage.style.display = 'none';
|
||
/**
 * Scales a width/height pair to fill a bounding area while keeping its aspect ratio.
 * The width is fitted first; if the resulting height overflows, the height is
 * fitted instead.
 *
 * @param {number} width - Source width.
 * @param {number} height - Source height.
 * @param {number} maxWidth - Width of the display area.
 * @param {number} maxHeight - Height of the display area.
 * @returns {{displayWidth: number, displayHeight: number}} Scaled dimensions.
 */
function fitWithin(width, height, maxWidth, maxHeight) {
    const aspectRatio = width / height;

    let displayWidth = maxWidth;
    let displayHeight = maxWidth / aspectRatio;

    if (displayHeight > maxHeight) {
        displayHeight = maxHeight;
        displayWidth = maxHeight * aspectRatio;
    }

    return { displayWidth, displayHeight };
}

// Handle image upload
imageUpload.addEventListener('change', (event) => {
    const file = event.target.files[0];
    if (!file) {
        return;
    }

    const reader = new FileReader();
    reader.onload = (e) => {
        // Keep both the bare base64 payload (sent to the server) and the full data URL (preview).
        imageBase64 = e.target.result.split(',')[1];
        originalImageURL = e.target.result;
        originalImage.src = originalImageURL;

        originalImageObj = new Image();
        originalImageObj.onload = () => {
            // Maximum dimensions for the display area
            const maxWidth = 600;
            const maxHeight = 400;

            // Keep aspect ratio while scaling
            const { displayWidth, displayHeight } = fitWithin(
                originalImageObj.width,
                originalImageObj.height,
                maxWidth,
                maxHeight
            );

            processedCanvas.width = displayWidth;
            processedCanvas.height = displayHeight;
            ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
            ctx.drawImage(originalImageObj, 0, 0, displayWidth, displayHeight); // Draw the original image on the canvas

            // Show the original image after it's loaded for preview
            originalImage.style.display = 'block';
        };
        originalImageObj.src = originalImageURL;

        // Enable the process button and reset the previous result and prompt
        processButton.disabled = false;
        responseTextDiv.style.display = 'none';
        responseTextDiv.innerHTML = '';
        promptInput.value = '';
    };
    reader.readAsDataURL(file);
});
|
||
// Handle process button click: send the uploaded image and prompt to
// POST /process-image and render either a bounding box or the text answer.
processButton.addEventListener('click', async () => {
    if (!imageBase64) {
        alert("Please upload an image first.");
        return;
    }

    // Writes the status/result line. textContent is used so that text coming
    // back from the server is never parsed as HTML.
    const showMessage = (text) => {
        responseTextDiv.textContent = text;
        responseTextDiv.style.display = 'block';
    };

    // Collapses the canvas; assigning the dimensions also wipes its contents.
    // NOTE(review): after this the canvas is 0x0, so a later request for the same
    // image sends targetWidth/targetHeight of 0 - confirm this is intended.
    const hideCanvas = () => {
        processedCanvas.width = 0;
        processedCanvas.height = 0;
    };

    // Tells the user the request failed and removes the stale "Analyzing..." line.
    const reportFailure = () => {
        responseTextDiv.style.display = 'none';
        alert('Error processing image.');
    };

    // Clear off previous results.
    ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
    showMessage('Analyzing...');

    // Any prompt mentioning "detect" becomes "<image>detect <label>"; everything
    // else is passed through behind the "<image>" prefix.
    const userPrompt = promptInput.value || "";
    const isDetection = userPrompt.toLowerCase().includes("detect");
    let prompt;
    if (isDetection) {
        const labelMatch = userPrompt.match(/detect\s+(.*)/i);
        const label = labelMatch ? labelMatch[1] : 'Unknown';
        prompt = `<image>detect ${label}`;
    } else {
        prompt = `<image>${userPrompt}`;
    }

    try {
        const response = await fetch('/process-image', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                image: imageBase64,
                prompt: prompt,
                targetWidth: processedCanvas.width,
                targetHeight: processedCanvas.height,
                originalWidth: originalImageObj.width,
                originalHeight: originalImageObj.height,
            }),
        });

        if (!response.ok) {
            reportFailure();
            return;
        }

        const data = await response.json();

        if (!data.success) {
            hideCanvas();
            showMessage("Response: " + data.message);
            return;
        }

        if (isDetection) {
            const { boundingBox } = data;
            // A successful reply without a box still shows its message instead of throwing.
            if (boundingBox) {
                drawBoundingBox(boundingBox, ctx);
            }
            showMessage("Response: " + data.message);
        } else {
            hideCanvas();
            showMessage(data.message);
        }
    } catch (error) {
        console.error('Error:', error);
        reportFailure();
    }
});
|
||
|
||
/**
 * Draws a bounding box and a filled label tag on a 2D canvas context.
 *
 * The box and the tag share one randomly chosen colour; the label text is
 * drawn in white with its first character upper-cased.
 *
 * @param {{x1: number, y1: number, x2: number, y2: number, label: string}} boundingBox
 *   Corner coordinates in canvas pixels and the label to print.
 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
 */
function drawBoundingBox(boundingBox, ctx) {
    const { x1, y1, x2, y2, label } = boundingBox;

    // Generate random color for the bounding box and label background
    const randomColor = getRandomColor();

    // Set styles for the bounding box (random color stroke)
    ctx.strokeStyle = randomColor;
    ctx.lineWidth = 5;
    ctx.strokeRect(x1, y1, x2 - x1, y2 - y1);

    const labelText = label == null ? '' : String(label);
    const caption = labelText.charAt(0).toUpperCase() + labelText.slice(1);

    // The font must be set BEFORE measuring; otherwise the width is computed for
    // whatever font the context currently holds and the tag does not fit the text.
    ctx.font = "bold 20px Arial";
    const labelPadding = 10;
    const labelWidth = ctx.measureText(caption).width + labelPadding * 2;
    const labelHeight = 30;
    // The tag sits above the box, clamped so it stays on the canvas when the box
    // touches the top edge.
    const labelY = Math.max(0, y1 - labelHeight);

    // Draw background for the label (same random color as bounding box)
    ctx.fillStyle = randomColor;
    ctx.fillRect(x1, labelY, labelWidth, labelHeight);

    // Set the text color to white
    ctx.fillStyle = "white";
    ctx.fillText(caption, x1 + labelPadding, labelY + labelHeight - labelPadding);
}
|
||
/**
 * Generates a random colour.
 *
 * @returns {string} A CSS colour of the form `rgb(r,g,b)` where each channel is
 *   an integer from 0 to 255.
 */
function getRandomColor() {
    const randomChannel = () => Math.floor(Math.random() * 256);
    const r = randomChannel();
    const g = randomChannel();
    const b = randomChannel();
    return `rgb(${r},${g},${b})`;
}
|
||
/**
 * Escapes the characters that are significant in HTML so a string can be
 * inserted into markup as plain text.
 *
 * @param {*} unsafe - Value to escape; null/undefined yield an empty string,
 *   other non-strings are converted with String().
 * @returns {string} The input with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(unsafe) {
    const text = unsafe == null ? '' : String(unsafe);
    // "&" is replaced first so the entities produced below are not escaped again.
    return text
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#039;");
}
|
||
}); |
Oops, something went wrong.