Skip to content

Commit

Permalink
Merge pull request #46 from intelligentnode/45-add-image-to-text-func…
Browse files Browse the repository at this point in the history
…tionality

Added API for OCR
  • Loading branch information
intelligentnode authored Oct 30, 2023
2 parents 2ecb726 + c1ba9ca commit f92bc45
Show file tree
Hide file tree
Showing 11 changed files with 1,665 additions and 32 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Intelliserver is a microservice providing unified access to multiple AI models,
- **Image Generation**: generate quality images based on described contexts using diffusion image models.
- **Chat Context**: get the relevant messages for the chatbot conversation.
- **Parsers**: convert documents to text such as PDF and word.
- **OCR**: extract text from images using AWS Rekognition or Google Cloud Vision.

## Installation

Expand Down
11 changes: 11 additions & 0 deletions intelliserver/.dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,14 @@ lerna-debug.log*
.env.test.local
.env.production.local
.env.local


# general
node_modules
npm-debug.log
Dockerfile
.dockerignore
.git
.gitignore
README.md
LICENSE
4 changes: 2 additions & 2 deletions intelliserver/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# base image
FROM node:14
FROM node:14-alpine

# work directory
WORKDIR /app
Expand All @@ -8,7 +8,7 @@ WORKDIR /app
COPY package*.json ./

# install app dependencies
RUN npm install
RUN npm install --only=production

# copy app source
COPY . .
Expand Down
2 changes: 2 additions & 0 deletions intelliserver/api/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const semanticRouter = require('./functions/semanticsearch');
const evaluateRouter = require('./functions/evaluate');
const chatContextRouter = require('./functions/chatcontext');
const parserRoute = require('./parser/index');
const ocrRoute = require('./ocr/index');


// # api routers
Expand Down Expand Up @@ -71,6 +72,7 @@ app.use('/evaluate', evaluateRouter);
app.use('/chatcontext', chatContextRouter);

app.use('/parser', parserRoute)
app.use('/ocr', ocrRoute)

/* ### deploy the app ### */
var port = process.env.PORT || '80';
Expand Down
243 changes: 243 additions & 0 deletions intelliserver/api/ocr/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
const express = require('express');
const AWS = require('aws-sdk');
const path = require('path');
const fetch = require('node-fetch'); // For making HTTP requests
const { ImageAnnotatorClient } = require('@google-cloud/vision');
const getImageFromUrlOrFile = require('../../middleware/getImageFromUrlOrFile');
const awsConfigProvider = require('../../middleware/awsConfigProvider');
const { USE_DEFAULT_KEYS } = require(path.join(global.__basedir, 'config'));

const router = express.Router();


/**
 * Resolve the Google API key to use for a request.
 *
 * A key sent in the request body wins. Both `api_key` (the field this handler
 * has always read) and `apiKey` (the field name used in the route's swagger
 * documentation) are accepted. When the request carries no key, the
 * `GOOGLE_KEY` environment variable is used, but only if `USE_DEFAULT_KEYS`
 * is enabled.
 *
 * @param {object} req - Express request; `req.body` may be missing.
 * @returns {string|undefined} The key to use, or a falsy value when none is available.
 */
function getGoogleKey(req) {
  // Tolerate requests whose body was never parsed.
  const body = req.body ?? {};
  const requestKey = body.api_key || body.apiKey;

  if (requestKey) {
    return requestKey;
  }

  // No usable key in the request: fall back to the server key only when allowed.
  return USE_DEFAULT_KEYS ? process.env.GOOGLE_KEY : body.api_key;
}

/**
* @swagger
* /ocr/aws:
* post:
* summary: Perform OCR on an image using AWS Rekognition.
* tags:
* - OCR
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* properties:
* imageUrl:
* type: string
* description: The URL of the image to perform OCR on.
* responses:
* 200:
* description: OCR results.
* content:
* application/json:
* schema:
* type: object
* properties:
* status:
* type: string
* description: The status of the OCR operation (e.g., "OK").
* data:
* type: object
* properties:
* text:
* type: array
* items:
* type: string
* description: The extracted text from the image.
* 400:
* description: Invalid request or image URL.
* 500:
* description: Internal server error.
* parameters:
* - in: header
* name: X-aws-access-Key
* schema:
* type: string
* required: false
* description: Optional AWS access key for this specific request.
* - in: header
* name: X-aws-secret-Key
* schema:
* type: string
* required: false
* description: Optional AWS secret key for this specific request.
* - in: header
* name: X-aws-region
* schema:
* type: string
* required: false
* description: Optional AWS region for this specific request.
*/
// POST /ocr/aws
// Runs AWS Rekognition text detection on the image that the preceding
// middleware placed on `req.file` (downloaded from `imageUrl` or uploaded as
// the `image` form field), using the credentials stored on `req.awsConfig`.
// Responds with { status: 'OK', data: { text: string[] } }.
router.post('/aws', awsConfigProvider, getImageFromUrlOrFile, async (req, res) => {
  // A preceding middleware may already have answered the request
  // (for example when the image download failed); never respond twice.
  if (res.headersSent) {
    return;
  }

  // Neither an image URL nor an uploaded file: this is a client error, not a
  // server failure, so report it as 400 instead of crashing on `req.file`.
  const imageBuffer = req.file?.buffer;
  if (!imageBuffer || imageBuffer.length === 0) {
    res.status(400).json({
      status: 'ERROR',
      message: 'No image provided: send "imageUrl" or upload an "image" file',
    });
    return;
  }

  try {
    const { accessKeyId, secretAccessKey, region } = req.awsConfig;
    const rekognition = new AWS.Rekognition({ accessKeyId, secretAccessKey, region });

    const result = await rekognition
      .detectText({ Image: { Bytes: imageBuffer } })
      .promise();

    // Guard against a response without detections so an image with no text
    // yields an empty list rather than an exception.
    const detectedText = (result.TextDetections ?? []).map(
      ({ DetectedText }) => DetectedText,
    );

    res.json({
      status: 'OK',
      data: {
        text: detectedText,
      },
    });
  } catch (error) {
    res.status(500).json({ status: 'ERROR', message: error.message });
  }
});

/**
* @swagger
* /ocr/google:
* post:
* summary: Perform OCR on an image using Google Cloud Vision API.
* tags:
* - OCR
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* properties:
* imageUrl:
* type: string
* description: The URL of the image to perform OCR on.
* apiKey:
* type: string
* required: false
* description: Optional API key for this specific request.
* responses:
* 200:
* description: OCR results.
* content:
* application/json:
* schema:
* type: object
* properties:
* status:
* type: string
* description: The status of the OCR operation (e.g., "OK").
* data:
* type: object
* properties:
* text:
* type: string
* description: The extracted text from the image.
* 400:
* description: Invalid request or image URL.
* 500:
* description: Internal server error.
* parameters:
* - in: header
* name: X-aws-access-Key
* schema:
* type: string
* required: false
* description: Optional AWS access key for this specific request.
* - in: header
* name: X-aws-secret-Key
* schema:
* type: string
* required: false
* description: Optional AWS secret key for this specific request.
* - in: header
* name: X-aws-region
* schema:
* type: string
* required: false
* description: Optional AWS region for this specific request.
*/
// POST /ocr/google
// Runs Google Cloud Vision text detection on the image that the preceding
// middleware placed on `req.file`. When an API key is available (see
// getGoogleKey) the REST endpoint is called with that key; otherwise the
// `ImageAnnotatorClient` is used with whatever credentials it resolves itself.
// Responds with { status: 'OK', data: { text: string[] } }.
router.post('/google', getImageFromUrlOrFile, async (req, res) => {
  // A preceding middleware may already have answered the request
  // (for example when the image download failed); never respond twice.
  if (res.headersSent) {
    return;
  }

  // Neither an image URL nor an uploaded file: report a client error instead
  // of crashing on `req.file`.
  const imageBuffer = req.file?.buffer;
  if (!imageBuffer || imageBuffer.length === 0) {
    res.status(400).json({
      status: 'ERROR',
      message: 'No image provided: send "imageUrl" or upload an "image" file',
    });
    return;
  }

  // Map annotations to their text; a missing list (no text found) becomes [].
  const toDescriptions = (annotations) =>
    (annotations ?? []).map(({ description }) => description);

  // OCR through the official client library.
  const googleOcr = async () => {
    const client = new ImageAnnotatorClient();
    const [result] = await client.textDetection(imageBuffer);
    return toDescriptions(result.textAnnotations);
  };

  // OCR through the REST endpoint, authenticated with an API key.
  const fetchOcr = async (apiKey) => {
    // Encode the key so unexpected characters cannot alter the query string.
    const endpoint = `https://vision.googleapis.com/v1/images:annotate?key=${encodeURIComponent(apiKey)}`;
    const requestData = {
      requests: [
        {
          image: {
            content: imageBuffer.toString('base64'),
          },
          features: [{ type: 'TEXT_DETECTION' }],
        },
      ],
    };

    const response = await fetch(endpoint, {
      method: 'POST',
      body: JSON.stringify(requestData),
      headers: { 'Content-Type': 'application/json' },
    });

    if (response.status !== 200) {
      throw new Error(`Vision API request failed with status code ${response.status}`);
    }

    const responseData = await response.json();
    const [annotation = {}] = responseData.responses ?? [];

    // A 200 response can still carry an error for the individual image.
    if (annotation.error) {
      throw new Error(`Vision API error: ${annotation.error.message}`);
    }

    return toDescriptions(annotation.textAnnotations);
  };

  try {
    const apiKey = getGoogleKey(req);
    const detectedText = apiKey ? await fetchOcr(apiKey) : await googleOcr();

    res.json({
      status: 'OK',
      data: {
        text: detectedText,
      },
    });
  } catch (error) {
    res.status(500).json({ status: 'ERROR', message: error.message });
  }
});

module.exports = router;
5 changes: 5 additions & 0 deletions intelliserver/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,9 @@ module.exports = {
USE_API_AUTH: true,
/* show swagger docs */
SHOW_SWAGGER: process.env.SHOW_SWAGGER !== 'false',

/* AWS DEFAULT REGION */
AWS_DEFAULT_REGION: process.env.AWS_DEFAULT_REGION,
AWS_ACCESS_SECRET: process.env.AWS_ACCESS_SECRET,
AWS_ACCESS_KEY: process.env.AWS_ACCESS_KEY,
};
31 changes: 31 additions & 0 deletions intelliserver/middleware/awsConfigProvider.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// awsConfigProvider.js
const AWS = require('aws-sdk');
const path = require('path');

const config = require(path.join(global.__basedir, 'config'));

/**
 * Express middleware that decides which AWS credentials a request uses and
 * stores them on `req.awsConfig` as `{ accessKeyId, secretAccessKey, region }`.
 *
 * Resolution order:
 *  1. Credentials sent in the `X-aws-access-Key` / `X-aws-secret-Key` headers
 *     (both are required); `X-aws-region` is optional and falls back to
 *     `config.AWS_DEFAULT_REGION`.
 *  2. The server's configured credentials, only when `config.USE_DEFAULT_KEYS`
 *     is enabled.
 *  3. Otherwise the request is rejected with 400, because no credentials may
 *     be used for it.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} next - Continues to the next middleware or route handler.
 */
const awsConfigProvider = (req, res, next) => {
  const accessKeyId = req.header('X-aws-access-Key');
  const secretAccessKey = req.header('X-aws-secret-Key');
  const region = req.header('X-aws-region');

  req.awsConfig = {};

  // Caller-supplied credentials always win, whether or not defaults are enabled.
  if (accessKeyId && secretAccessKey) {
    req.awsConfig = {
      accessKeyId,
      secretAccessKey,
      region: region ?? config.AWS_DEFAULT_REGION,
    };
    next();
    return;
  }

  // No complete credential pair in the request: the server's own keys may be
  // used only when default keys are enabled.
  if (config.USE_DEFAULT_KEYS) {
    req.awsConfig = {
      accessKeyId: config.AWS_ACCESS_KEY,
      secretAccessKey: config.AWS_ACCESS_SECRET,
      region: config.AWS_DEFAULT_REGION,
    };
    next();
    return;
  }

  res.status(400).json({
    status: 'ERROR',
    message: 'AWS credentials are required: send the X-aws-access-Key and X-aws-secret-Key headers',
  });
};

module.exports = awsConfigProvider;
34 changes: 34 additions & 0 deletions intelliserver/middleware/getImageFromUrlOrFile.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
const multer = require('multer');
const fetch = require('node-fetch'); // For making HTTP requests

// Configure multer for file uploads
const upload = multer();

// Middleware function to fetch and add image buffer to req.file
/**
 * Express middleware that makes the request's image available on `req.file`.
 *
 * When the body contains `imageUrl`, the image is downloaded and stored as
 * `req.file = { fieldname, originalname, buffer }`. The request is then handed
 * to multer's `upload.single('image')`, which handles an uploaded `image` form
 * field and finally calls `next`.
 *
 * Error responses (the chain stops; `next` is not called):
 *  - 400 when `imageUrl` is not a valid http(s) URL.
 *  - 500 when the download fails.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} next - Continues to the route handler.
 */
const getImageFromUrlOrFile = async (req, res, next) => {
  // Tolerate requests whose body was never parsed.
  const imageUrl = req.body?.imageUrl;

  if (imageUrl) {
    // NOTE(review): this downloads a caller-supplied URL from the server;
    // consider restricting allowed hosts / blocking internal addresses.
    let parsedUrl = null;
    try {
      parsedUrl = new URL(imageUrl);
    } catch {
      parsedUrl = null;
    }

    if (!parsedUrl || !['http:', 'https:'].includes(parsedUrl.protocol)) {
      res.status(400).json({ status: 'ERROR', message: 'Invalid image URL' });
      return;
    }

    try {
      const response = await fetch(imageUrl);

      if (!response.ok) {
        throw new Error('Failed to fetch the image');
      }

      const imageBuffer = await response.buffer();

      // Mimic the shape multer produces so route handlers can read `req.file.buffer`.
      req.file = { fieldname: 'image', originalname: 'image', buffer: imageBuffer };
    } catch (error) {
      res.status(500).json({ status: 'ERROR', message: error.message });
      // Stop here: the response is already sent, so the route handler must not run.
      return;
    }
  }

  // Continue to the file upload middleware (upload.single('image'))
  upload.single('image')(req, res, next);
};

module.exports = getImageFromUrlOrFile;
Loading

0 comments on commit f92bc45

Please sign in to comment.