-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrewe_ebon_extractor.js
99 lines (84 loc) · 3.4 KB
/
rewe_ebon_extractor.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import path from 'path';
import { readFileSync, readdirSync } from 'fs';
import fetch from 'node-fetch';
import { parseEBon } from 'rewe-ebon-parser';
// Absolute path of the folder holding the eBon PDFs: <cwd>/static/ebons.
const pdfFolder = path.resolve('static', 'ebons');

// Geocoding results memoized per address query string for this process.
const geoCache = new Map();
/**
 * Looks up the coordinates of a market address via the Nominatim search API.
 *
 * Successful lookups are memoized in `geoCache`, keyed by the
 * "street, zip, city" query string. Failed lookups are not cached, so a later
 * call retries them.
 *
 * @param {string} city - City of the market.
 * @param {string} zip - Postal code of the market.
 * @param {string} street - Street (and house number) of the market.
 * @returns {Promise<{lat: *, lon: *}>} `lat`/`lon` of the first search hit,
 *   passed through exactly as the API returned them, or
 *   `{ lat: null, lon: null }` when nothing was found or the request failed.
 *   Never rejects because of a network or HTTP error; those are logged.
 */
async function getLatLong(city, zip, street) {
  const query = `${street}, ${zip}, ${city}`;
  if (geoCache.has(query)) {
    return geoCache.get(query);
  }

  try {
    const url = `https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(query)}`;
    // Nominatim's usage policy asks for a User-Agent that identifies the
    // application; stock HTTP-library agents may be rejected.
    const response = await fetch(url, {
      headers: { 'User-Agent': 'rewe-ebon-extractor/1.0 (market address geocoding)' },
    });
    // A rate-limit or server error must not be mistaken for a search result.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }

    const data = await response.json();
    if (Array.isArray(data) && data.length > 0) {
      const latLong = { lat: data[0].lat, lon: data[0].lon };
      geoCache.set(query, latLong);
      console.log(`Geocoded: ${city}, ${zip}, ${street} → lat: ${latLong.lat}, lon: ${latLong.lon}`);
      return latLong;
    }
  } catch (error) {
    console.error(`Error fetching geolocation: ${error}`);
  }

  console.error(`Could not find geolocation for: ${city}, ${zip}, ${street}`);
  return { lat: null, lon: null };
}
/**
 * Parses every eBon PDF found in `pdfFolder`, geocodes each distinct market
 * address once, and returns the receipts in a flattened form.
 *
 * For each returned receipt:
 * - `marketAddress` (if present) gets `lat`/`lon` added when its address was
 *   geocoded, and is then replaced by its JSON string.
 * - `items`, `given`, `payback` and `taxDetails` (if truthy) are replaced by
 *   their JSON strings.
 *
 * Files that fail to parse are logged and skipped. A missing `pdfFolder` is
 * treated as "no files" rather than an error.
 *
 * @returns {Promise<object[]>} The processed receipts, or
 *   `[{ message: "No data available" }]` when nothing could be parsed.
 * @throws Re-throws any directory listing error other than `ENOENT`.
 */
async function parseEbons() {
  // Minimum gap between geocoding requests; the public Nominatim service
  // allows at most one request per second.
  const GEOCODE_INTERVAL_MS = 1000;

  let files;
  try {
    // Match the extension case-insensitively so "receipt.PDF" is not skipped.
    files = readdirSync(pdfFolder).filter(file => file.toLowerCase().endsWith('.pdf'));
  } catch (err) {
    if (err.code !== 'ENOENT') {
      throw err;
    }
    console.error(`eBon folder does not exist: ${pdfFolder}`);
    files = [];
  }

  const results = [];
  const uniqueMarkets = new Map();

  for (const file of files) {
    try {
      const filePath = path.join(pdfFolder, file);
      console.log(`Processing: ${filePath}`);

      const dataBuffer = readFileSync(filePath);
      const receipt = await parseEBon(dataBuffer);

      if (receipt.marketAddress) {
        const { city, zip, street } = receipt.marketAddress;
        // Only complete addresses are worth geocoding.
        if (city && zip && street) {
          const marketKey = `${street}, ${zip}, ${city}`;
          uniqueMarkets.set(marketKey, { city, zip, street });
        }
      }

      results.push(receipt);
    } catch (err) {
      console.error(`Error processing ${file}:`, err);
    }
  }

  // Fetch geolocation for each unique market, one request at a time with a
  // pause in between, instead of firing all requests simultaneously.
  const geoByMarket = new Map();
  for (const [marketKey, { city, zip, street }] of uniqueMarkets) {
    if (geoByMarket.size > 0) {
      await new Promise(resolve => setTimeout(resolve, GEOCODE_INTERVAL_MS));
    }
    geoByMarket.set(marketKey, await getLatLong(city, zip, street));
  }

  for (const receipt of results) {
    if (receipt.marketAddress) {
      const { city, zip, street } = receipt.marketAddress;
      const geoData = geoByMarket.get(`${street}, ${zip}, ${city}`);
      if (geoData) {
        receipt.marketAddress.lat = geoData.lat;
        receipt.marketAddress.lon = geoData.lon;
      }
      receipt.marketAddress = JSON.stringify(receipt.marketAddress);
    }

    // Nested structures are serialized so each receipt becomes a flat record.
    if (receipt.items) receipt.items = JSON.stringify(receipt.items);
    if (receipt.given) receipt.given = JSON.stringify(receipt.given);
    if (receipt.payback) receipt.payback = JSON.stringify(receipt.payback);
    if (receipt.taxDetails) receipt.taxDetails = JSON.stringify(receipt.taxDetails);
  }

  if (results.length === 0) {
    console.log(`[WARNING] No data found or parsed from ${pdfFolder}`);
    return [{ message: "No data available" }];
  }

  return results;
}
// Run the extraction once at module load (top-level await) and expose the
// resulting receipt list as the module's named export.
export const data = await parseEbons();