From 6fabfbdf0ea20895f5faca922aef9da8203c4314 Mon Sep 17 00:00:00 2001 From: rudokemper Date: Wed, 8 May 2024 08:01:26 -0400 Subject: [PATCH] Use sax to process OSM data in a streaming manner --- package-lock.json | 12 ++-------- package.json | 4 ++-- src/download_resources.js | 49 ++++++++++++++++++++++----------------- 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/package-lock.json b/package-lock.json index 5d2fc8a..b86a7ec 100644 --- a/package-lock.json +++ b/package-lock.json @@ -20,8 +20,8 @@ "path": "^0.12.7", "pg": "^8.11.5", "pmtiles": "3.0.3", - "sharp": "^0.33.2", - "xmldom": "^0.6.0" + "sax": "^1.3.0", + "sharp": "^0.33.2" }, "bin": { "mapgl-tile-renderer": "src/cli.js" @@ -10775,14 +10775,6 @@ "node": ">=4.0" } }, - "node_modules/xmldom": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.6.0.tgz", - "integrity": "sha512-iAcin401y58LckRZ0TkI4k0VSM1Qg0KGSc3i8rU+xrxe19A/BN1zHyVSJY7uoutVlaTSzYyk/v5AmkewAP7jtg==", - "engines": { - "node": ">=10.0.0" - } - }, "node_modules/xtend": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", diff --git a/package.json b/package.json index 77d3899..2a64023 100644 --- a/package.json +++ b/package.json @@ -24,8 +24,8 @@ "path": "^0.12.7", "pg": "^8.11.5", "pmtiles": "3.0.3", - "sharp": "^0.33.2", - "xmldom": "^0.6.0" + "sax": "^1.3.0", + "sharp": "^0.33.2" }, "devDependencies": { "@babel/core": "^7.23.9", diff --git a/src/download_resources.js b/src/download_resources.js index a88ea28..b1ef6c8 100644 --- a/src/download_resources.js +++ b/src/download_resources.js @@ -2,6 +2,7 @@ import fs from "fs"; import path from "path"; import axios from "axios"; import pLimit from "p-limit"; +import sax from "sax"; import osmtogeojson from "osmtogeojson"; import xmldom from "xmldom"; @@ -334,28 +335,34 @@ export const requestOpenStreetMapData = async (bounds, tempDir) => { console.log(`OpenStreetMap data downloaded!`); // Convert OSM XML data to OSM JSON using xmldom - // In the future, we might want to use a more robust OSM parser like - // https://github.com/tyrasd/osmtogeojson/blob/gh-pages/parse_osmxml.js - const parser = new xmldom.DOMParser(); - const osmData = parser.parseFromString( - fs.readFileSync(`${outputDir}/data.osm`, "utf-8"), - ); + console.log(`Converting OpenStreetMap data to GeoJSON...`); - // Convert OSM JSON to GeoJSON - const geojson = osmtogeojson(osmData); + const parser = sax.createStream(true); + let osmData = ""; - // Filter out lines and points only - geojson.features = geojson.features.filter( - (feature) => - feature.geometry.type === "LineString" || - feature.geometry.type === "Point", - ); + parser.on("text", (text) => { + osmData += text; + }); - fs.writeFileSync( - `${outputDir}/openstreetmap.geojson`, - JSON.stringify(geojson, null, 4), - ); - console.log( - `\x1b[32mOpenStreetMap data successfully downloaded and converted to GeoJSON!\x1b[0m`, - ); + parser.on("end", () => { + // Convert OSM JSON to GeoJSON + const geojson = osmtogeojson(osmData); + + // Filter out lines and points only + geojson.features = geojson.features.filter( + (feature) => + feature.geometry.type === "LineString" || + feature.geometry.type === "Point", + ); + + fs.writeFileSync( + `${outputDir}/openstreetmap.geojson`, + JSON.stringify(geojson, null, 4), + ); + console.log( + `\x1b[32mOpenStreetMap data successfully downloaded and converted to GeoJSON!\x1b[0m`, + ); + }); + + fs.createReadStream(`${outputDir}/data.osm`).pipe(parser); };