Skip to content

Commit

Permalink
fix: shapefile upload fix - fixed vsizip file path bug [2025-01-24]
Browse files Browse the repository at this point in the history
  • Loading branch information
CHRISCARLON committed Jan 24, 2025
1 parent 78b0174 commit b129c74
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 35 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "duckdb-postgis"
version = "0.1.8"
version = "0.1.9"
edition = "2021"
authors = ["chris@enmeshed.dev", "serj@enmeshed.dev"]
description = "A library for transforming geospatial data using DuckDB and ingesting it into a PostGIS database."
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
cargo add duckdb-postgis
```

## Current v0.1.8 release notes
## Current v0.1.9 release notes

### This Rust library does the following things

Expand All @@ -18,7 +18,7 @@ cargo add duckdb-postgis
- Performs CRS transformation on the data if required - ensures the CRS is EPSG:4326
- Loads the data into a PostGIS table with a correctly defined geometry column

### Improvements for release 0.1.9
### Improvements for release 0.1.10

- Handle raster data file formats
- Discard rows that have errors in the geometry column: when a geometry error is encountered, skip the row and log it instead of letting the programme crash
Expand Down
57 changes: 27 additions & 30 deletions src/duckdb_load/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ impl DuckDBFileProcessor {
fn find_shapefile_path(zip_path: &str) -> Result<String, Box<dyn Error>> {
let file = File::open(zip_path)?;
let mut archive = ZipArchive::new(file)?;

// Find first .shp file in the archive
for i in 0..archive.len() {
let file = archive.by_index(i)?;
Expand All @@ -73,10 +73,10 @@ impl DuckDBFileProcessor {
return Ok(name.to_string());
}
}

Err("No .shp file found in ZIP archive".into())
}

fn process_new_file(&self) -> Result<(), Box<dyn Error>> {
// Call initial methods
self.create_data_table()?;
Expand Down Expand Up @@ -148,44 +148,41 @@ impl DuckDBFileProcessor {
b"xl/drawings",
b"xl/sharedStrings",
b"xl/metadata",
b"xl/calc"
b"xl/calc",
];

// Adjust shapefile patterns to match expected 4 elements
let shapefile_patterns: [&[u8]; 4] = [
b".shp",
b".dbf",
b".prj",
b".shx"
];

let shapefile_patterns: [&[u8]; 4] = [b".shp", b".dbf", b".prj", b".shx"];

// Check for Excel patterns first
let is_excel = excel_patterns.iter().any(|&pattern| {
rest.windows(pattern.len()).any(|window| window == pattern)
});
let is_excel = excel_patterns
.iter()
.any(|&pattern| rest.windows(pattern.len()).any(|window| window == pattern));

// Check for Shapefile patterns
let is_shapefile = shapefile_patterns.iter().any(|&pattern| {
rest.windows(pattern.len()).any(|window| window == pattern)
});
let is_shapefile = shapefile_patterns
.iter()
.any(|&pattern| rest.windows(pattern.len()).any(|window| window == pattern));

match (is_excel, is_shapefile) {
(true, false) => Some(FileType::Excel),
(false, true) => Some(FileType::Shapefile),
(true, true) => {
// In case both patterns are found (unlikely) - return none
println!("Error: Both patterns found - check file - none returned");
None
},
(false, false) => None
}
(false, false) => None,
}
},
}
// Excel (XLS) - Compound File Binary Format
[0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, ..] => Some(FileType::Excel),
// Parquet
[0x50, 0x41, 0x52, 0x31, ..] => Some(FileType::Parquet),
// Geopackage (SQLite)
[0x53, 0x51, 0x4C, 0x69, 0x74, 0x65, 0x20, 0x66, 0x6F, 0x72, 0x6D, 0x61, 0x74, 0x20, 0x33, 0x00, ..] => Some(FileType::Geopackage),
[0x53, 0x51, 0x4C, 0x69, 0x74, 0x65, 0x20, 0x66, 0x6F, 0x72, 0x6D, 0x61, 0x74, 0x20, 0x33, 0x00, ..] => {
Some(FileType::Geopackage)
}
_ => None,
}
}
Expand Down Expand Up @@ -242,10 +239,10 @@ impl DuckDBFileProcessor {
FileType::Shapefile => {
let shapefile_path = Self::find_shapefile_path(&self.file_path)?;
println!("Shapefile path: {}", shapefile_path);
let full_path = format!("/vsizip//{}/{}", self.file_path, shapefile_path);
let full_path = format!("/vsizip/{}/{}", self.file_path, shapefile_path);
println!("Full path: {}", full_path);
format!(
"CREATE TABLE data AS SELECT * FROM st_read('/vsizip//{}/{}');",
"CREATE TABLE data AS SELECT * FROM st_read('/vsizip/{}/{}');",
self.file_path, shapefile_path
)
}
Expand Down Expand Up @@ -293,21 +290,21 @@ impl DuckDBFileProcessor {
let mut archive = ZipArchive::new(file)?;
let shapefile_path = Self::find_shapefile_path(&self.file_path)?;
let prj_path = shapefile_path.replace(".shp", ".prj");

for i in 0..archive.len() {
let mut file = archive.by_index(i)?;
if file.name() == prj_path {
let mut prj_content = String::new();
file.read_to_string(&mut prj_content)?;

// Check for common British National Grid identifiers in the PRJ
if prj_content.contains("OSGB") || prj_content.contains("27700") {
println!("Found British National Grid CRS in PRJ file");
return Ok("27700".to_string());
}
}
}

// If we couldn't determine from PRJ, assume British National Grid for data
println!("No CRS found in PRJ file, assuming British National Grid (EPSG:27700)");
Ok("27700".to_string())
Expand All @@ -320,7 +317,7 @@ impl DuckDBFileProcessor {
);
let mut stmt = self.conn.prepare(&query)?;
let mut rows = stmt.query([])?;

if let Some(row) = rows.next()? {
let crs_number: String = row.get(0)?;
Ok(crs_number)
Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use duckdb_load::launch_process_file;

fn main() -> Result<(), Box<dyn std::error::Error>> {
launch_process_file(
"test_files/green.zip",
"/Users/cmcarlon/Downloads/Road_LAeq_16h_London.zip",
"test-table-1000",
"postgresql://admin:password@localhost:5432/gridwalk",
"test-schema-2",
Expand Down

0 comments on commit b129c74

Please sign in to comment.