Skip to content

Commit

Permalink
Updated list and return free datasets location to read from S3 instead of hard-coded values
Browse files Browse the repository at this point in the history
  • Loading branch information
sindhu-ranga committed Jan 31, 2025
1 parent b95debe commit 9c7eca6
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 45 deletions.
2 changes: 1 addition & 1 deletion placekey/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.0.22'
__version__ = '0.0.23'
70 changes: 26 additions & 44 deletions placekey/placekey.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
from shapely.ops import transform
from shapely.strtree import STRtree
from shapely.wkt import loads as wkt_loads

import boto3
from botocore import UNSIGNED
from botocore.config import Config

RESOLUTION = 10
BASE_RESOLUTION = 12
Expand Down Expand Up @@ -48,62 +50,42 @@
'^' + '-'.join([FIRST_TUPLE_REGEX, TUPLE_REGEX, TUPLE_REGEX]) + '$')
WHAT_REGEX_V1 = re.compile('^[' + ALPHABET + ']{3,}(-[' + ALPHABET + ']{3,})?$')
WHAT_REGEX_V2 = re.compile('^[01][abcdefghijklmnopqrstuvwxyz234567]{9}$')
DATASET_LOCATIONS = {
"boston-food-establishment-inspections": "s3a://safegraph-public/placekey-free-datasets/boston-food-establishment-inspections/boston-food-establishment-inspections.csv",
"boston-property-assessment-data": "s3a://safegraph-public/placekey-free-datasets/boston-property-assessment-data/boston-property-assessment-data.csv",
"boston-public-works-violations": "s3a://safegraph-public/placekey-free-datasets/boston-public-works-violations/boston-public-works-violations.csv",
"chicago-building-permits": "s3a://safegraph-public/placekey-free-datasets/chicago-building-permits/chicago-building-permits.csv",
"chicago-scofflaw-law-violation-data": "s3a://safegraph-public/placekey-free-datasets/chicago-scofflaw-law-violation-data/chicago-scofflaw-law-violation-data.csv",
"chicago-vacant-and-abandoned-buildings": "s3a://safegraph-public/placekey-free-datasets/chicago-vacant-and-abandoned-buildings/chicago-vacant-and-abandoned-buildings.csv",
"chipotle-locations": "s3a://safegraph-public/placekey-free-datasets/chipotle-locations/chipotle-locations.csv",
"federally-qualified-health-centers-(fqhc)-provider-locations": "s3a://safegraph-public/placekey-free-datasets/federally-qualified-health-centers-(fqhc)-provider-locations/federally-qualified-health-centers-(fqhc)-provider-locations.csv",
"foursquare-open-source-places": "s3a://safegraph-public/placekey-free-datasets/foursquare-open-source-places/foursquare-open-source-places.csv",
"home-health-agency-medicare-enrollments": "s3a://safegraph-public/placekey-free-datasets/home-health-agency-medicare-enrollments/home-health-agency-medicare-enrollments.csv",
"home-infusion-therapy-provider-medicare-enrollments": "s3a://safegraph-public/placekey-free-datasets/home-infusion-therapy-provider-medicare-enrollments/home-infusion-therapy-provider-medicare-enrollments.csv",
"hospice-medicare-enrollments": "s3a://safegraph-public/placekey-free-datasets/hospice-medicare-enrollments/hospice-medicare-enrollments.csv",
"hospital-medicare-enrollments": "s3a://safegraph-public/placekey-free-datasets/hospital-medicare-enrollments/hospital-medicare-enrollments.csv",
"la-crime-2020-24": "s3a://safegraph-public/placekey-free-datasets/la-crime-2020-24/la-crime-2020-24.csv",
"national-address-database": "s3a://safegraph-public/placekey-free-datasets/national-address-database/national-address-database.csv",
"national-downloadable-files-from-the-doctors-and-clinicians-data-section": "s3a://safegraph-public/placekey-free-datasets/national-downloadable-files-from-the-doctors-and-clinicians-data-section/national-downloadable-files-from-the-doctors-and-clinicians-data-section.csv",
"national-provider-identifier-(npi)": "s3a://safegraph-public/placekey-free-datasets/national-provider-identifier-(npi)/national-provider-identifier-(npi).csv",
"national-provider-identifier": "s3a://safegraph-public/placekey-free-datasets/national-provider-identifier/national-provider-identifier.csv",
"nyc-acris-property-locations": "s3a://safegraph-public/placekey-free-datasets/nyc-acris-property-locations/nyc-acris-property-locations.csv",
"nyc-tax-liens-sale": "s3a://safegraph-public/placekey-free-datasets/nyc-tax-liens-sale/nyc-tax-liens-sale.csv",
"overture": "s3a://safegraph-public/placekey-free-datasets/overture/overture.csv",
"paycheck-protection-program-lender-locations": "s3a://safegraph-public/placekey-free-datasets/paycheck-protection-program-lender-locations/paycheck-protection-program-lender-locations.csv",
"paycheck-protection-program-loan-data": "s3a://safegraph-public/placekey-free-datasets/paycheck-protection-program-loan-data/paycheck-protection-program-loan-data.csv",
"philadelphia-affordable-housing-production": "s3a://safegraph-public/placekey-free-datasets/philadelphia-affordable-housing-production/philadelphia-affordable-housing-production.csv",
"philadelphia-certified-for-rental-suitability": "s3a://safegraph-public/placekey-free-datasets/philadelphia-certified-for-rental-suitability/philadelphia-certified-for-rental-suitability.csv",
"philadelphia-demolitions": "s3a://safegraph-public/placekey-free-datasets/philadelphia-demolitions/philadelphia-demolitions.csv",
"preferred-communications-skinny-file": "s3a://safegraph-public/placekey-free-datasets/preferred-communications-skinny-file/preferred-communications-skinny-file.csv",
"regrid-skinny-file": "s3a://safegraph-public/placekey-free-datasets/regrid-skinny-file/regrid-skinny-file.csv",
"resimplifi-skinny-file": "s3a://safegraph-public/placekey-free-datasets/resimplifi-skinny-file/resimplifi-skinny-file.csv",
"rural-health-clinic-medicare-enrollments": "s3a://safegraph-public/placekey-free-datasets/rural-health-clinic-medicare-enrollments/rural-health-clinic-medicare-enrollments.csv",
"skilled-nursing-facility-medicare-enrollments": "s3a://safegraph-public/placekey-free-datasets/skilled-nursing-facility-medicare-enrollments/skilled-nursing-facility-medicare-enrollments.csv",
"starbucks-and-dunkin": "s3a://safegraph-public/placekey-free-datasets/starbucks-and-dunkin/starbucks-and-dunkin.csv",
"supplemental-nutrition-assistance-program-locations": "s3a://safegraph-public/placekey-free-datasets/supplemental-nutrition-assistance-program-locations/supplemental-nutrition-assistance-program-locations.csv",
"throtle-skinny-file": "s3a://safegraph-public/placekey-free-datasets/throtle-skinny-file/throtle-skinny-file.csv",
"verisk-skinny-file": "s3a://safegraph-public/placekey-free-datasets/verisk-skinny-file/verisk-skinny-file.csv",
"windfall-skinny-file": "s3a://safegraph-public/placekey-free-datasets/windfall-skinny-file/windfall-skinny-file.csv"
}
# Shared S3 client used by the free-dataset helpers below. It is configured with
# signature_version=UNSIGNED, i.e. requests are sent without being signed, so
# no AWS credentials are needed to create or use it.
s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))

def list_free_datasets():
    """
    List the names of every free placekey'd dataset Placekey offers.

    The names are discovered at call time by listing the top-level
    "folders" (common prefixes) of the ``placekey-free-datasets`` S3 bucket
    through the module-level ``s3`` client, rather than from a hard-coded table.

    :return: A set of dataset names (str), one per top-level prefix in the bucket
    """
    folders = set()
    paginator = s3.get_paginator("list_objects_v2")
    # With an empty Prefix and Delimiter="/", S3 rolls every key up to its first
    # path segment and reports those segments as CommonPrefixes ("name/").
    for page in paginator.paginate(Bucket='placekey-free-datasets', Prefix='', Delimiter="/"):
        # A page with no prefixes omits the "CommonPrefixes" key entirely.
        for common_prefix in page.get("CommonPrefixes", []):
            # Drop the trailing delimiter so callers get the bare dataset name.
            folders.add(common_prefix["Prefix"].rstrip("/"))
    return folders

def return_free_datasets_location_by_name(name: str, url: bool = False):
    """
    Get the S3 location of a free dataset by its name. Find names using list_free_datasets.

    The location is looked up at call time by listing the objects under the
    ``<name>/csv`` prefix of the ``placekey-free-datasets`` bucket through the
    module-level ``s3`` client. Exactly one object is expected under that prefix.

    :param name: Dataset Name (str)
    :param url: Return an HTTPS URL instead of an S3 URI? Default is False (S3 URI)
    :return: The public S3 location of the placekey'd dataset
    :raises FileNotFoundError: If no object is found under the dataset's prefix
        (for example because the name is not correct)
    :raises ValueError: If more than one object is found under the dataset's prefix
    """
    response = s3.list_objects_v2(Bucket='placekey-free-datasets', Prefix=name + '/csv')

    # "Contents" is absent from the response when nothing matches the prefix.
    files = [obj["Key"] for obj in response.get("Contents", [])]

    if not files:
        raise FileNotFoundError("No files found in the specified S3 directory. Please notify Placekey.")
    if len(files) > 1:
        # The lookup is ambiguous; refuse to guess which object is the dataset.
        raise ValueError("Something went wrong. Please notify Placekey.")

    key = files[0]
    if url:
        return "https://placekey-free-datasets.s3.us-west-2.amazonaws.com/" + key
    return "s3://placekey-free-datasets/" + key

def _get_header_int():
"""
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
--index-url https://pypi.python.org/simple/
-e .
boto3

0 comments on commit 9c7eca6

Please sign in to comment.