Skip to content

Commit

Permalink
Take db out of tracking--release should be single source of truth
Browse files Browse the repository at this point in the history
  • Loading branch information
camille-s committed Aug 27, 2024
1 parent 14bf05d commit 88e52bb
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@
.vscode/
envs/
.last_upload
*.duckdb
9 changes: 5 additions & 4 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ conda:

rule check_dates:
output:
flag = '.needs_update',
flag = touch('.needs_update'),
shell:
'bash scripts/compare_dates.sh .last_build {output.flag}'
'bash scripts/compare_dates.sh .last_build .needs_update'

rule download_data:
params:
key=os.environ['AIRTABLE_API_KEY'],
input:
flag = rules.check_dates.output.flag,
flag = '.needs_update',
output:
jsons = expand('input_data/{table}.json', table = tables),
script:
Expand Down Expand Up @@ -73,7 +73,8 @@ rule readme:

rule all:
input:
rules.check_dates.output.flag,
# rules.download_data.input.flag,
'.needs_update',
rules.gh_release.output.flag,
rules.md_upload.output.flag,
rules.readme.output.md,
Expand Down
Binary file modified dag.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified gloss.duckdb
Binary file not shown.
2 changes: 1 addition & 1 deletion input_data/vocab.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"term_id":"recRa5SvDqSywoTlM","term":"council of governments (COG)","definition":"Connecticut's towns are organized into planning regions, each served by a council of governments (COG) made up of mayors and first selectmen from the region. Starting in 2023, COGs began replacing previous county designations for federal and state statistics. ","url":"https:\/\/portal.ct.gov\/OPM\/IGPP\/ORG\/Planning-Regions\/Planning-Regions---Overview","term_order":1,"project":["town_viewer"]}]
[{"term_id":"recRa5SvDqSywoTlM","term":"Council of governments (COG)","definition":"Connecticut's towns are organized into planning regions, each served by a council of governments (COG) made up of mayors and first selectmen from the region. Starting in 2023, COGs began replacing previous county designations for federal and state statistics. ","url":"https:\/\/portal.ct.gov\/OPM\/IGPP\/ORG\/Planning-Regions\/Planning-Regions---Overview","term_order":1,"project":["town_viewer"]}]
6 changes: 4 additions & 2 deletions scripts/compare_dates.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ if [ -f "$flagin" ]; then
fi

# if need_update = 1, touch flagfile
if [ $need_update -eq 1 ]; then
if [ "$need_update" -eq 1 ]; then
touch "$flagout"
fi
fi

echo "$need_update"
96 changes: 96 additions & 0 deletions scripts/viewer_dictionary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
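# write json of town viewer definitions, one entry per source, with format
# -- source: source name, or 'General terms' for vocab
# -- variables:
# ---- term: variable display name, or vocab term
# ---- definition: variable detail (question where there is one), or vocab definition
# ---- url: set for vocab terms only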

# need:
# * variables containing town_viewer in project
# * sources corresponding to those variables
# * vocab containing town_viewer in project
library(dplyr)

# Build the town viewer dictionary from gloss.duckdb (opened read-only) and
# write it to output_data/dictionary.json.

out_path <- "output_data/dictionary.json"

# sql for a temporary view of this project's variables joined to sources:
# * keeps variables with town_viewer in project
# * detail is the question when there is one, otherwise the detail column
# * source is org and program pasted with a space, after swapping
#   'DataHaven' in org for 'Questions on the'
# * inner join, so variables with no matching dataset in sources drop out
view_sql <- "
create or replace temporary view town_viewer as (
with vrs as (
select variable, display, dataset, coalesce(question, detail) as detail, var_order
from variables
where list_contains(project, 'town_viewer')
order by var_order
),
src as (
select
replace(org, 'DataHaven', 'Questions on the') as org,
program,
dataset
from sources
)
select
vrs.variable,
vrs.display,
vrs.dataset,
vrs.detail,
vrs.var_order,
concat_ws(' ', src.org, src.program) as source
from vrs
inner join src
on vrs.dataset = src.dataset
);
"

# sql stacking vocab terms and variable definitions into one table of
# term, definition, source, url:
# * proj_defs numbers the rows sharing the same detail by var_order, and
#   proj_vars keeps only the first, such that
#   e.g. Latino population, percent Latino won't both appear
# * variables with no detail are left out
# * vocab terms for this project all get the source 'General terms'
# * variables have no url, so it's null for them
proj_sql <- "
with proj_defs as (
select *,
row_number() over (partition by detail order by var_order) as row
from town_viewer
where detail is not null
),
proj_vocab as (
select
term,
definition,
'General terms' as source,
url
from vocab
where list_contains(project, 'town_viewer')
order by term_order
),
proj_vars as (
select
display as term,
detail as definition,
source,
null as url
from proj_defs
where row = 1
order by var_order
)
select * from proj_vocab
union all by name
select * from proj_vars
;
"

con <- DBI::dbConnect(duckdb::duckdb("gloss.duckdb", read_only = TRUE))

# everything that uses the connection runs inside tryCatch so the finally
# clause closes it even when a query or the write fails; assignments made
# in the braces still land in the calling environment
tryCatch(
  {
    DBI::dbExecute(con, view_sql)
    proj <- DBI::dbGetQuery(con, proj_sql)

    # as_factor sets levels in order of first appearance, so grouping keeps
    # sources in the order they come out of the query instead of sorting
    # them alphabetically
    defs <- proj |>
      as_tibble() |>
      mutate(source = forcats::as_factor(source)) |>
      group_by(source) |>
      tidyr::nest(.key = "variables") |>
      ungroup()

    # don't assume the output folder is already there
    dir.create(dirname(out_path), showWarnings = FALSE, recursive = TRUE)
    jsonlite::write_json(defs, out_path)
  },
  # shutdown = TRUE also stops the embedded database instance rather than
  # leaving it for the garbage collector
  finally = DBI::dbDisconnect(con, shutdown = TRUE)
)

0 comments on commit 88e52bb

Please sign in to comment.