Skip to content

Commit

Permalink
bugfix/too-many-partitions (#41)
Browse files Browse the repository at this point in the history
* bugfix/too-many-partitions

* pr reference in changelog

* validation test rename

* Apply suggestions from code review

* partition removal

* docs regen

* docs adjustment
  • Loading branch information
fivetran-joemarkiewicz authored Sep 16, 2024
1 parent dcd2834 commit db28cd7
Show file tree
Hide file tree
Showing 15 changed files with 111 additions and 108 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ target/
dbt_modules/
logs/
.DS_Store
dbt_packages/
dbt_packages/
package-lock.yml
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# dbt_klaviyo v0.8.0
[PR #41](https://github.com/fivetran/dbt_klaviyo/pull/41) includes the following updates:

## Breaking Changes (Full refresh required after upgrading)
- Removed the `partition_by` logic from incremental models running on BigQuery. This change affects only BigQuery warehouses and resolves the `too many partitions` error that some users encountered. The partitioning was also deemed unnecessary for the mentioned models and their downstream references, offering no performance benefit. By removing it, we eliminate both the error risk and an unneeded configuration. This change applies to the following models:
    - `int_klaviyo__event_attribution`
    - `klaviyo__events`

## Under the Hood
- Added consistency and integrity validation tests for the `klaviyo__events` model.
- Cleaned up unnecessary variable configuration within the `integration_tests/dbt_project.yml` file.

# dbt_klaviyo v0.7.2
[PR #38](https://github.com/fivetran/dbt_klaviyo/pull/38) includes the following updates:

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Include the following klaviyo package version in your `packages.yml` file:
```yaml
packages:
- package: fivetran/klaviyo
version: [">=0.7.0", "<0.8.0"]
version: [">=0.8.0", "<0.9.0"]
```
### Step 3: Define database and schema variables
By default, this package runs using your destination and the `klaviyo` schema. If this is not where your Klaviyo data is (for example, if your Klaviyo schema is named `klaviyo_fivetran`), add the following configuration to your root `dbt_project.yml` file:
Expand Down
2 changes: 1 addition & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: 'klaviyo'
version: '0.7.2'
version: '0.8.0'
config-version: 2
require-dbt-version: [">=1.3.0", "<2.0.0"]
vars:
Expand Down
2 changes: 1 addition & 1 deletion docs/catalog.json

Large diffs are not rendered by default.

47 changes: 10 additions & 37 deletions docs/index.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/manifest.json

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion docs/run_results.json

This file was deleted.

56 changes: 4 additions & 52 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: 'klaviyo_integration_tests'
version: '0.7.2'
version: '0.8.0'
config-version: 2
profile: 'integration_tests'

Expand All @@ -9,64 +9,16 @@ dispatch:

vars:
klaviyo_schema: klaviyo_integration_tests
klaviyo_source:
klaviyo_source:
klaviyo_campaign_identifier: "campaign"
klaviyo_event_identifier: "event"
klaviyo_flow_identifier: "flow"
klaviyo_integration_identifier: "integration"
klaviyo_person_identifier: "person"
klaviyo_metric_identifier: "metric"
klaviyo__event_pass_through_columns: []
klaviyo__person_pass_through_columns: []

klaviyo:
event_table: "{{ ref( 'stg_klaviyo__event') }}"
campaign: "{{ ref( 'stg_klaviyo__campaign') }}"
flow: "{{ ref( 'stg_klaviyo__flow') }}"
integration: "{{ ref( 'stg_klaviyo__integration') }}"
person: "{{ ref( 'stg_klaviyo__person') }}"
metric: "{{ ref( 'stg_klaviyo__metric') }}"
klaviyo__email_attribution_lookback: 120 # in hours - 5 days -> https://help.klaviyo.com/hc/en-us/articles/115005248128#conversion-tracking1
klaviyo__sms_attribution_lookback: 24 # in hours -> https://help.klaviyo.com/hc/en-us/articles/115005248128#sms-conversion-tracking7

klaviyo__eligible_attribution_events: # default in klaviyo -> https://help.klaviyo.com/hc/en-us/articles/115005248128
- 'opened email'
- 'clicked email'
- 'clicked sms'

klaviyo__count_metrics:
- 'Active on Site' # default API metrics
- 'Viewed Product'
- 'Ordered Product' # default Shopify metrics
- 'Placed Order'
- 'Refunded Order' # note: klaviyo only tracks FULL refunds (not partial)
- 'Cancelled Order'
- 'Fulfilled Order'
- 'Received Email' # default email and sms conversion metrics in Klaviyo
- 'Clicked Email'
- 'Opened Email'
- 'Bounced Email'
- 'Marked Email as Spam'
- 'Dropped Email'
- 'Subscribed to List'
- 'Unsubscribed to List'
- 'Unsubscribed'
- 'Updated Email Preferences'
- 'Subscribed to Back in Stock'
- 'Merged Profile'
- 'Received SMS'
- 'Clicked SMS'
- 'Consented to Receive SMS'
- 'Sent SMS'
- 'Unsubscribed from SMS'
- 'Failed to Deliver SMS'

klaviyo__sum_revenue_metrics:
- 'Refunded Order'
- 'Placed Order'
- 'Ordered Product'
- 'checkout started'
- 'cancelled order'
models:
+schema: "klaviyo_{{ var('directed_schema','dev') }}"

seeds:
klaviyo_integration_tests:
Expand Down
3 changes: 1 addition & 2 deletions integration_tests/packages.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@

packages:
- local: ../
- local: ../
4 changes: 1 addition & 3 deletions integration_tests/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,4 @@ dbt-redshift>=1.3.0,<2.0.0
dbt-postgres>=1.3.0,<2.0.0
dbt-spark>=1.3.0,<2.0.0
dbt-spark[PyHive]>=1.3.0,<2.0.0
dbt-databricks>=1.6.0,<2.0.0

oscrypto @ git+https://github.com/wbond/oscrypto.git@d5f3437
dbt-databricks>=1.6.0,<2.0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{{ config(
    tags="fivetran_validations",
    enabled=var('fivetran_validation_tests_enabled', false)
) }}

-- Consistency validation for klaviyo__events.
-- Compares the copy of the model in the "<target schema>_klaviyo_prod" schema
-- against the copy in "<target schema>_klaviyo_dev" and returns every row that
-- exists in one build but not in the other, labelled with where it came from.
-- Only compiled when the fivetran_validation_tests_enabled variable is true.

with prod_rows as (

    select *
    from {{ target.schema }}_klaviyo_prod.klaviyo__events
),

dev_rows as (

    select *
    from {{ target.schema }}_klaviyo_dev.klaviyo__events
),

-- present in prod, absent from dev
missing_from_dev as (

    select * from prod_rows
    except distinct
    select * from dev_rows
),

-- present in dev, absent from prod
missing_from_prod as (

    select * from dev_rows
    except distinct
    select * from prod_rows
),

-- Stack both sets of discrepancies; duplicates are irrelevant because the
-- only thing that matters is whether any row is produced at all.
mismatches as (

    select
        *,
        'from prod' as source
    from missing_from_dev

    union all

    select
        *,
        'from dev' as source
    from missing_from_prod
)

select *
from mismatches
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{{ config(
    tags="fivetran_validations",
    enabled=var('fivetran_validation_tests_enabled', false)
) }}

-- Integrity validation for klaviyo__events.
-- Counts the rows in the staging event model and in the final events model,
-- lines the two totals up on a constant key, and returns a row only when the
-- totals differ.
-- Only compiled when the fivetran_validation_tests_enabled variable is true.

with staging_total as (

    select
        1 as join_key,
        count(*) as row_count
    from {{ ref('stg_klaviyo__event') }}
),

model_total as (

    select
        1 as join_key,
        count(*) as row_count
    from {{ ref('klaviyo__events') }}
),

-- Each side contributes exactly one row, both carrying join_key = 1.
comparison as (

    select
        model_total.join_key,
        model_total.row_count as ending_row_count,
        staging_total.row_count as source_row_count
    from model_total
    full outer join staging_total
        on staging_total.join_key = model_total.join_key
)

select *
from comparison
where ending_row_count != source_row_count
where ending_row_count != source_row_count
4 changes: 0 additions & 4 deletions models/intermediate/int_klaviyo__event_attribution.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
config(
materialized='incremental',
unique_key='unique_event_id',
partition_by={
"field": "occurred_on",
"data_type": "date"
} if target.type == 'bigquery' else none,
incremental_strategy = 'merge' if target.type not in ('postgres', 'redshift') else 'delete+insert',
file_format = 'delta'
)
Expand Down
4 changes: 0 additions & 4 deletions models/klaviyo__events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
config(
materialized='incremental',
unique_key='unique_event_id',
partition_by={
"field": "occurred_on",
"data_type": "date"
} if target.type == 'bigquery' else none,
incremental_strategy = 'merge' if target.type not in ('snowflake', 'postgres', 'redshift') else 'delete+insert',
file_format = 'delta'
)
Expand Down

0 comments on commit db28cd7

Please sign in to comment.