Commit e368c93

Merge pull request #167 from brooklyn-data/add_support_for_dbt_spark
Add support for dbt-spark adapter
NiallRees committed Aug 11, 2022
2 parents f554a04 + 5c2faaa
Showing 20 changed files with 46 additions and 17 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -1,7 +1,7 @@
# dbt Artifacts Package
This package builds a mart of tables and views describing the project it is installed in. In pre V1 versions of the package, the artifacts dbt produces were uploaded to the warehouse, hence the name of the package. That's no longer the case, but the name has stuck!

-The package currently supports Databricks and Snowflake.
+The package currently supports Databricks, Spark and Snowflake adapters.

Models included:

@@ -28,7 +28,7 @@ See the generated [dbt docs site](https://brooklyn-data.github.io/dbt_artifacts/
```
packages:
- package: brooklyn-data/dbt_artifacts
-    version: 1.0.0
+    version: 1.1.0
```

2. Run `dbt deps` to install the package
@@ -146,6 +146,8 @@ tox -e integration_snowflake # For the Snowflake tests
tox -e integration_databricks # For the Databricks tests
```
+The Spark tests require installing the [ODBC driver](https://www.databricks.com/spark/odbc-drivers-download). On a Mac, `DBT_ENV_SPARK_DRIVER_PATH` should be set to `/Library/simba/spark/lib/libsparkodbc_sbu.dylib`. Spark tests have not yet been added to the integration tests.
+
### SQLFluff
We use SQLFluff to keep SQL style consistent. A GitHub action automatically tests pull requests and adds annotations where there are failures. SQLFluff can also be run locally with `tox`. To install tox, we recommend using `pipx`.
2 changes: 1 addition & 1 deletion dbt_project.yml
@@ -1,5 +1,5 @@
name: 'dbt_artifacts'
-version: '1.0.0'
+version: '1.1.0'
config-version: 2
require-dbt-version: ">=1.0.0"
profile: "dbt_artifacts"
2 changes: 1 addition & 1 deletion docs/manifest.json

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions integration_test_project/example-env.sh
@@ -13,6 +13,8 @@ export DBT_ENV_SECRET_SNOWFLAKE_TEST_WAREHOUSE=
export DBT_ENV_SECRET_DATABRICKS_HOST=
export DBT_ENV_SECRET_DATABRICKS_HTTP_PATH=
export DBT_ENV_SECRET_DATABRICKS_TOKEN=
+export DBT_ENV_SPARK_DRIVER_PATH= # /Library/simba/spark/lib/libsparkodbc_sbu.dylib on a Mac
+export DBT_ENV_SPARK_ENDPOINT= # The endpoint ID from the Databricks HTTP path

# dbt environment variables, change these
export DBT_CLOUD_PROJECT_ID=
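Note: with dbt-spark's `odbc` method, `endpoint` is the ID of the Databricks SQL endpoint, i.e. the final segment of the HTTP path. For a hypothetical `http_path` of `/sql/1.0/endpoints/1234567890abcdef`, the endpoint ID would be `1234567890abcdef`.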
9 changes: 9 additions & 0 deletions integration_test_project/profiles.yml
@@ -25,3 +25,12 @@ dbt_artifacts:
http_path: "{{ env_var('DBT_ENV_SECRET_DATABRICKS_HTTP_PATH') }}"
token: "{{ env_var('DBT_ENV_SECRET_DATABRICKS_TOKEN') }}"
threads: 8
+    spark:
+      type: spark
+      method: odbc
+      schema: dbt_artifacts_test_commit_spark_{{ env_var('GITHUB_SHA_OVERRIDE', '') if env_var('GITHUB_SHA_OVERRIDE', '') else env_var('GITHUB_SHA') }}
+      host: "{{ env_var('DBT_ENV_SECRET_DATABRICKS_HOST') }}"
+      driver: "{{ env_var('DBT_ENV_SPARK_DRIVER_PATH') }}"
+      endpoint: "{{ env_var('DBT_ENV_SPARK_ENDPOINT') }}"
+      token: "{{ env_var('DBT_ENV_SECRET_DATABRICKS_TOKEN') }}"
+      threads: 8
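The `schema` value embeds the commit SHA so that concurrent CI runs write to distinct schemas, preferring a manual override when one is set. Jinja treats an empty string as falsy, so the inline `x if x else y` conditional is equivalent to `x or y`; a slightly more readable form of that line (a sketch, not part of this diff) would be:

```
schema: "dbt_artifacts_test_commit_spark_{{ env_var('GITHUB_SHA_OVERRIDE', '') or env_var('GITHUB_SHA') }}"
```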
4 changes: 4 additions & 0 deletions integration_test_project/snapshots/snapshot.sql
@@ -1,10 +1,14 @@
+-- Exclude this from Spark tests as it complains:
+-- Snapshot functionality requires file_format be set to 'delta' or 'hudi'
+-- It's not possible to dynamically change the file_format depending on target
{% snapshot my_snapshot %}
{{
config(
strategy='check',
unique_key='id',
target_schema='snapshot',
check_cols=['id', 'fruit'],
+        tags="snapshot"
)
}}

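Spark only supports snapshots on transactional file formats, which is why the snapshot is excluded from the Spark build in tox.ini below. Purely as an illustration, if the Spark test endpoint were guaranteed to handle Delta (an assumption, not something this change relies on), the snapshot could instead opt in through dbt-spark's `file_format` config:

```
-- Hedged sketch: assumes the Spark test endpoint can write Delta tables
{% snapshot my_snapshot %}
{{
    config(
        strategy='check',
        unique_key='id',
        target_schema='snapshot',
        check_cols=['id', 'fruit'],
        file_format='delta',
        tags="snapshot"
    )
}}
-- snapshot body unchanged
{% endsnapshot %}
```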
2 changes: 1 addition & 1 deletion macros/column_identifier.sql
@@ -10,6 +10,6 @@
${{ column_index }}
{%- endmacro %}

-{% macro databricks__column_identifier(column_index) -%}
+{% macro spark__column_identifier(column_index) -%}
col{{ column_index }}
{%- endmacro %}
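This rename, repeated across the macros below, is what enables the new adapter: dbt-databricks is built on top of dbt-spark, and `adapter.dispatch` falls back from a child adapter's prefix to its parent's, so a single `spark__` implementation now serves both Spark and Databricks connections (the `${{ column_index }}` fragment above is likely the tail of the Snowflake implementation, which uses positional column references). A minimal sketch of the dispatch pattern, assuming the wrapper matches the one visible in macros/insert_into_metadata_table.sql further down:

```
{# Wrapper: resolves to an adapter-specific implementation at runtime #}
{% macro column_identifier(column_index) -%}
    {{ return(adapter.dispatch('column_identifier', 'dbt_artifacts')(column_index)) }}
{%- endmacro %}

{# Spark implementation; also picked up on Databricks via adapter inheritance #}
{% macro spark__column_identifier(column_index) -%}
    col{{ column_index }}
{%- endmacro %}
```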
2 changes: 1 addition & 1 deletion macros/create_exposures_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_exposures_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_exposures_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
2 changes: 1 addition & 1 deletion macros/create_invocations_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_invocations_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_invocations_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
dbt_version STRING,
2 changes: 1 addition & 1 deletion macros/create_model_executions_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_model_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_model_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
2 changes: 1 addition & 1 deletion macros/create_models_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_models_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_models_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
2 changes: 1 addition & 1 deletion macros/create_seed_executions_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_seed_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_seed_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
2 changes: 1 addition & 1 deletion macros/create_seeds_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_seeds_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_seeds_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
2 changes: 1 addition & 1 deletion macros/create_snapshot_executions_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_snapshot_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_snapshot_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create or replace table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
2 changes: 1 addition & 1 deletion macros/create_snapshots_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_snapshots_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_snapshots_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
2 changes: 1 addition & 1 deletion macros/create_sources_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_sources_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_sources_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
2 changes: 1 addition & 1 deletion macros/create_test_executions_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_test_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_test_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
2 changes: 1 addition & 1 deletion macros/create_tests_table_if_not_exists.sql
@@ -16,7 +16,7 @@

{%- endmacro %}

-{% macro databricks__get_create_tests_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
+{% macro spark__get_create_tests_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
2 changes: 1 addition & 1 deletion macros/insert_into_metadata_table.sql
@@ -2,7 +2,7 @@
{{ return(adapter.dispatch('insert_into_metadata_table', 'dbt_artifacts')(database_name, schema_name, table_name, content)) }}
{%- endmacro %}

-{% macro databricks__insert_into_metadata_table(database_name, schema_name, table_name, content) -%}
+{% macro spark__insert_into_metadata_table(database_name, schema_name, table_name, content) -%}
{% set insert_into_table_query %}
insert into {{ schema_name }}.{{ table_name }}
{{ content }}
12 changes: 12 additions & 0 deletions tox.ini
@@ -84,6 +84,8 @@ passenv =
DBT_ENV_SECRET_DATABRICKS_HOST
DBT_ENV_SECRET_DATABRICKS_HTTP_PATH
DBT_ENV_SECRET_DATABRICKS_TOKEN
+    DBT_ENV_SPARK_DRIVER_PATH
+    DBT_ENV_SPARK_ENDPOINT
DBT_CLOUD_PROJECT_ID
DBT_CLOUD_JOB_ID
DBT_CLOUD_RUN_ID
@@ -128,3 +130,13 @@ commands =
dbt run-operation create_dbt_artifacts_tables --target databricks
dbt build --exclude dbt_artifacts --target databricks
dbt build -s dbt_artifacts --target databricks

+[testenv:integration_spark]
+changedir = integration_test_project
+deps = dbt-spark[ODBC]~=1.1.0
+commands =
+    dbt deps
+    dbt run-operation create_dbt_artifacts_tables --target spark
+    dbt build --exclude dbt_artifacts tag:snapshot --target spark
+    dbt build -s dbt_artifacts --target spark
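`--exclude dbt_artifacts tag:snapshot` unions two selectors: it skips the package's own models and every node tagged `snapshot`, i.e. the snapshot above that Spark cannot build without a `delta` or `hudi` file format. To run this environment locally, fill in and source integration_test_project/example-env.sh, then run `tox -e integration_spark`.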
