diff --git a/.scripts/README.md b/.scripts/README.md index 778f46c2..27b39472 100644 --- a/.scripts/README.md +++ b/.scripts/README.md @@ -25,7 +25,7 @@ These scripts also require setting up a great_expectations datasource. To do so, These scripts allow for handling passwords via two means which reduce the risk of committing credentials to source control. -1. Set the `REDSHIFT_PASSWORD` or `SNOWFLAKE_PASSWORD` environment variables for Redshift and Snowflake respectively, or the `BIGQUERY_CREDS` environment variable for Bigquery. +1. Set the `REDSHIFT_PASSWORD` or `SNOWFLAKE_PASSWORD` environment variables for Redshift and Snowflake respectively, or for BigQuery set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of your JSON service account key file. 2. Pass the relevant credential to the relevant argument of the script in question. diff --git a/.scripts/run_config.sh b/.scripts/run_config.sh index 1f2ea23d..1fb20f01 100755 --- a/.scripts/run_config.sh +++ b/.scripts/run_config.sh @@ -52,23 +52,8 @@ if [ $target == "Default" ]; then elif [ "$target" == "BigQuery" ]; then - BIGQUERY_CREDS=${BIGQUERY_CREDS:-$CREDENTIALS} - export GOOGLE_APPLICATION_CREDENTIALS=$root_path/tmp/bq_creds.json + export GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-$CREDENTIALS} - if [ -n "$BIGQUERY_CREDS" ]; then - - # If creds provided via env var or argument, set trap to clean up, then create creds file.
- cleanup() { - echo "run_config: Removing playbook file" - rm -f $root_path/tmp/current_playbook.yml - echo "run_config: Removing credentials file" - rm -f $root_path/tmp/bq_creds.json - } - - echo "run_config: writing bq creds to file" - echo $BIGQUERY_CREDS > $root_path/tmp/bq_creds.json - - fi fi for i in "${playbooks[@]}"; diff --git a/.scripts/run_test.sh b/.scripts/run_test.sh index 48c809d2..ce882ffb 100755 --- a/.scripts/run_test.sh +++ b/.scripts/run_test.sh @@ -22,35 +22,10 @@ cd $root_path/.test set -e -if [ "$DATABASE" == "bigquery" ]; then - - BIGQUERY_CREDS=${BIGQUERY_CREDS:-$CREDENTIALS} - - if [ -n "$BIGQUERY_CREDS" ]; then - - # If creds provided via env var or argument, set trap to clean up, then create creds file. - cleanup() { - echo "run_test: Removing credentials file" - rm -f $root_path/tmp/bq_creds.json - } - trap cleanup EXIT - - mkdir -p "${root_path}/tmp" - echo "run_test: writing bq creds to file" - echo $BIGQUERY_CREDS > $root_path/tmp/bq_creds.json - - fi - - # Set GOOGLE_APPLICATION_CREDENTIALS env var. - export GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-$root_path/tmp/bq_creds.json} - -else - - # If not BQ, take the relevant env var if it exists, set it to whatever's provided otherwise. - export REDSHIFT_PASSWORD=${REDSHIFT_PASSWORD:-$CREDENTIALS} - export SNOWFLAKE_PASSWORD=${SNOWFLAKE_PASSWORD:-$CREDENTIALS} - -fi +# Take the relevant env var if it exists, set it to whatever's provided otherwise. 
+export REDSHIFT_PASSWORD=${REDSHIFT_PASSWORD:-$CREDENTIALS} +export SNOWFLAKE_PASSWORD=${SNOWFLAKE_PASSWORD:-$CREDENTIALS} +export GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-$CREDENTIALS} # Set dummy env vars if not set already (to avoid config error) export REDSHIFT_PASSWORD=${REDSHIFT_PASSWORD:-'dummy'} diff --git a/.test/great_expectations/expectations/mobile/v1/mobile_base.json b/.test/great_expectations/expectations/mobile/v1/mobile_base.json new file mode 100644 index 00000000..a37e12de --- /dev/null +++ b/.test/great_expectations/expectations/mobile/v1/mobile_base.json @@ -0,0 +1,145 @@ +{ + "data_asset_type": "Dataset", + "expectation_suite_name": "base", + "expectations": [ + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "platform" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "etl_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "collector_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "dvce_created_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "event" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "event_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "name_tracker" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "v_tracker" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "v_collector" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "v_etl" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "device_user_id" + } + }, + { + "expectation_type": 
"expect_column_values_to_not_be_null", + "kwargs": { + "column": "network_userid" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "dvce_sent_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "session_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "session_index" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "derived_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "event_vendor" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "event_name" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "event_format" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "event_version" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "event" + } + }, + { + "expectation_type": "expect_column_values_to_be_unique", + "kwargs": { + "column": "event_id" + } + } + ], + "meta": { + "versions": { + "test_suite_version": "1.0.1", + "bigquery_model_version": "1.0.0" + }, + "great_expectations.__version__": "0.12.0" + } +} diff --git a/.test/great_expectations/expectations/mobile/v1/mobile_base_redshift.json b/.test/great_expectations/expectations/mobile/v1/mobile_base_redshift.json index eff8278c..1c6054a7 100644 --- a/.test/great_expectations/expectations/mobile/v1/mobile_base_redshift.json +++ b/.test/great_expectations/expectations/mobile/v1/mobile_base_redshift.json @@ -248,7 +248,7 @@ ], "meta": { "versions": { - "test_suite_version": "1.0.0", + "test_suite_version": "1.0.1", "redshift_model_version": "1.0.0" }, "great_expectations.__version__": "0.12.0" diff --git 
a/.test/great_expectations/expectations/mobile/v1/mobile_metadata.json b/.test/great_expectations/expectations/mobile/v1/mobile_metadata.json index 26dc99c0..82e347a2 100644 --- a/.test/great_expectations/expectations/mobile/v1/mobile_metadata.json +++ b/.test/great_expectations/expectations/mobile/v1/mobile_metadata.json @@ -102,8 +102,9 @@ ], "meta": { "versions": { - "test_suite_version": "1.0.0", - "redshift_model_version": "1.0.0" + "test_suite_version": "1.0.1", + "redshift_model_version": "1.0.0", + "bigquery_model_version": "1.0.0" }, "great_expectations.__version__": "0.12.0" } diff --git a/.test/great_expectations/expectations/mobile/v1/mobile_screen_view_in_session_values.json b/.test/great_expectations/expectations/mobile/v1/mobile_screen_view_in_session_values.json index 2a4f3959..1cce6e8a 100644 --- a/.test/great_expectations/expectations/mobile/v1/mobile_screen_view_in_session_values.json +++ b/.test/great_expectations/expectations/mobile/v1/mobile_screen_view_in_session_values.json @@ -26,8 +26,9 @@ ], "meta": { "versions": { - "test_suite_version": "1.0.0", - "redshift_model_version": "1.0.0" + "test_suite_version": "1.0.1", + "redshift_model_version": "1.0.0", + "bigquery_model_version": "1.0.0" }, "great_expectations.__version__": "0.12.0" } diff --git a/.test/great_expectations/expectations/mobile/v1/mobile_screen_views.json b/.test/great_expectations/expectations/mobile/v1/mobile_screen_views.json index 6ec6b996..41c3f69f 100644 --- a/.test/great_expectations/expectations/mobile/v1/mobile_screen_views.json +++ b/.test/great_expectations/expectations/mobile/v1/mobile_screen_views.json @@ -5,7 +5,7 @@ { "expectation_type": "expect_table_column_count_to_equal", "kwargs": { - "value": 57 + "value": 58 } }, { @@ -27,6 +27,7 @@ "dvce_created_tstamp", "collector_tstamp", "derived_tstamp", + "model_tstamp", "screen_view_name", "screen_view_transition_type", "screen_view_type", @@ -138,7 +139,12 @@ "column": "derived_tstamp" } }, - + { + 
"expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "model_tstamp" + } + }, { "expectation_type": "expect_column_values_to_not_be_null", "kwargs": { @@ -160,8 +166,8 @@ ], "meta": { "versions": { - "test_suite_version": "1.0.0", - "redshift_model_version": "1.0.0" + "test_suite_version": "1.0.1", + "bigquery_model_version": "1.0.0" }, "great_expectations.__version__": "0.12.0" } diff --git a/.test/great_expectations/expectations/mobile/v1/mobile_screen_views_redshift.json b/.test/great_expectations/expectations/mobile/v1/mobile_screen_views_redshift.json new file mode 100644 index 00000000..2ac5f6b0 --- /dev/null +++ b/.test/great_expectations/expectations/mobile/v1/mobile_screen_views_redshift.json @@ -0,0 +1,168 @@ +{ + "data_asset_type": "Dataset", + "expectation_suite_name": "mobile_screen_views", + "expectations": [ + { + "expectation_type": "expect_table_column_count_to_equal", + "kwargs": { + "value": 57 + } + }, + { + "expectation_type": "expect_table_columns_to_match_ordered_list", + "kwargs": { + "column_list": [ + "screen_view_id", + "event_id", + "app_id", + "user_id", + "device_user_id", + "network_userid", + "session_id", + "session_index", + "previous_session_id", + "session_first_event_id", + "screen_view_in_session_index", + "screen_views_in_session", + "dvce_created_tstamp", + "collector_tstamp", + "derived_tstamp", + "screen_view_name", + "screen_view_transition_type", + "screen_view_type", + "screen_fragment", + "screen_top_view_controller", + "screen_view_controller", + "screen_view_previous_id", + "screen_view_previous_name", + "screen_view_previous_type", + "platform", + "dvce_screenwidth", + "dvce_screenheight", + "device_manufacturer", + "device_model", + "os_type", + "os_version", + "android_idfa", + "apple_idfa", + "apple_idfv", + "open_idfa", + "device_latitude", + "device_longitude", + "device_latitude_longitude_accuracy", + "device_altitude", + "device_altitude_accuracy", + "device_bearing", + 
"device_speed", + "geo_country", + "geo_region", + "geo_city", + "geo_zipcode", + "geo_latitude", + "geo_longitude", + "geo_region_name", + "geo_timezone", + "user_ipaddress", + "useragent", + "carrier", + "network_technology", + "network_type", + "build", + "version" + ] + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "screen_view_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "event_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "device_user_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "network_userid" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "session_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "session_index" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "screen_view_in_session_index" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "screen_views_in_session" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "dvce_created_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "collector_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "derived_tstamp" + } + }, + + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "screen_view_name" + } + }, + { + "expectation_type": "expect_column_values_to_be_unique", + "kwargs": { + "column": "event_id" + } + }, + { + "expectation_type": "expect_column_values_to_be_unique", + "kwargs": { + "column": "screen_view_id" + } + } + ], + "meta": { + "versions": { + "test_suite_version": "1.0.1", + 
"redshift_model_version": "1.0.0" + }, + "great_expectations.__version__": "0.12.0" + } +} diff --git a/.test/great_expectations/expectations/mobile/v1/mobile_sessions.json b/.test/great_expectations/expectations/mobile/v1/mobile_sessions.json index 24f662ee..76dc110f 100644 --- a/.test/great_expectations/expectations/mobile/v1/mobile_sessions.json +++ b/.test/great_expectations/expectations/mobile/v1/mobile_sessions.json @@ -5,7 +5,7 @@ { "expectation_type": "expect_table_column_count_to_equal", "kwargs": { - "value": 62 + "value": 63 } }, { @@ -20,6 +20,7 @@ "session_last_event_id", "start_tstamp", "end_tstamp", + "model_tstamp", "user_id", "device_user_id", "network_userid", @@ -113,6 +114,12 @@ "column": "end_tstamp" } }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "model_tstamp" + } + }, { "expectation_type": "expect_column_values_to_not_be_null", "kwargs": { @@ -146,8 +153,8 @@ ], "meta": { "versions": { - "test_suite_version": "1.0.0", - "redshift_model_version": "1.0.0" + "test_suite_version": "1.0.1", + "bigquery_model_version": "1.0.0" }, "great_expectations.__version__": "0.12.0" } diff --git a/.test/great_expectations/expectations/mobile/v1/mobile_sessions_redshift.json b/.test/great_expectations/expectations/mobile/v1/mobile_sessions_redshift.json new file mode 100644 index 00000000..6ef2fbcd --- /dev/null +++ b/.test/great_expectations/expectations/mobile/v1/mobile_sessions_redshift.json @@ -0,0 +1,154 @@ +{ + "data_asset_type": "Dataset", + "expectation_suite_name": "mobile_sessions", + "expectations": [ + { + "expectation_type": "expect_table_column_count_to_equal", + "kwargs": { + "value": 62 + } + }, + { + "expectation_type": "expect_table_columns_to_match_ordered_list", + "kwargs": { + "column_list": [ + "app_id", + "session_id", + "session_index", + "previous_session_id", + "session_first_event_id", + "session_last_event_id", + "start_tstamp", + "end_tstamp", + "user_id", + "device_user_id", + 
"network_userid", + "session_duration_s", + "has_install", + "screen_views", + "screen_names_viewed", + "app_errors", + "fatal_app_errors", + "first_event_name", + "last_event_name", + "first_screen_view_name", + "first_screen_view_transition_type", + "first_screen_view_type", + "last_screen_view_name", + "last_screen_view_transition_type", + "last_screen_view_type", + "platform", + "dvce_screenwidth", + "dvce_screenheight", + "device_manufacturer", + "device_model", + "os_type", + "os_version", + "android_idfa", + "apple_idfa", + "apple_idfv", + "open_idfa", + "device_latitude", + "device_longitude", + "device_latitude_longitude_accuracy", + "device_altitude", + "device_altitude_accuracy", + "device_bearing", + "device_speed", + "geo_country", + "geo_region", + "geo_city", + "geo_zipcode", + "geo_latitude", + "geo_longitude", + "geo_region_name", + "geo_timezone", + "user_ipaddress", + "useragent", + "name_tracker", + "v_tracker", + "carrier", + "network_technology", + "network_type", + "first_build", + "last_build", + "first_version", + "last_version" + ] + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "session_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "session_index" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "session_first_event_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "session_last_event_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "start_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "end_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "device_user_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": 
"network_userid" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "first_event_name" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "last_event_name" + } + }, + { + "expectation_type": "expect_column_values_to_be_unique", + "kwargs": { + "column": "session_id" + } + } + ], + "meta": { + "versions": { + "test_suite_version": "1.0.1", + "redshift_model_version": "1.0.0" + }, + "great_expectations.__version__": "0.12.0" + } +} diff --git a/.test/great_expectations/expectations/mobile/v1/mobile_staging_reconciliation.json b/.test/great_expectations/expectations/mobile/v1/mobile_staging_reconciliation.json index e3ebec0d..7bf45272 100644 --- a/.test/great_expectations/expectations/mobile/v1/mobile_staging_reconciliation.json +++ b/.test/great_expectations/expectations/mobile/v1/mobile_staging_reconciliation.json @@ -87,8 +87,9 @@ "meta": { "great_expectations.__version__": "0.12.0", "versions": { + "test_suite_version": "1.0.1", "redshift_model_version": "1.0.0", - "test_suite_version": "1.0.0" + "bigquery_model_version": "1.0.0" } } -} \ No newline at end of file +} diff --git a/.test/great_expectations/expectations/mobile/v1/mobile_users.json b/.test/great_expectations/expectations/mobile/v1/mobile_users.json index fdf0a746..84819cdf 100644 --- a/.test/great_expectations/expectations/mobile/v1/mobile_users.json +++ b/.test/great_expectations/expectations/mobile/v1/mobile_users.json @@ -5,7 +5,7 @@ { "expectation_type": "expect_table_column_count_to_equal", "kwargs": { - "value": 40 + "value": 41 } }, { @@ -17,6 +17,7 @@ "network_userid", "start_tstamp", "end_tstamp", + "model_tstamp", "screen_views", "screen_names_viewed", "sessions", @@ -79,6 +80,12 @@ "column": "end_tstamp" } }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "model_tstamp" + } + }, { "expectation_type": "expect_column_values_to_not_be_null", "kwargs": { 
@@ -106,8 +113,8 @@ ], "meta": { "versions": { - "test_suite_version": "1.0.0", - "redshift_model_version": "1.0.0" + "test_suite_version": "1.0.1", + "bigquery_model_version": "1.0.0" }, "great_expectations.__version__": "0.12.0" } diff --git a/.test/great_expectations/expectations/mobile/v1/mobile_users_redshift.json b/.test/great_expectations/expectations/mobile/v1/mobile_users_redshift.json new file mode 100644 index 00000000..5b188f6d --- /dev/null +++ b/.test/great_expectations/expectations/mobile/v1/mobile_users_redshift.json @@ -0,0 +1,114 @@ +{ + "data_asset_type": "Dataset", + "expectation_suite_name": "mobile_users", + "expectations": [ + { + "expectation_type": "expect_table_column_count_to_equal", + "kwargs": { + "value": 40 + } + }, + { + "expectation_type": "expect_table_columns_to_match_ordered_list", + "kwargs": { + "column_list": [ + "user_id", + "device_user_id", + "network_userid", + "start_tstamp", + "end_tstamp", + "screen_views", + "screen_names_viewed", + "sessions", + "sessions_duration_s", + "active_days", + "app_errors", + "fatal_app_errors", + "first_screen_view_name", + "first_screen_view_transition_type", + "first_screen_view_type", + "last_screen_view_name", + "last_screen_view_transition_type", + "last_screen_view_type", + "platform", + "dvce_screenwidth", + "dvce_screenheight", + "device_manufacturer", + "device_model", + "os_type", + "first_os_version", + "last_os_version", + "android_idfa", + "apple_idfa", + "apple_idfv", + "open_idfa", + "geo_country", + "geo_region", + "geo_city", + "geo_zipcode", + "geo_latitude", + "geo_longitude", + "geo_region_name", + "geo_timezone", + "first_carrier", + "last_carrier" + ] + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "device_user_id" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "network_userid" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + 
"column": "start_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "end_tstamp" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "sessions" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "sessions_duration_s" + } + }, + { + "expectation_type": "expect_column_values_to_not_be_null", + "kwargs": { + "column": "active_days" + } + }, + { + "expectation_type": "expect_column_values_to_be_unique", + "kwargs": { + "column": "device_user_id" + } + } + ], + "meta": { + "versions": { + "test_suite_version": "1.0.1", + "redshift_model_version": "1.0.0" + }, + "great_expectations.__version__": "0.12.0" + } +} diff --git a/.test/great_expectations/validation_configs/mobile/v1/bigquery/perm_tables.json b/.test/great_expectations/validation_configs/mobile/v1/bigquery/perm_tables.json new file mode 100644 index 00000000..19707374 --- /dev/null +++ b/.test/great_expectations/validation_configs/mobile/v1/bigquery/perm_tables.json @@ -0,0 +1,45 @@ +{ + "validation_operator_name": "action_list_operator", + "batches": [ + { + "batch_kwargs": { + "table": "derived.mobile_screen_views", + "datasource": "bigquery", + "bigquery_temp_table": "ge_test_derived_mobile_sv" + }, + "expectation_suite_names": ["mobile.v1.mobile_screen_views"] + }, + { + "batch_kwargs": { + "query": "SELECT session_id, count(DISTINCT screen_views_in_session) AS dist_svis_values, count(*) - count(DISTINCT screen_view_in_session_index) AS all_minus_dist_svisi, count(*) - count(DISTINCT screen_view_id) AS all_minus_dist_svids FROM derived.mobile_screen_views GROUP BY 1", + "datasource": "bigquery", + "bigquery_temp_table": "ge_test_derived_mobile_sv_in_sess_index" + }, + "expectation_suite_names": ["mobile.v1.mobile_screen_view_in_session_values"] + }, + { + "batch_kwargs": { + "table": "derived.mobile_sessions", + "datasource": "bigquery", + 
"bigquery_temp_table": "ge_test_derived_mobile_sess" + }, + "expectation_suite_names": ["mobile.v1.mobile_sessions"] + }, + { + "batch_kwargs": { + "table": "derived.mobile_users", + "datasource": "bigquery", + "bigquery_temp_table": "ge_test_derived_mobile_usr" + }, + "expectation_suite_names": ["mobile.v1.mobile_users"] + }, + { + "batch_kwargs": { + "query": "SELECT *, rows_this_run - distinct_key_count AS diff_rows FROM derived.datamodel_metadata WHERE model = 'mobile'", + "datasource": "bigquery", + "bigquery_temp_table": "ge_test_derived_mobile_metadata" + }, + "expectation_suite_names": ["mobile.v1.mobile_metadata"] + } + ] +} diff --git a/.test/great_expectations/validation_configs/mobile/v1/bigquery/temp_tables.json b/.test/great_expectations/validation_configs/mobile/v1/bigquery/temp_tables.json new file mode 100644 index 00000000..545dce7f --- /dev/null +++ b/.test/great_expectations/validation_configs/mobile/v1/bigquery/temp_tables.json @@ -0,0 +1,59 @@ +{ + "validation_operator_name": "action_list_operator", + "batches": [ + { + "batch_kwargs": { + + "table": "scratch.mobile_events_this_run", + "datasource": "bigquery", + "bigquery_temp_table": "ge_test_scratch_mobile_events_this_run" + }, + "expectation_suite_names": ["mobile.v1.mobile_base"] + }, + { + "batch_kwargs": { + + "table": "scratch.mobile_events_staged", + "datasource": "bigquery", + "bigquery_temp_table": "ge_test_scratch_mobile_events_staged" + }, + "expectation_suite_names": ["mobile.v1.mobile_base"] + }, + { + "batch_kwargs": { + + "table": "scratch.mobile_screen_views_this_run", + "datasource": "bigquery", + "bigquery_temp_table": "ge_test_scratch_mobile_sv_this_run" + }, + "expectation_suite_names": ["mobile.v1.mobile_screen_views"] + }, + { + "batch_kwargs": { + + "table": "scratch.mobile_screen_views_staged", + "datasource": "bigquery", + "bigquery_temp_table": "ge_test_scratch_mobile_sv_staged" + }, + "expectation_suite_names": ["mobile.v1.mobile_screen_views"] + }, + { + 
"batch_kwargs": { + + "table": "scratch.mobile_sessions_this_run", + "datasource": "bigquery", + "bigquery_temp_table": "ge_test_scratch_mobile_sess_this_run" + }, + "expectation_suite_names": ["mobile.v1.mobile_sessions"] + }, + { + "batch_kwargs": { + + "table": "scratch.mobile_users_this_run", + "datasource": "bigquery", + "bigquery_temp_table": "ge_test_scratch_mobile_usr_this_run" + }, + "expectation_suite_names": ["mobile.v1.mobile_users"] + } + ] +} diff --git a/.test/great_expectations/validation_configs/mobile/v1/redshift/perm_tables.json b/.test/great_expectations/validation_configs/mobile/v1/redshift/perm_tables.json index 7bdb7e2f..ea27dd3f 100644 --- a/.test/great_expectations/validation_configs/mobile/v1/redshift/perm_tables.json +++ b/.test/great_expectations/validation_configs/mobile/v1/redshift/perm_tables.json @@ -7,7 +7,7 @@ "table": "mobile_screen_views", "datasource": "redshift" }, - "expectation_suite_names": ["mobile.v1.mobile_screen_views"] + "expectation_suite_names": ["mobile.v1.mobile_screen_views_redshift"] }, { "batch_kwargs": { @@ -22,7 +22,7 @@ "table": "mobile_sessions", "datasource": "redshift" }, - "expectation_suite_names": ["mobile.v1.mobile_sessions"] + "expectation_suite_names": ["mobile.v1.mobile_sessions_redshift"] }, { "batch_kwargs": { @@ -30,7 +30,7 @@ "table": "mobile_users", "datasource": "redshift" }, - "expectation_suite_names": ["mobile.v1.mobile_users"] + "expectation_suite_names": ["mobile.v1.mobile_users_redshift"] }, { "batch_kwargs": { diff --git a/.test/great_expectations/validation_configs/mobile/v1/redshift/temp_tables.json b/.test/great_expectations/validation_configs/mobile/v1/redshift/temp_tables.json index 25f9a3e8..c22b26c8 100644 --- a/.test/great_expectations/validation_configs/mobile/v1/redshift/temp_tables.json +++ b/.test/great_expectations/validation_configs/mobile/v1/redshift/temp_tables.json @@ -23,7 +23,7 @@ "table": "mobile_screen_views_this_run", "datasource": "redshift" }, - 
"expectation_suite_names": ["mobile.v1.mobile_screen_views"] + "expectation_suite_names": ["mobile.v1.mobile_screen_views_redshift"] }, { "batch_kwargs": { @@ -31,7 +31,7 @@ "table": "mobile_screen_views_staged", "datasource": "redshift" }, - "expectation_suite_names": ["mobile.v1.mobile_screen_views"] + "expectation_suite_names": ["mobile.v1.mobile_screen_views_redshift"] }, { "batch_kwargs": { @@ -39,7 +39,7 @@ "table": "mobile_sessions_this_run", "datasource": "redshift" }, - "expectation_suite_names": ["mobile.v1.mobile_sessions"] + "expectation_suite_names": ["mobile.v1.mobile_sessions_redshift"] }, { "batch_kwargs": { @@ -47,7 +47,7 @@ "table": "mobile_users_this_run", "datasource": "redshift" }, - "expectation_suite_names": ["mobile.v1.mobile_users"] + "expectation_suite_names": ["mobile.v1.mobile_users_redshift"] }, { "batch_kwargs": { diff --git a/CHANGELOG b/CHANGELOG index 563e4db1..4c401e39 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +BigQuery Mobile Version 1.0.0 (2021-04-26) +--------------------------------------- +Update licence (Close #79) +Fix credential loading in scripts (close #72) +Mobile: Add BigQuery mobile model v1 (close #73) + Redshift Mobile Version 1.0.0 (2021-03-25) --------------------------------------- Update scripts for mobile model (#66) diff --git a/LICENSE-2.0.txt b/LICENSE-2.0.txt index d6456956..2fad1ecd 100644 --- a/LICENSE-2.0.txt +++ b/LICENSE-2.0.txt @@ -187,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2020-2021 Snowplow Analytics Ltd. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/README.md b/README.md index c632a0a8..61974302 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ If you don't have a pipeline yet, you might be interested in finding out what Sn - [Snowflake](web/v1/snowflake) - [Mobile (v1)](mobile/v1) - [Redshift](mobile/v1/redshift) - - BigQuery (coming soon) + - [BigQuery](mobile/v1/bigquery) - Snowflake (coming soon) Documentation for the data models can be found on [our documentation site][docs-data-models]. diff --git a/mobile/v1/bigquery/CHANGELOG b/mobile/v1/bigquery/CHANGELOG new file mode 100644 index 00000000..814879f5 --- /dev/null +++ b/mobile/v1/bigquery/CHANGELOG @@ -0,0 +1,3 @@ +Version 1.0.0 (2021-04-26) +-------------------------- +Mobile: Add BigQuery mobile model v1 (close #73) diff --git a/mobile/v1/bigquery/README.md b/mobile/v1/bigquery/README.md new file mode 100644 index 00000000..9d6ab683 --- /dev/null +++ b/mobile/v1/bigquery/README.md @@ -0,0 +1,232 @@ +# BigQuery v1 mobile model README + +This readme contains a quickstart guide, and details of how the modules interact with each other. For a guide to configuring each module, there is a README in each of the modules' `playbooks` directory. + +To customise the model, we recommend following the guidance found in the README in the `sql/custom` directory. + +## Quickstart + +### Prerequisites + +[SQL-runner](https://github.com/snowplow/sql-runner) must be installed, and a dataset of mobile events from either the Snowplow [iOS tracker](https://docs.snowplowanalytics.com/docs/collecting-data/collecting-from-own-applications/objective-c-tracker/) or [Android tracker](https://docs.snowplowanalytics.com/docs/collecting-data/collecting-from-own-applications/android-tracker/) must be available in the database. The session context and screen view events must both be enabled for the mobile model to run.
+ +### Configuration + +#### Authentication + +First, fill in the connection details for the target database in the relevant template in `.scripts/templates/bigquery.yml.tmpl`. + +Set an environment variable, `GOOGLE_APPLICATION_CREDENTIALS`, to the path of your GBQ json credential file. See the README in `.scripts` for more detail. + +#### Contexts + +The following contexts can be enabled depending on your tracker configuration: + +- Mobile context +- Geolocation context +- Application context +- Screen context + +By default they are disabled. For more details on how to enable please see the [README](sql-runner/playbooks/standard/01-base/README.md) in the Base module's playbooks folder. + +#### Optional Modules + +Currently the app errors module for crash reporting is the only optional module. More will be added in the future as the tracker's functionality expands. + +Assuming your tracker is capturing `application_error` events, the module can be enabled within the app errors playbook. For more details on how to enable please see the [README](sql-runner/playbooks/standard/03-optional-modules/01-app-errors/README.md) in the app errors module's playbooks folder. + +#### Variables + +Variables in each module's playbook can also optionally be configured. See each playbook directory's README for more detail on configuration of each module. + +### Run using the `run_config.sh` script + +To run the entire standard model, end to end: + +```bash +bash .scripts/run_config.sh -b ~/pathTo/sql-runner -c mobile/v1/bigquery/sql-runner/configs/datamodeling.json -t .scripts/templates/bigquery.yml.tmpl; +``` + +See the README in the `.scripts/` directory for more details. + +## Custom Modules + +A guide to creating custom modules can be found in the README of the `sql/custom/` directory of the relevant model. Each custom module created must consist of a set of sql files and a playbook, or set of playbooks.
The helper scripts described above can also be used to run custom modules. + +## Testing + +### Setup + +Python3 is required. + +Install Great Expectations and dependencies, and configure a datasource: + +```bash +cd .test +pip3 install -r requirements.txt +great_expectations datasource new +``` + +Follow the CLI guide to configure access to your database. The configuration for your datasource will be generated in `.test/great_expectations/config/config_variables.yml` - these values can be replaced by environment variables if desired. + +Please be aware that the names of the tables to test have been hardcoded in the [validation configs](.test/great_expectations/validation_configs). If you are using custom values for any of the `entropy`, `scratch_schema` or `output_schema` variables within your playbooks, you will need to manually amend the validation configs accordingly. + +If you have enabled any optional modules within the main mobile model, you will need to enable tests on these modules too. For more details on how to enable please see the [README](sql-runner/playbooks/tests/00-staging-reconciliation/README.md) in the staging reconciliation module's playbooks folder.
+ +### Using the helper scripts + +To run the test suites alone: + +```bash +bash .scripts/run_test.sh -d bigquery -c perm_tables -m mobile -a {credentials (optional)} +bash .scripts/run_test.sh -d bigquery -c temp_tables -m mobile -a {credentials (optional)} +``` + +To run an entire run of the standard model, and tests end to end: + +```bash +bash .scripts/e2e.sh -b {path_to_sql_runner} -d bigquery -m mobile -a {credentials (optional)} +``` + +To run a full battery of ten runs of the standard model, and tests: + +```bash +bash .scripts/pr_check.sh -b {path_to_sql_runner} -d bigquery -m mobile -a {credentials (optional)} +``` + +### Adding to tests + +Check out the [Great Expectations documentation](https://docs.greatexpectations.io/en/latest/) for guidance on using it to run existing test suites directly, create new expectations, use the profiler, and autogenerate data documentation. + +Quickstart to create a new test suite: + +`great_expectations suite new` + +## Modules detail + +### 01-base + +Inputs: atomic tables, `{{.output_schema}}.mobile_base_event_id_manifest`, `{{.output_schema}}.mobile_base_session_id_manifest` + +Persistent Outputs: `{{.scratch_schema}}.mobile_events_staged`, + +Temporary Outputs: `{{.scratch_schema}}.mobile_events_this_run`, `{{.scratch_schema}}.mobile_base_duplicates_this_run` + +The base module executes the incremental logic of the model - it retrieves all events for sessions with new data, deduplicates, and adds any enabled contexts. + +The base module's 'complete' playbook (`99-base-complete.yml.tmpl`) updates the two relevant manifests, and cleans up temporary tables. The lifecycle of the `{{.scratch_schema}}.mobile_events_staged` table is completed by the `99-sessions-complete.yml.tmpl` step of the sessions module, when the table is truncated. This truncation can only occur during the completion step of the sessions module as `{{.scratch_schema}}.mobile_events_staged` is required as an input to the sessions module. 
This differs from the web model where the page views module's complete step would contain the truncation step. + +The `{{.scratch_schema}}.mobile_events_this_run` table contains all events relevant only to this run of the model (since the last time the `99-base-complete.yml.tmpl` playbook has run). This table is dropped and recomputed _every_ run of the module, regardless of whether another module has used the data. + +If there is a requirement that a custom module consumes data _more frequently than the screen views module for example_, the `{{.scratch_schema}}.mobile_events_this_run` table may be used for this purpose. + +The `{{.scratch_schema}}.mobile_events_staged` table is incrementally updated to contain all events relevant to any run of the base module _since the last time the sessions module consumed it_ (i.e. since the last time the `99-sessions-complete.yml.tmpl` playbook has run). This allows one to run the base module more frequently than the subsequent modules (if, for example, a custom module reads from mobile_events_this_run). + +Detail on configuring the base module's playbook can be found [in the relevant playbook directory's README](sql-runner/playbooks/standard/01-base). + +### 02-screen-views + +Inputs: atomic tables, `{{.scratch_schema}}.mobile_events_staged` + +Persistent Outputs: `{{.output_schema}}.mobile_screen_views`, `{{.scratch_schema}}.mobile_screen_views_staged` + +Temporary Outputs: `{{.scratch_schema}}.mobile_screen_views_this_run` + +The screen views module takes `{{.scratch_schema}}.mobile_events_staged` as its input, joins in and deduplicates screen_view_id, calculates the standard mobile screen views model, and updates the production mobile_screen_views table. It also produces the `{{.scratch_schema}}.mobile_screen_views_staged` and `{{.scratch_schema}}.mobile_screen_views_this_run` tables. + +The screen views module's 'complete' playbook `99-screen-views-complete.yml.tmpl` cleans up temporary tables.
The lifecycle of the `{{.scratch_schema}}.mobile_screen_views_staged` table is completed by the `99-sessions-complete.yml.tmpl` step (of the subsequent module). + +The `{{.scratch_schema}}.mobile_screen_views_this_run` table contains all events relevant only to this run of the model (since the last time the `99-screen-views-complete.yml.tmpl` playbook has run). This table is dropped and recomputed _every_ run of the module, regardless of whether another module has used the data. + +If there is a requirement that a custom module consumes data _more frequently than the sessions module_, the `{{.scratch_schema}}.mobile_screen_views_this_run` table may be used for this purpose. + +The `{{.scratch_schema}}.mobile_screen_views_staged` table is incrementally updated to contain all events relevant to any run of the screen views module _since the last time the sessions module consumed it_ (i.e. since the last time the `99-sessions-complete.yml.tmpl` playbook has run). This allows one to run the screen views module more frequently than the sessions module (if, for example, a custom module reads from mobile_screen_views_this_run). + +Detail on configuring the screen views module's playbook can be found [in the relevant playbook directory's README](sql-runner/playbooks/standard/02-screen-views). + +### 03-optional-modules + +#### 01-app-errors + +Inputs: atomic tables, `{{.scratch_schema}}.mobile_events_staged` + +Persistent Outputs: `{{.output_schema}}.mobile_app_errors`, `{{.scratch_schema}}.mobile_app_errors_staged` + +Temporary Outputs: `{{.scratch_schema}}.mobile_app_errors_this_run` + +The app errors module takes `{{.scratch_schema}}.mobile_events_staged` as its input, joins in the app errors context, calculates the app errors model, and updates the production mobile_app_errors table. It also produces the `{{.scratch_schema}}.mobile_app_errors_staged` and `{{.scratch_schema}}.mobile_app_errors_this_run` tables.
+ +This crash reporting module is disabled by default since it is not a requirement to run the mobile model. Despite this, the `{{.scratch_schema}}.mobile_app_errors_staged` table will be created regardless. This is to allow the sessions module to run correctly where the `{{.scratch_schema}}.mobile_app_errors_staged` table is required as an input. + +The app errors module's 'complete' playbook `99-app-errors-complete.yml.tmpl` cleans up temporary tables. The lifecycle of the `{{.scratch_schema}}.mobile_app_errors_staged` table is completed by the `99-sessions-complete.yml.tmpl` step (of the subsequent module). + +The `{{.scratch_schema}}.mobile_app_errors_this_run` table contains all events relevant only to this run of the model (since the last time the `99-app-errors-complete.yml.tmpl` playbook has run). This table is dropped and recomputed _every_ run of the module, regardless of whether another module has used the data. + +If there is a requirement that a custom module consumes data _more frequently than the sessions module_, the `{{.scratch_schema}}.mobile_app_errors_this_run` table may be used for this purpose. + +The `{{.scratch_schema}}.mobile_app_errors_staged` table is incrementally updated to contain all events relevant to any run of the app errors module _since the last time the sessions module consumed it_ (i.e. since the last time the `99-sessions-complete.yml.tmpl` playbook has run). This allows one to run the app errors module more frequently than the sessions module (if, for example, a custom module reads from mobile_app_errors_this_run). + +Detail on configuring the app errors module's playbook can be found [in the relevant playbook directory's README](sql-runner/playbooks/standard/03-optional-modules/01-app-errors).
+ +### 04-sessions + +Inputs: `{{.scratch_schema}}.mobile_screen_views_staged`, `{{.scratch_schema}}.mobile_app_errors_staged`, `{{.scratch_schema}}.mobile_events_staged` + +Persistent Outputs: `{{.output_schema}}.mobile_sessions`, `{{.scratch_schema}}.mobile_sessions_userid_manifest_staged` + +Temporary Outputs: `{{.scratch_schema}}.mobile_sessions_this_run` + +The sessions module takes the `_staged` output tables of the upstream modules as its input, calculates the standard sessions model, and updates the production sessions table. It also produces the `{{.scratch_schema}}.mobile_sessions_userid_manifest_staged` and `{{.scratch_schema}}.mobile_sessions_this_run{{.entropy}}` tables. + +Unlike the other modules, the sessions module outputs a manifest of IDs as its staged table rather than a table containing all unprocessed data - this is due to the fact that the users step requires a longer lookback than the incremental structure contains, so there are obviously efficiency limitations. + +The sessions module's 'complete' playbook `99-sessions-complete.yml.tmpl` truncates the input tables, and cleans up temporary tables. The lifecycle of the `{{.scratch_schema}}.mobile_sessions_userid_manifest_staged` table is completed by the `99-users-complete.yml.tmpl` step (of the subsequent module). + +The `{{.scratch_schema}}.mobile_sessions_this_run` table contains all events relevant only to this run of the model (since the last time the `99-sessions-complete.yml.tmpl` playbook has run). This table is dropped and recomputed _every_ run of the module, regardless of whether another module has used the data. + +If there is a requirement that a custom module consumes data _more frequently than the users module_, the `{{.scratch_schema}}.mobile_sessions_this_run` table may be used for this purpose. 
+ +The `{{.scratch_schema}}.mobile_sessions_userid_manifest_staged` table is incrementally updated to contain all IDs relevant to any run of the sessions module _since the last time the users module consumed it_ (i.e. since the last time the `99-users-complete.yml.tmpl` playbook has run). This allows one to run the sessions module more frequently than the users module (if, for example, a custom module reads from mobile_sessions_this_run and is more frequent than the users module). + +Detail on configuring the sessions module's playbook can be found [in the relevant playbook directory's README](sql-runner/playbooks/standard/04-sessions). + +### 05-users + +Inputs: `{{.scratch_schema}}.mobile_sessions_userid_manifest_staged`, `{{.output_schema}}.mobile_users_manifest` + +Persistent Outputs: `{{.output_schema}}.mobile_users` + +Temporary Outputs: `{{.scratch_schema}}.mobile_users_this_run` + +The users module takes `{{.scratch_schema}}.mobile_sessions_userid_manifest_staged` as its input, alongside the `{{.output_schema}}.mobile_users_manifest` table (which is self-maintained within the users module). It calculates the standard users model, and updates the production users table. It also produces the `{{.scratch_schema}}.mobile_users_this_run` table. + +Unlike the other modules, the users module doesn't take an input that contains all information required to run the module. It uses the `{{.output_schema}}.mobile_users_manifest` table to manage efficiency, and queries the sessions table to process data as far back in history as is required. + +The users module's 'complete' playbook `99-users-complete.yml.tmpl` truncates the `{{.scratch_schema}}.mobile_sessions_userid_manifest_staged` table, commits to the `{{.output_schema}}.mobile_users_manifest` and cleans up temporary tables. There is no `_staged` table for this module, as there are no subsequent modules.
+ +The `{{.scratch_schema}}.mobile_users_this_run` table contains all events relevant only to this run of the model (since the last time the `99-users-complete.yml.tmpl` playbook has run). This table is dropped and recomputed _every_ run of the module, regardless of whether another module has used the data. + +Detail on configuring the users module's playbook can be found [in the relevant playbook directory's README](sql-runner/playbooks/standard/05-users). + +## Scheduling + +### Asynchronous Runs + +While the model is configured by default to run the entire way through, i.e. from the base module through to the users module, it is possible to run each module independently. For instance one could run the screen views module hourly while only running the sessions module daily. To do so you should run hourly all modules up to and including the desired module i.e. the base and screen view modules. The sessions module can then be run on a daily schedule. A few points to note: + +- It is only when the sessions module is run that the `{{.scratch_schema}}.mobile_events_staged` is truncated. As a result, the hourly runs of the screen views module will both process new events data and re-process data stored in `mobile_events_staged` since the last time the sessions module ran. +- Prior to running the sessions module, ensure that all input modules have been run i.e. base, screen views and _any enabled optional modules_. This ensures all the inputs are up to date and in-sync. + +### Incomplete Runs + +It is not a requirement to run every module. For example you may decide you do not need sessions or users data and only want screen view data. To do so: + +- Set `stage_next` to `False` and `:ends_run:` to true in the screen views module. See the [README](sql-runner/playbooks/standard/02-screen-views) for more details. +- Run all modules up to and including the screen views module.
+- Ensure that the sessions 'complete' playbook, `99-sessions-complete.yml.tmpl`, is the last step in the run. This playbook includes the truncation of the `mobile_events_staged` table. Without this truncation _each subsequent run will re-process data severely impacting performance._ + +## A note on duplicates + +This version of the model (1.0.0) contains deduplication steps in both the base and screen views modules. The base module deduplicates on `event_id`, where _only_ the first row per `event_id` is kept (ordered by `collector_tstamp`). + +The screen view module deduplicates on `screen_view_id`, where _only_ the first row per `screen_view_id` is kept (ordered by `derived_tstamp`). diff --git a/mobile/v1/bigquery/sql-runner/configs/datamodeling.json b/mobile/v1/bigquery/sql-runner/configs/datamodeling.json new file mode 100644 index 00000000..02043473 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/configs/datamodeling.json @@ -0,0 +1,64 @@ +{ + "schema": "iglu:com.snowplowanalytics.datamodeling/config/jsonschema/1-0-0", + "data": { + "enabled": true, + "storage": "BigQuery", + "sqlRunner": "0.9.3", + "playbooks": [ + { + "playbook": "standard/01-base/01-base-main", + "dependsOn": [] + }, + { + "playbook": "standard/02-screen-views/01-screen-views-main", + "dependsOn": ["standard/01-base/01-base-main"] + }, + { + "playbook": "standard/03-optional-modules/01-app-errors/01-app-errors-main", + "dependsOn": ["standard/01-base/01-base-main"] + }, + { + "playbook": "standard/04-sessions/01-sessions-main", + "dependsOn": ["standard/01-base/01-base-main", + "standard/02-screen-views/01-screen-views-main", + "standard/03-optional-modules/01-app-errors/01-app-errors-main" + ] + }, + { + "playbook": "standard/05-users/01-users-main", + "dependsOn": ["standard/04-sessions/01-sessions-main"] + }, + { + "playbook": "standard/01-base/99-base-complete", + "dependsOn": ["standard/01-base/01-base-main", + "standard/02-screen-views/01-screen-views-main", + 
"standard/03-optional-modules/01-app-errors/01-app-errors-main", + "standard/04-sessions/01-sessions-main" + ] + }, + { + "playbook": "standard/02-screen-views/99-screen-views-complete", + "dependsOn": ["standard/02-screen-views/01-screen-views-main"] + }, + { + "playbook": "standard/03-optional-modules/01-app-errors/99-app-errors-complete", + "dependsOn": ["standard/03-optional-modules/01-app-errors/01-app-errors-main"] + }, + { + "playbook": "standard/04-sessions/99-sessions-complete", + "dependsOn": ["standard/02-screen-views/01-screen-views-main", + "standard/03-optional-modules/01-app-errors/01-app-errors-main", + "standard/04-sessions/01-sessions-main", + "standard/05-users/01-users-main" + ] + }, + { + "playbook": "standard/05-users/99-users-complete", + "dependsOn": ["standard/05-users/01-users-main"] + } + ], + "lockType": "hard", + "owners": [ + ] + } +} diff --git a/mobile/v1/bigquery/sql-runner/configs/datamodeling_custom_module.json b/mobile/v1/bigquery/sql-runner/configs/datamodeling_custom_module.json new file mode 100644 index 00000000..85921155 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/configs/datamodeling_custom_module.json @@ -0,0 +1,74 @@ +{ + "schema": "iglu:com.snowplowanalytics.datamodeling/config/jsonschema/1-0-0", + "data": { + "enabled": true, + "storage": "BigQuery", + "sqlRunner": "0.9.3", + "playbooks": [ + { + "playbook": "standard/01-base/01-base-main", + "dependsOn": [] + }, + { + "playbook": "standard/02-screen-views/01-screen-views-main", + "dependsOn": ["standard/01-base/01-base-main"] + }, + { + "playbook": "standard/03-optional-modules/01-app-errors/01-app-errors-main", + "dependsOn": ["standard/01-base/01-base-main"] + }, + { + "playbook": "standard/04-sessions/01-sessions-main", + "dependsOn": ["standard/01-base/01-base-main", + "standard/02-screen-views/01-screen-views-main", + "standard/03-optional-modules/01-app-errors/01-app-errors-main" + ] + }, + { + "playbook": "custom/04-session-goals/01-session-goals-main", + 
"dependsOn": ["standard/04-sessions/01-sessions-main"] + }, + { + "playbook": "standard/05-users/01-users-main", + "dependsOn": ["standard/04-sessions/01-sessions-main"] + }, + { + "playbook": "standard/01-base/99-base-complete", + "dependsOn": ["standard/01-base/01-base-main", + "standard/02-screen-views/01-screen-views-main", + "standard/03-optional-modules/01-app-errors/01-app-errors-main", + "standard/04-sessions/01-sessions-main" + ] + }, + { + "playbook": "standard/02-screen-views/99-screen-views-complete", + "dependsOn": ["standard/02-screen-views/01-screen-views-main", + "custom/04-session-goals/01-session-goals-main"] + }, + { + "playbook": "standard/03-optional-modules/01-app-errors/99-app-errors-complete", + "dependsOn": ["standard/03-optional-modules/01-app-errors/01-app-errors-main"] + }, + { + "playbook": "custom/04-session-goals/99-session-goals-complete", + "dependsOn": ["custom/04-session-goals/01-session-goals-main"] + }, + { + "playbook": "standard/04-sessions/99-sessions-complete", + "dependsOn": ["standard/02-screen-views/01-screen-views-main", + "standard/03-optional-modules/01-app-errors/01-app-errors-main", + "standard/04-sessions/01-sessions-main", + "custom/04-session-goals/99-session-goals-complete", + "standard/05-users/01-users-main" + ] + }, + { + "playbook": "standard/05-users/99-users-complete", + "dependsOn": ["standard/05-users/01-users-main"] + } + ], + "lockType": "hard", + "owners": [ + ] + } +} diff --git a/mobile/v1/bigquery/sql-runner/configs/post_test.json b/mobile/v1/bigquery/sql-runner/configs/post_test.json new file mode 100644 index 00000000..6b759657 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/configs/post_test.json @@ -0,0 +1,37 @@ +{ + "schema": "iglu:com.snowplowanalytics.datamodeling/config/jsonschema/1-0-0", + "data": { + "enabled": true, + "storage": "BigQuery", + "sqlRunner": "0.9.3", + "playbooks": [ + { + "playbook": "standard/01-base/99-base-complete", + "dependsOn": [] + }, + { + "playbook": 
"standard/02-screen-views/99-screen-views-complete", + "dependsOn": [] + }, + { + "playbook": "standard/03-optional-modules/01-app-errors/99-app-errors-complete", + "dependsOn": [] + }, + { + "playbook": "standard/04-sessions/99-sessions-complete", + "dependsOn": [] + }, + { + "playbook": "standard/05-users/99-users-complete", + "dependsOn": [] + }, + { + "playbook": "tests/00-staging-reconciliation/99-staging-reconciliation-complete", + "dependsOn": [] + } + ], + "lockType": "hard", + "owners": [ + ] + } +} diff --git a/mobile/v1/bigquery/sql-runner/configs/pre_test.json b/mobile/v1/bigquery/sql-runner/configs/pre_test.json new file mode 100644 index 00000000..64bc2dc2 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/configs/pre_test.json @@ -0,0 +1,44 @@ +{ + "schema": "iglu:com.snowplowanalytics.datamodeling/config/jsonschema/1-0-0", + "data": { + "enabled": true, + "storage": "BigQuery", + "sqlRunner": "0.9.3", + "playbooks": [ + { + "playbook": "standard/01-base/01-base-main", + "dependsOn": [] + }, + { + "playbook": "standard/02-screen-views/01-screen-views-main", + "dependsOn": ["standard/01-base/01-base-main"] + }, + { + "playbook": "standard/03-optional-modules/01-app-errors/01-app-errors-main", + "dependsOn": ["standard/01-base/01-base-main"] + }, + { + "playbook": "standard/04-sessions/01-sessions-main", + "dependsOn": ["standard/01-base/01-base-main", + "standard/02-screen-views/01-screen-views-main", + "standard/03-optional-modules/01-app-errors/01-app-errors-main" + ] + }, + { + "playbook": "standard/05-users/01-users-main", + "dependsOn": ["standard/04-sessions/01-sessions-main"] + }, + { + "playbook": "tests/00-staging-reconciliation/01-staging-reconciliation-main", + "dependsOn": ["standard/01-base/01-base-main", + "standard/02-screen-views/01-screen-views-main", + "standard/03-optional-modules/01-app-errors/01-app-errors-main", + "standard/04-sessions/01-sessions-main" + ] + } + ], + "lockType": "hard", + "owners": [ + ] + } +} diff --git 
a/mobile/v1/bigquery/sql-runner/configs/teardown_all.json b/mobile/v1/bigquery/sql-runner/configs/teardown_all.json new file mode 100644 index 00000000..363d2cd8 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/configs/teardown_all.json @@ -0,0 +1,33 @@ +{ + "schema": "iglu:com.snowplowanalytics.datamodeling/config/jsonschema/1-0-0", + "data": { + "enabled": true, + "storage": "BigQuery", + "sqlRunner": "0.9.3", + "playbooks": [ + { + "playbook": "standard/01-base/XX-destroy-base", + "dependsOn": [] + }, + { + "playbook": "standard/02-screen-views/XX-destroy-screen-views", + "dependsOn": [] + }, + { + "playbook": "standard/03-optional-modules/01-app-errors/XX-destroy-app-errors", + "dependsOn": [] + }, + { + "playbook": "standard/04-sessions/XX-destroy-sessions", + "dependsOn": [] + }, + { + "playbook": "standard/05-users/XX-destroy-users", + "dependsOn": [] + } + ], + "lockType": "hard", + "owners": [ + ] + } +} diff --git a/mobile/v1/bigquery/sql-runner/playbooks/custom/04-session-goals/01-session-goals-main.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/custom/04-session-goals/01-session-goals-main.yml.tmpl new file mode 100644 index 00000000..abce5235 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/custom/04-session-goals/01-session-goals-main.yml.tmpl @@ -0,0 +1,20 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :scratch_schema: scratch + :output_schema: derived + :entropy: "" +:steps: +- :name: 01-session-goals-staged + :queries: + - :name: 01-session-goals-staged + :file: custom/04-session-goals/01-session-goals-staged.sql + :template: true +- :name: 02-session-goals-upsert + :queries: + - :name: 02-session-goals-upsert + :file: custom/04-session-goals/02-session-goals-upsert.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/custom/04-session-goals/99-session-goals-complete.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/custom/04-session-goals/99-session-goals-complete.yml.tmpl new 
file mode 100644 index 00000000..86771cd8 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/custom/04-session-goals/99-session-goals-complete.yml.tmpl @@ -0,0 +1,15 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :scratch_schema: scratch + :output_schema: derived + :entropy: "" +:steps: +- :name: 99-session-goals-cleanup + :queries: + - :name: 99-session-goals-cleanup + :file: custom/04-session-goals/99-session-goals-cleanup.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/custom/README.md b/mobile/v1/bigquery/sql-runner/playbooks/custom/README.md new file mode 100644 index 00000000..67f19519 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/custom/README.md @@ -0,0 +1,5 @@ +# Custom playbooks + +This directory contains playbooks for the example custom modules [the custom sql directory](../../sql/custom). + +The readme in that directory also contains a guide to adding custom modules. diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/00-setup-metadata.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/00-setup-metadata.yml.tmpl new file mode 100644 index 00000000..c581b000 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/00-setup-metadata.yml.tmpl @@ -0,0 +1,17 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :model: mobile + :scratch_schema: scratch + :output_schema: derived + :entropy: "" +:steps: +- :name: 00-setup-metadata + :queries: + - :name: 00-setup-metadata + :file: standard/00-setup/01-main/00-setup-metadata.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/99-metadata-complete.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/99-metadata-complete.yml.tmpl new file mode 100644 index 00000000..4677feac --- /dev/null +++ 
b/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/99-metadata-complete.yml.tmpl @@ -0,0 +1,17 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :model: mobile + :scratch_schema: scratch + :output_schema: derived + :entropy: "" +:steps: +- :name: 01-cleanup-metadata + :queries: + - :name: 01-cleanup-metadata + :file: standard/00-setup/99-complete/01-cleanup-metadata.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/README.md b/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/README.md new file mode 100644 index 00000000..5a9d205b --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/README.md @@ -0,0 +1,9 @@ +# Setup step playbooks + +These playbooks exist to accommodate scheduling jobs which don't conform to the usual flow of the standard model - they create and destroy the necessary tables to log metadata, with an ID which persists across modules. + +If running the standard model alone, or the standard model alongside custom steps, it is not necessary to run these steps. Instead, we can configure the `:ends_run:` variable to `true` in the `complete` playbook for the last module run in the standard model. + +In a scenario where we don't run the standard model, or we run portions of it on differing schedules, we can run the `00-setup-metadata.yml.tmpl` playbook as the first step - to set up metadata and create the temporary run ID, and the `99-metadata-complete.yml.tmpl` playbook as the last step - to destroy the temporary run ID. + +If we would like to destroy the metadata tables for a full rebuild of the model, we may run the `XX-destroy-metadata.yml.tmpl` playbook to do so. It is advisable to rename the metadata table instead, however, in case there is some unforeseen need to see that data.
diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/XX-destroy-metadata.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/XX-destroy-metadata.yml.tmpl new file mode 100644 index 00000000..3ed3c49c --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/00-setup/XX-destroy-metadata.yml.tmpl @@ -0,0 +1,22 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :model: mobile + :scratch_schema: scratch + :output_schema: derived + :entropy: "" +:steps: +- :name: 01-cleanup-metadata + :queries: + - :name: 01-cleanup-metadata + :file: standard/00-setup/99-complete/01-cleanup-metadata.sql + :template: true +- :name: XX-destroy-metadata + :queries: + - :name: XX-destroy-metadata + :file: standard/00-setup/XX-destroy/XX-destroy-metadata.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/01-base-main.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/01-base-main.yml.tmpl new file mode 100644 index 00000000..d139c1c5 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/01-base-main.yml.tmpl @@ -0,0 +1,77 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :model: mobile + :input_schema: atomic + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :stage_next: true + :start_date: 2020-01-01 + :lookback_window_hours: + :days_late_allowed: + :session_lookback_days: + :update_cadence_days: + :upsert_lookback_days: + :derived_tstamp_partitioned: + :mobile_context: false + :geolocation_context: false + :application_context: false + :screen_context: false + :platform_filters: [] + :app_id_filters: [] +:steps: +- :name: 01-functions-and-procedures + :queries: + - :name: 01-functions-and-procedures + :file: standard/00-setup/01-main/01-functions-and-procedures.sql + :template: true +- :name: 00-setup-base + 
:queries: + - :name: 00-setup-base + :file: standard/01-base/01-main/00-setup-base.sql + :template: true +- :name: 01-new-events-limits + :queries: + - :name: 01-new-events-limits + :file: standard/01-base/01-main/01-new-events-limits.sql + :template: true +- :name: 02-run-manifest + :queries: + - :name: 02-run-manifest + :file: standard/01-base/01-main/02-run-manifest.sql + :template: true +- :name: 03-sessions-to-process + :queries: + - :name: 03-sessions-to-process + :file: standard/01-base/01-main/03-sessions-to-process.sql + :template: true +- :name: 04-sessions-to-include + :queries: + - :name: 04-sessions-to-include + :file: standard/01-base/01-main/04-sessions-to-include.sql + :template: true +- :name: 05-batch-limits + :queries: + - :name: 05-batch-limits + :file: standard/01-base/01-main/05-batch-limits.sql + :template: true +- :name: 06-events-this-run + :queries: + - :name: 06-events-this-run + :file: standard/01-base/01-main/06-events-this-run.sql + :template: true +- :name: 07-base-metadata + :queries: + - :name: 07-base-metadata + :file: standard/01-base/01-main/07-base-metadata.sql + :template: true +- :name: 08-commit-base + :queries: + - :name: 08-commit-base + :file: standard/01-base/01-main/08-commit-base.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/99-base-complete.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/99-base-complete.yml.tmpl new file mode 100644 index 00000000..bb5ef973 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/99-base-complete.yml.tmpl @@ -0,0 +1,24 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :model: mobile + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :cleanup_mode: all + :ends_run: false +:steps: +- :name: 98-base-manifest + :queries: + - :name: 98-base-manifest + :file: standard/01-base/99-complete/98-base-manifest.sql + 
:template: true +- :name: 99-base-cleanup + :queries: + - :name: 99-base-cleanup + :file: standard/01-base/99-complete/99-base-cleanup.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/README.md b/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/README.md new file mode 100644 index 00000000..b98ca851 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/README.md @@ -0,0 +1,85 @@ +# Configuring the 01-base playbooks + +The Base module applies incremental logic to the atomic data, and produces deduplicated tables for subsequent modules to consume. This module is shared between the web and mobile models, with the `:model:` variable denoting which model to run. + +`01-base-main.yml.tmpl` runs the main incremental logic. `99-base-complete.yml.tmpl` commits to the manifest, and runs cleanup steps afterwards. `XX-destroy-base.yml.tmpl` destroys all tables and manifests, for a complete rebuild. + +## Configuration quick reference + +### 01-base-main + +`:model:` name of model to run, web or mobile. + +`:input_schema:` name of atomic dataset + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables (eg. `_test` produces tables like `mobile_events_staged_test`). Must match entropy value used for all other modules in a given run. Populate with an empty string if no entropy value is needed. + +`:stage_next:` update staging tables - set to true if running the next module. If true, make sure that the next module includes a 'complete' step. + +`:start_date:` start date, used to seed manifest. + +`:lookback_window_hours:` defaults to 6. Period of time (in hours) to look before the latest event in manifest - to account for late arriving data, which comes out of order. + +`:days_late_allowed:` defaults to 3. Period of time (in days) for which we should include late data. 
If the difference between collector tstamps for the session start and new event is greater than this value, data for that session will not be processed. + +`:update_cadence_days:` defaults to 7. Period of time (in days) in the future (from the latest event in manifest) to look for new events. + +`:session_lookback_days:` defaults to 365. Period of time (in days) to limit scan on session manifest. Exists to improve performance of model when we have a lot of sessions. Should be set to as large a number as practical. + +`:mobile_context:` boolean - Mobile only. Configure whether to include data from the mobile context. + +`:geolocation_context:` boolean - Mobile only. Configure whether to include data from the geolocation context. + +`:application_context:` boolean - Mobile only. Configure whether to include data from the application context. + +`:screen_context:` boolean - Mobile only. Configure whether to include data from the screen context. + +`:platform_filters:` array - Defaults to `web` and `mob` for the web and mobile models respectively. List of platforms to filter events by. + +`:app_id_filters:` array - Optional. List of `app_id` to filter events by. + +**Notes:** + +`days_late_allowed` can be extended in order to account for incidents which cause very late data - for example downtime on the front end. + +`session_lookback_days` can cause incorrect data or duplicates if misconfigured - if events arrive with existing session_ids for sessions which pre-date the `session_lookback_days`, this will cause an issue. However this is very unlikely as the lookback should be far greater than what can be reasonably expected for this behaviour from non-bot activity. + +### 99-base-complete + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables (eg. `_test` produces tables like `web_events_staged_test`). 
Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` options: `debug` - only keeps main tables. `trace` - keeps all tables. `all` - cleans up everything. + +`:ends_run:` set to true if there are no subsequent modules in the run, false otherwise. + +### XX-destroy-base + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables (eg. `_test` produces tables like `web_events_staged_test`). Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` should be set to `all` for a destroy. + +`:ends_run:` should be set to true for a destroy. + +## Order of execution + +Custom steps should run before `99-base-complete.yml.tmpl`, but after `01-base-main.yml.tmpl`, as follows: + +1: 01-base-main.yml.tmpl + +2: AA-my-custom-base-level-module.yml.tmpl + +3: 99-base-complete.yml.tmpl + +Note that one should take care if adding custom logic at this stage, since everything downstream depends on it. For example, if duplicates are introduced here, every downstream join is liable to both suffer performance issues, and increase the number of duplicates exponentially. 
diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/XX-destroy-base.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/XX-destroy-base.yml.tmpl new file mode 100644 index 00000000..7f13aabd --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/01-base/XX-destroy-base.yml.tmpl @@ -0,0 +1,24 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :model: mobile + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :cleanup_mode: all + :ends_run: true +:steps: +- :name: 99-base-cleanup + :queries: + - :name: 99-base-cleanup + :file: standard/01-base/99-complete/99-base-cleanup.sql + :template: true +- :name: XX-destroy-base + :queries: + - :name: XX-destroy-base + :file: standard/01-base/XX-destroy/XX-destroy-base.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/01-screen-views-main.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/01-screen-views-main.yml.tmpl new file mode 100644 index 00000000..fafad6e7 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/01-screen-views-main.yml.tmpl @@ -0,0 +1,40 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :upsert_lookback_days: + :skip_derived: + :stage_next: true + :cluster_by: [] +:steps: +- :name: 01-functions-and-procedures + :queries: + - :name: 01-functions-and-procedures + :file: standard/00-setup/01-main/01-functions-and-procedures.sql + :template: true +- :name: 00-setup-screen-views + :queries: + - :name: 00-setup-screen-views + :file: standard/02-screen-views/01-main/00-setup-screen-views.sql + :template: true +- :name: 01-screen-views + :queries: + - :name: 01-screen-views + :file: standard/02-screen-views/01-main/01-screen-views.sql + 
:template: true +- :name: 02-screen-views-metadata + :queries: + - :name: 02-screen-views-metadata + :file: standard/02-screen-views/01-main/02-screen-views-metadata.sql + :template: true +- :name: 03-commit-screen-views + :queries: + - :name: 03-commit-screen-views + :file: standard/02-screen-views/01-main/03-commit-screen-views.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/99-screen-views-complete.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/99-screen-views-complete.yml.tmpl new file mode 100644 index 00000000..9067574f --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/99-screen-views-complete.yml.tmpl @@ -0,0 +1,18 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :cleanup_mode: all + :ends_run: false +:steps: +- :name: 99-screen-views-cleanup + :queries: + - :name: 99-screen-views-cleanup + :file: standard/02-screen-views/99-complete/99-screen-views-cleanup.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/README.md b/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/README.md new file mode 100644 index 00000000..a762265e --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/README.md @@ -0,0 +1,63 @@ +# Configuring the 02-screen-views playbooks + +The screen views module runs the standard mobile screen views model. It takes the `mobile_events_staged` table - produced by the Base module - as an input. + +`01-screen-views-main.yml.tmpl` runs the main mobile model logic. `99-screen-views-complete.yml.tmpl` runs the cleanup steps afterwards. `XX-destroy-screen-views.yml.tmpl` destroys all tables and manifests, for a complete rebuild. 
+ +## Configuration quick reference + +### 01-screen-views-main + +`:input_schema:` name of atomic dataset + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables (eg. `_test` produces tables like `screen_views_test`). Must match entropy value used for all other modules in a given run. + +`:stage_next:` update staging tables - set to true if running the next module. If true, make sure that the next module includes a 'complete' step. + +`:upsert_lookback_days:` default 30. Period of time (in days) to look back over the production table in order to find rows to delete when upserting data. Where performance is not a concern, should be set to as long a value as possible. + +`:skip_derived:` default false. Set to true to skip insert to production screen views table. + +`:cluster_by:` array - default `[app_id, device_user_id, session_id]`. Columns used to cluster the `mobile_screen_views_staged` and `mobile_screen_views` tables. Override if your use case requires different clustering. Note clustering is defined during table creation and therefore to recluster pre-existing tables one must either a) drop and recompute the tables or b) copy the data over to new tables with the custom clustering applied. + +**Note:** `upsert_lookback_days` can produce duplicates if set to too short a window. + +### 99-screen-views-complete + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables. Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` options: `debug` - only keeps main tables. `trace` - keeps all tables. `all` - cleans up everything. + +`:ends_run:` set to true if there are no subsequent modules in the run, false otherwise. 
+ +### XX-destroy-screen-views + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables. Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` should be set to "all" for a destroy. + +`:ends_run:` should be set to true for a destroy. + +## Order of execution + +Custom steps should run before `99-screen-views-complete.yml.tmpl`, but after `01-screen-views-main.yml.tmpl`, as follows: + +1: 01-screen-views-main.yml.tmpl + +2: AA-my-custom-screen-views-level-module.yml.tmpl + +3: 99-screen-views-complete.yml.tmpl + +Custom modules should produce tables which join to the screen views table rather than altering it where possible. diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/XX-destroy-screen-views.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/XX-destroy-screen-views.yml.tmpl new file mode 100644 index 00000000..969e742b --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/02-screen-views/XX-destroy-screen-views.yml.tmpl @@ -0,0 +1,24 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :cleanup_mode: all + :ends_run: true +:steps: +- :name: 99-screen-views-cleanup + :queries: + - :name: 99-screen-views-cleanup + :file: standard/02-screen-views/99-complete/99-screen-views-cleanup.sql + :template: true +- :name: XX-destroy-screen-views + :queries: + - :name: XX-destroy-screen-views + :file: standard/02-screen-views/XX-destroy/XX-destroy-screen-views.sql + :template: true + diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/01-app-errors-main.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/01-app-errors-main.yml.tmpl new 
file mode 100644 index 00000000..4fc91672 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/01-app-errors-main.yml.tmpl @@ -0,0 +1,41 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :enabled: false + :entropy: "" + :upsert_lookback_days: + :skip_derived: + :stage_next: true + :cluster_by: [] +:steps: +- :name: 01-functions-and-procedures + :queries: + - :name: 01-functions-and-procedures + :file: standard/00-setup/01-main/01-functions-and-procedures.sql + :template: true +- :name: 00-setup-app-errors + :queries: + - :name: 00-setup-app-errors + :file: standard/03-optional-modules/01-app-errors/01-main/00-setup-app-errors.sql + :template: true +- :name: 01-app-errors + :queries: + - :name: 01-app-errors + :file: standard/03-optional-modules/01-app-errors/01-main/01-app-errors.sql + :template: true +- :name: 02-app-errors-metadata + :queries: + - :name: 02-app-errors-metadata + :file: standard/03-optional-modules/01-app-errors/01-main/02-app-errors-metadata.sql + :template: true +- :name: 03-commit-app-errors + :queries: + - :name: 03-commit-app-errors + :file: standard/03-optional-modules/01-app-errors/01-main/03-commit-app-errors.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/99-app-errors-complete.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/99-app-errors-complete.yml.tmpl new file mode 100644 index 00000000..28449109 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/99-app-errors-complete.yml.tmpl @@ -0,0 +1,18 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :cleanup_mode: all + 
:ends_run: false +:steps: +- :name: 99-app-errors-cleanup + :queries: + - :name: 99-app-errors-cleanup + :file: standard/03-optional-modules/01-app-errors/99-complete/99-app-errors-cleanup.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/README.md b/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/README.md new file mode 100644 index 00000000..2ce197cb --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/README.md @@ -0,0 +1,65 @@ +# Configuring the 01-app-errors playbooks + +The app errors module runs the mobile application errors model. It takes the `mobile_events_staged` table - produced by the Base module - as an input. The module's staged output, `mobile_app_errors_staged`, is used as an input to the sessions module. The app errors module is optional, however if disabled an empty staged output table will still be created so as to allow the sessions module to run. + +`01-app-errors-main.yml.tmpl` runs the main mobile model logic. `99-app-errors-complete.yml.tmpl` runs the cleanup steps afterwards. `XX-destroy-app-errors.yml.tmpl` destroys all tables and manifests, for a complete rebuild. + +## Configuration quick reference + +### 01-app-errors-main + +`:input_schema:` name of atomic dataset + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:enabled:` boolean - Toggles the module on/off. + +`:entropy:` string to append to all tables, to test without affecting prod tables (eg. `_test` produces tables like `mobile_app_errors_test`). Must match entropy value used for all other modules in a given run. + +`:stage_next:` update staging tables - set to true if running the next module. If true, make sure that the next module includes a 'complete' step. + +`:upsert_lookback_days:` default 30. 
Period of time (in days) to look back over the production table in order to find rows to delete when upserting data. Where performance is not a concern, should be set to as long a value as possible. + +`:skip_derived:` default false. Set to true to skip insert to production app errors table. + +`:cluster_by:` array - default `[app_id, device_user_id, session_id]`. Columns used to cluster the `mobile_app_errors_staged` and `mobile_app_errors` tables. Override if your use case requires different clustering. Note clustering is defined during table creation and therefore to recluster pre-existing tables one must either a) drop and recompute the tables or b) copy the data over to new tables with the custom clustering applied. + +**Note:** `upsert_lookback_days` can produce duplicates if set to too short a window. + +### 99-app-errors-complete + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables. Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` options: `debug` - only keeps main tables. `trace` - keeps all tables. `all` - cleans up everything. + +`:ends_run:` set to true if there are no subsequent modules in the run, false otherwise. + +### XX-destroy-app-errors + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables. Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` should be set to "all" for a destroy. + +`:ends_run:` should be set to true for a destroy. 
+ +## Order of execution + +Custom steps should run before `99-app-errors-complete.yml.tmpl`, but after `01-app-errors-main.yml.tmpl`, as follows: + +1: 01-app-errors-main.yml.tmpl + +2: AA-my-custom-app-errors-level-module.yml.tmpl + +3: 99-app-errors-complete.yml.tmpl + +Custom modules should produce tables which join to the app errors table rather than altering it where possible. diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/XX-destroy-app-errors.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/XX-destroy-app-errors.yml.tmpl new file mode 100644 index 00000000..368204af --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/03-optional-modules/01-app-errors/XX-destroy-app-errors.yml.tmpl @@ -0,0 +1,23 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :cleanup_mode: all + :ends_run: true +:steps: +- :name: 99-app-errors-cleanup + :queries: + - :name: 99-app-errors-cleanup + :file: standard/03-optional-modules/01-app-errors/99-complete/99-app-errors-cleanup.sql + :template: true +- :name: XX-destroy-app-errors + :queries: + - :name: XX-destroy-app-errors + :file: standard/03-optional-modules/01-app-errors/XX-destroy/XX-destroy-app-errors.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/01-sessions-main.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/01-sessions-main.yml.tmpl new file mode 100644 index 00000000..65f40d28 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/01-sessions-main.yml.tmpl @@ -0,0 +1,55 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :upsert_lookback_days: + 
:stage_next: true + :skip_derived: + :cluster_by: [] +:steps: +- :name: 01-functions-and-procedures + :queries: + - :name: 01-functions-and-procedures + :file: standard/00-setup/01-main/01-functions-and-procedures.sql + :template: true +- :name: 00-setup-sessions + :queries: + - :name: 00-setup-sessions + :file: standard/04-sessions/01-main/00-setup-sessions.sql + :template: true +- :name: 01-sessions-aggs + :queries: + - :name: 01-sessions-aggs + :file: standard/04-sessions/01-main/01-sessions-aggs.sql + :template: true +- :name: 02-sessions-sv-details + :queries: + - :name: 02-sessions-sv-details + :file: standard/04-sessions/01-main/02-sessions-sv-details.sql + :template: true +- :name: 03-sessions + :queries: + - :name: 03-sessions + :file: standard/04-sessions/01-main/03-sessions.sql + :template: true +- :name: 04-sessions-metadata + :queries: + - :name: 04-sessions-metadata + :file: standard/04-sessions/01-main/04-sessions-metadata.sql + :template: true +- :name: 05-sessions-prep-manifest + :queries: + - :name: 05-sessions-prep-manifest + :file: standard/04-sessions/01-main/05-sessions-prep-manifest.sql + :template: true +- :name: 06-commit-sessions + :queries: + - :name: 06-commit-sessions + :file: standard/04-sessions/01-main/06-commit-sessions.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/99-sessions-complete.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/99-sessions-complete.yml.tmpl new file mode 100644 index 00000000..f43666f3 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/99-sessions-complete.yml.tmpl @@ -0,0 +1,23 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :cleanup_mode: all + :ends_run: false +:steps: +- :name: 98-truncate-upstream-staged + :queries: + - :name: 98-truncate-upstream-staged + :file: 
standard/04-sessions/99-complete/98-truncate-upstream-staged.sql + :template: true +- :name: 99-sessions-cleanup + :queries: + - :name: 99-sessions-cleanup + :file: standard/04-sessions/99-complete/99-sessions-cleanup.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/README.md b/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/README.md new file mode 100644 index 00000000..17176125 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/README.md @@ -0,0 +1,61 @@ +# Configuring the 04-sessions playbooks + +The sessions module runs the standard mobile sessions model. It takes `mobile_events_staged`, `mobile_screen_views_staged` and `mobile_app_errors_staged` as inputs. + +`01-sessions-main.yml.tmpl` runs the main mobile model logic. `99-sessions-complete.yml.tmpl` truncates the input tables, and runs cleanup steps afterwards. `XX-destroy-sessions.yml.tmpl` destroys all tables and manifests, for a complete rebuild. + +## Configuration quick reference + +### 01-sessions-main + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables (eg. `_test` produces tables like `mobile_sessions_test`). Must match entropy value used for all other modules in a given run. + +`:stage_next:` update staging tables - set to true if running the next module. If true, make sure that the next module includes a 'complete' step. + +`:upsert_lookback_days:` default 30. Period of time (in days) to look back over the production table in order to find rows to delete when upserting data. Where performance is not a concern, should be set to as long a value as possible. + +`:skip_derived:` default false. Set to true to skip insert to production mobile sessions table. + +`:cluster_by:` array - default `[app_id, device_user_id, session_id]`. Columns used to cluster the `mobile_sessions` tables. 
Override if your use case requires different clustering. Note clustering is defined during table creation and therefore to recluster pre-existing tables one must either a) drop and recompute the tables or b) copy the data over to new tables with the custom clustering applied. + +**Note:** `upsert_lookback_days` can produce duplicates if set to too short a window. + +### 99-sessions-complete + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables. Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` options: `debug` - only keeps main tables. `trace` - keeps all tables. `all` - cleans up everything. + +`:ends_run:` set to true if there are no subsequent modules in the run, false otherwise. + +### XX-destroy-sessions + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables. Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` should be set to `all` for a destroy. + +`:ends_run:` should be set to true for a destroy. + +## Order of execution + +Custom steps should run before `99-sessions-complete.yml.tmpl`, but after `01-sessions-main.yml.tmpl`, as follows: + +1: 01-sessions-main.yml.tmpl + +2: AA-my-custom-sessions-level-module.yml.tmpl + +3: 99-sessions-complete.yml.tmpl + +Custom modules should produce tables which join to the sessions table rather than altering it where possible. 
diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/XX-destroy-sessions.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/XX-destroy-sessions.yml.tmpl new file mode 100644 index 00000000..4f398b97 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/04-sessions/XX-destroy-sessions.yml.tmpl @@ -0,0 +1,23 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :cleanup_mode: all + :ends_run: true +:steps: +- :name: 99-sessions-cleanup + :queries: + - :name: 99-sessions-cleanup + :file: standard/04-sessions/99-complete/99-sessions-cleanup.sql + :template: true +- :name: XX-destroy-sessions + :queries: + - :name: XX-destroy-sessions + :file: standard/04-sessions/XX-destroy/XX-destroy-sessions.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/01-users-main.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/01-users-main.yml.tmpl new file mode 100644 index 00000000..f6ccac49 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/01-users-main.yml.tmpl @@ -0,0 +1,64 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :skip_derived: + :upsert_lookback_days: + :cluster_by: [] +:steps: +- :name: 01-functions-and-procedures + :queries: + - :name: 01-functions-and-procedures + :file: standard/00-setup/01-main/01-functions-and-procedures.sql + :template: true +- :name: 00-setup-users + :queries: + - :name: 00-setup-users + :file: standard/05-users/01-main/00-setup-users.sql + :template: true +- :name: 01-userids-this-run + :queries: + - :name: 01-userids-this-run + :file: standard/05-users/01-main/01-userids-this-run.sql + :template: true +- :name: 02-users-limits + 
:queries: + - :name: 02-users-limits + :file: standard/05-users/01-main/02-users-limits.sql + :template: true +- :name: 03-users-sessions-this-run + :queries: + - :name: 03-users-sessions-this-run + :file: standard/05-users/01-main/03-users-sessions-this-run.sql + :template: true +- :name: 04-users-aggs + :queries: + - :name: 04-users-aggs + :file: standard/05-users/01-main/04-users-aggs.sql + :template: true +- :name: 05-users-lasts + :queries: + - :name: 05-users-lasts + :file: standard/05-users/01-main/05-users-lasts.sql + :template: true +- :name: 06-users + :queries: + - :name: 06-users + :file: standard/05-users/01-main/06-users.sql + :template: true +- :name: 07-users-metadata + :queries: + - :name: 07-users-metadata + :file: standard/05-users/01-main/07-users-metadata.sql + :template: true +- :name: 08-commit-users + :queries: + - :name: 08-commit-users + :file: standard/05-users/01-main/08-commit-users.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/99-users-complete.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/99-users-complete.yml.tmpl new file mode 100644 index 00000000..2a90603f --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/99-users-complete.yml.tmpl @@ -0,0 +1,23 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :cleanup_mode: all + :ends_run: true +:steps: +- :name: 98-manifest-and-truncate + :queries: + - :name: 98-manifest-and-truncate + :file: standard/05-users/99-complete/98-manifest-and-truncate.sql + :template: true +- :name: 99-users-cleanup + :queries: + - :name: 99-users-cleanup + :file: standard/05-users/99-complete/99-users-cleanup.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/README.md b/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/README.md new 
file mode 100644 index 00000000..66f613d4 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/README.md @@ -0,0 +1,55 @@ +# Configuring the 05-users playbooks + +The users module runs the standard mobile users model - it takes the `mobile_sessions_userid_manifest_staged` table - produced by the Sessions module - as an input. + +`01-users-main.yml.tmpl` runs the main mobile model logic. `99-users-complete.yml.tmpl` truncates the input table, and runs cleanup steps afterwards. `XX-destroy-users.yml.tmpl` destroys all tables and manifests, for a complete rebuild. + +## Configuration quick reference + +### 01-users-main + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables (eg. `_test` produces tables like `mobile_users_test`). Must match entropy value used for all other modules in a given run. + +`:skip_derived:` default false. Set to true to skip insert to production users table. + +`:cluster_by:` array - default `[device_user_id]`. Columns used to cluster the `mobile_users` tables. Override if your use case requires different clustering. Note clustering is defined during table creation and therefore to recluster pre-existing tables one must either a) drop and recompute the tables or b) copy the data over to new tables with the custom clustering applied. + +### 99-users-complete + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables. Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` options: `debug` - only keeps main tables. `trace` - keeps all tables. `all` - cleans up everything. + +`:ends_run:` set to true if there are no subsequent modules in the run, false otherwise. 
+ +### XX-destroy-users + +`:scratch_schema:` name of scratch dataset + +`:output_schema:` name of derived dataset + +`:entropy:` string to append to all tables, to test without affecting prod tables. Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` should be set to `all` for a destroy. + +`:ends_run:` should be set to true for a destroy. + +## Order of execution + +Custom steps should run before `99-users-complete.yml.tmpl`, but after `01-users-main.yml.tmpl`, as follows: + +1: 01-users-main.yml.tmpl + +2: AA-my-custom-users-level-module.yml.tmpl + +3: 99-users-complete.yml.tmpl + +Custom modules should produce tables which join to the users table rather than altering it where possible. diff --git a/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/XX-destroy-users.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/XX-destroy-users.yml.tmpl new file mode 100644 index 00000000..359e7f77 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/standard/05-users/XX-destroy-users.yml.tmpl @@ -0,0 +1,23 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :scratch_schema: scratch + :output_schema: derived + :entropy: "" + :cleanup_mode: all + :ends_run: true +:steps: +- :name: 99-users-cleanup + :queries: + - :name: 99-users-cleanup + :file: standard/05-users/99-complete/99-users-cleanup.sql + :template: true +- :name: XX-destroy-users + :queries: + - :name: XX-destroy-users + :file: standard/05-users/XX-destroy/XX-destroy-users.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/tests/00-staging-reconciliation/01-staging-reconciliation-main.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/tests/00-staging-reconciliation/01-staging-reconciliation-main.yml.tmpl new file mode 100644 index 00000000..3cdcb16f --- /dev/null +++ 
b/mobile/v1/bigquery/sql-runner/playbooks/tests/00-staging-reconciliation/01-staging-reconciliation-main.yml.tmpl @@ -0,0 +1,17 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :model: mobile + :scratch_schema: scratch + :entropy: "" + :app_errors: false +:steps: +- :name: 00-staging-reconciliation + :queries: + - :name: 00-staging-reconciliation + :file: tests/00-staging-reconciliation/01-main/00-staging-reconciliation.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/tests/00-staging-reconciliation/99-staging-reconciliation-complete.yml.tmpl b/mobile/v1/bigquery/sql-runner/playbooks/tests/00-staging-reconciliation/99-staging-reconciliation-complete.yml.tmpl new file mode 100644 index 00000000..f98569c9 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/tests/00-staging-reconciliation/99-staging-reconciliation-complete.yml.tmpl @@ -0,0 +1,17 @@ +:targets: +- :name: + :type: bigquery + :project: + :region: +:variables: + :model_version: bigquery/mobile/1.0.0 + :model: mobile + :scratch_schema: scratch + :entropy: "" + :cleanup_mode: all +:steps: +- :name: 99-staging-reconciliation-cleanup + :queries: + - :name: 99-staging-reconciliation-cleanup + :file: tests/00-staging-reconciliation/99-complete/99-staging-reconciliation-cleanup.sql + :template: true diff --git a/mobile/v1/bigquery/sql-runner/playbooks/tests/00-staging-reconciliation/README.md b/mobile/v1/bigquery/sql-runner/playbooks/tests/00-staging-reconciliation/README.md new file mode 100644 index 00000000..036ed9bc --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/playbooks/tests/00-staging-reconciliation/README.md @@ -0,0 +1,24 @@ +# Configuring the 00-staging-reconciliation playbooks + +The staging reconciliation module reconciles all the `_staging` output tables from the standard modules. It outputs to a scratch `mobile_staging_reconciliation` table which is then validated using Great Expectations. 
For all tests to pass, every column in `mobile_staging_reconciliation` must equal 0. + +`01-staging-reconciliation-main.yml.tmpl` runs the main reconciliation logic. `99-staging-reconciliation-complete.yml.tmpl` drops the scratch table. + +## Configuration quick reference + +### 01-staging-reconciliation-main + +`:scratch_schema:` name of scratch schema + +`:entropy:` string to append to all tables, to test without affecting prod tables (eg. `_test` produces tables like `mobile_users_test`). Must match entropy value used for all other modules in a given run. + +`:app_errors:` boolean - default false. Set to true if the App Errors module is enabled within the main mobile model. By enabling, the `_staged` output of the App Errors module is checked as part of the reconciliation. + +### 99-staging-reconciliation-complete + +`:scratch_schema:` name of scratch schema + +`:entropy:` string to append to all tables, to test without affecting prod tables. Must match entropy value used for all other modules in a given run. + +`:cleanup_mode:` options: `debug` - only keeps main tables. `trace` - keeps all tables. `all` - cleans up everything. + diff --git a/mobile/v1/bigquery/sql-runner/sql/custom/04-session-goals/01-session-goals-staged.sql b/mobile/v1/bigquery/sql-runner/sql/custom/04-session-goals/01-session-goals-staged.sql new file mode 100644 index 00000000..6bf74fa8 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/custom/04-session-goals/01-session-goals-staged.sql @@ -0,0 +1,36 @@ +-- 1.
Aggregate with a drop and recompute logic + +CREATE OR REPLACE TABLE {{.scratch_schema}}.session_goals_staged{{.entropy}} AS ( + + WITH goals AS ( + + SELECT + sv.session_id, + LOGICAL_OR(sv.screen_view_name = 'registration') AS has_started_registration, + LOGICAL_OR(sv.screen_view_name = 'my_account') AS has_completed_registration, + LOGICAL_OR(sv.screen_view_name = 'search_results') AS has_used_search, + LOGICAL_OR(sv.screen_view_name = 'products') AS has_viewed_products + + FROM + {{.scratch_schema}}.mobile_screen_views_staged{{.entropy}} sv + + GROUP BY 1 + + ) + + SELECT + s.session_id, + s.start_tstamp, + g.has_started_registration, + g.has_completed_registration, + g.has_used_search, + g.has_viewed_products, + IF(g.has_started_registration AND g.has_completed_registration AND g.has_used_search AND g.has_viewed_products, TRUE, FALSE) AS has_completed_goals + + FROM + {{.scratch_schema}}.mobile_sessions_this_run{{.entropy}} AS s --select from mobile_sessions_this_run to get start_tstamp. Screen view might not be start of session + INNER JOIN goals AS g + ON s.session_id = g.session_id + +); + diff --git a/mobile/v1/bigquery/sql-runner/sql/custom/04-session-goals/02-session-goals-upsert.sql b/mobile/v1/bigquery/sql-runner/sql/custom/04-session-goals/02-session-goals-upsert.sql new file mode 100644 index 00000000..f3bcc4ae --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/custom/04-session-goals/02-session-goals-upsert.sql @@ -0,0 +1,12 @@ + +-- 2. 
Commit table procedure handles committing to prod, including table creation, and creation of new columns if 'automigrate' is set to TRUE + +CALL {{.output_schema}}.commit_table('{{.scratch_schema}}', -- sourceDataset + 'session_goals_staged{{.entropy}}', -- sourceTable + '{{.output_schema}}', -- targetDataset + 'session_goals{{.entropy}}', -- targetTable + 'session_id', -- joinKey + 'start_tstamp', -- partitionKey + TRUE); -- automigrate + +-- If we like, we can manually create and update our production table instead. diff --git a/mobile/v1/bigquery/sql-runner/sql/custom/04-session-goals/99-session-goals-cleanup.sql b/mobile/v1/bigquery/sql-runner/sql/custom/04-session-goals/99-session-goals-cleanup.sql new file mode 100644 index 00000000..ecf7b497 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/custom/04-session-goals/99-session-goals-cleanup.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS {{.scratch_schema}}.session_goals_staged{{.entropy}}; diff --git a/mobile/v1/bigquery/sql-runner/sql/custom/README.md b/mobile/v1/bigquery/sql-runner/sql/custom/README.md new file mode 100644 index 00000000..5e954f0c --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/custom/README.md @@ -0,0 +1,115 @@ +# Adding custom sql + +This directory contains two examples of custom modules. The directories follow the same naming convention as the standard module, whereby each directory is assigned a number corresponding to the level of aggregation that the SQL is concerned with. In addition to these examples, below is a guide to creating custom modules. + +## Guidelines & Best Practice + +The v1 Model's modular structure allows for custom SQL modules to leverage the model's incrementalisation logic, and operate as 'plugins' to complement the standard model.
This can be achieved by using the `_staged` tables as an input, and producing custom tables which may join to the standard model's main production tables (for example, to aggregate custom contexts to a screen_view level), or provide a separate level of aggregation (for example a custom user interaction). + +The standard modules carry out the heavy lifting in establishing an incremental structure and providing the core logic for the most common mobile aggregation use cases. It also allows custom modules to be plugged in without impeding the maintenance of standard modules. + +The following best practices should be followed to ensure that updates and bugfixes to the model can be rolled out with minimal complication: + +- Custom modules should not modify the `_staged` tables +- Custom modules should not modify the standard model's production tables (eg `mobile_screen_views`, `mobile_sessions` and `mobile_users`) - adding extra fields to the production tables can be achieved by producing a separate table which joins to the production table. +- Custom modules should not modify any manifest tables. +- Customisations should not modify the SQL in the standard model - they should only consist of a new set of SQL statements, which produce a separate table. +- The logic for custom SQL should be idempotent, and restart-safe - in other words, it should be written in such a way that a failure mid-way, or a re-run of the model will not change the deterministic output. + +In short, the standard modules can be treated as the source code for a distinct piece of software, and custom modules can be treated as self-maintained, additive plugins - in much the same way as a Java package may permit one to leverage public classes in their own API, and provide an entry point for custom programs to run, but will not permit one to modify the original API.
+ +The `_staged` and `_this_run` tables are considered part of the 'public' class of tables in this model structure, and so we can give assurances that non-breaking releases (ie. any v1.X release) won't alter them. The other tables may be used in custom SQL, but their logic and structure may change from release to release, or they may be removed. If one does use a scratch table in custom logic, any breaking changes can be mitigated by either amending the custom logic to suit, or copying the relevant steps from an old version of the model into the custom module. (However this will rarely be necessary). + +## Interacting with the model structure + +Each standard module produces a `_staged` table which serves as the input to the next module. This should also serve as the input to custom modules. For example, the `01-base` module produces the `scratch.mobile_events_staged` table - this is a subset of the pipeline's `events`, with the addition of various optional contexts, containing only data from sessions which contain new data in this run of the model. To aggregate atomic data, one can read from the `scratch.mobile_events_staged` table. + +Each standard module also contains a `99-{module}-complete` playbook, which completes the incremental logic for the previous step by truncating the input\*. This should run after both the custom and standard module have run. So, for our use case of aggregating atomic data, we would: + + 1. run the `01-base` module, to produce the `mobile_events_staged` table + 2. run both the standard module and custom modules + 3. Run the `99-sessions-complete` playbook (which truncates `events_staged`) + +For less common requirements, one can opt not to run a given standard module, or to wait until the end of the model before running all `99-{module}-complete` steps. For example, if the custom logic requires, the following order of operations is acceptable: + + 1. run `01-base` + 2. run `02-screen-views` + 3. 
run `02a-custom-sv-entity-aggregations` + 4. run `03-sessions` + 5. run `03a-custom-sessions-aggregations` + 6. run `04a-custom-users-aggregations` (note no standard users module) + 7. run `99-screen-views-complete` + 8. run `99-sessions-complete` + 9. run `99-users-complete` + +\* The exception to this is the screen views and optional modules. Their `99-{module}-complete` playbook does not truncate their input, `mobile_events_staged`. This is because the session module requires `mobile_events_staged` as an input and therefore the truncation occurs within the sessions module. + +## Producing custom tables + +### Design + +As mentioned above, custom modules should be written as additive-only, and should produce separate custom tables, as distinct from the tables produced by the standard module. If the requirement of the customisation is to add fields to complement the production tables, this can be done in one of two ways: a) create a table which joins to the standard table on its joinkey, or b) create a custom table which duplicates the interesting fields from the standard model, to a new table which also contains the custom data. + +For example, at screen views level, one can either: + +a) Create a new table `mobile_screen_views_additions`, which is one-row-per screen_view_id, and contains the relevant data. + +or b) Create a new table `mobile_screen_views_custom`, which contains the relevant customisations joined to the relevant fields from `mobile_screen_views`. + +One should not amend `mobile_screen_views` directly. + +### Implementation + +The easiest way to integrate with the model's incremental structure is to implement a two-step process for custom tables: + +1. Write the relevant aggregation logic using drop and recompute logic, to produce a `_staged` table (only interacting with the input data) +2. Use the provided `derived.commit_table()` procedure to commit to the production table.
+ +The standard model's incrementalisation logic ensures that all relevant data for a given row will be in the input, and only data for sessions which contain new data will be included. `commit_table` will do the job of creating the production table, or adding new columns as required. Note that `commit_table` requires a time key for partitions. + +Users who wish to specify exact constraints or additional features of the production table (eg. cluster keys) may create it first in SQL using a `CREATE TABLE` statement. + +An example of this can be seen in the `04-session-goals` directory, which makes use of the commit table procedure to update the production table. + +The arguments to the commit table procedure are as follows: + +```SQL +derived.commit_table( + 'scratch', -- sourceDataset + 'session_goals_staged', -- sourceTable + 'derived', -- targetDataset + 'session_goals', -- targetTable + 'session_id', -- joinKey + 'start_tstamp', -- partitionKey + TRUE); -- automigrate +``` + +If `automigrate` is TRUE, tables which don't exist will be created, and new columns will be added to the target table. If FALSE, the query will fail without committing unless the target table exists and columns match exactly. + +## Advanced usage - variable scheduling and non-standard requirements + +As mentioned above, the model's structure allows for some more complex use cases which may require a nuanced approach. Where this is required, it is advisable to begin by setting up the model in a standard way first, iterate upon it insofar as possible, and move to more complex requirements once the nuance is well understood. + +### Variable scheduling of modules with customisations + +The `_staged` tables update incrementally, and so it is possible to vary the schedule of different modules of the model without impact on the incremental structure. For example, one can run `01-base` and `02-screen-views` every hour, `04-sessions` once a day and `05-users` once a week.
+ +This is possible because every time `02-screen-views` runs, it incrementally updates the `mobile_screen_views_staged` to include all new data since the last run of the `04-sessions` module. + +Sometimes, one might require custom modules to run on a more frequent schedule than their standard counterparts. For example, one might wish to run a custom module to aggregate transaction events (from atomic data) every 30 mins, but only run the `02-screen-views` module once a day. + +This is possible using the above described structure - as long as the custom module is written to `DELETE` and `INSERT`, then it will remain accurate with every run. However, it is inefficient - every run of the custom module will process _all_ data since the last run of the `02-screen-views` module, including the data that's already been processed in the custom module. + +To allow for this kind of requirement, each module _also_ produces a `_this_run` table, which contains only the data for the _current_ run of the module. If one requires a custom module to run more frequently than a standard one - and aims for the most efficient means of doing so, one may use the `_this_run` table as an input. + +Do note that this requires that the custom module runs every time the previous module runs. So if using `_this_run` as an input, it is acceptable to run two jobs as follows: + +1. `01-base`, `02a-custom` +2. `01-base`, `02a-custom`, `02-screen-views` + +But not as follows: + +1. `01-base`, `02a-custom` +2. `01-base`, `02-screen-views` + +Since in the latter, running job 2 processes data which should be included in `02a-custom`, and that data is never persisted to the input of `02a-custom`. 
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/01-main/00-setup-metadata.sql b/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/01-main/00-setup-metadata.sql new file mode 100644 index 00000000..913e452f --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/01-main/00-setup-metadata.sql @@ -0,0 +1,47 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +-- A table storing an identifier for this run of a model - used to identify runs of the model across multiple modules/steps (eg. base, page views share this id per run) +CREATE TABLE IF NOT EXISTS {{.scratch_schema}}.{{.model}}_metadata_run_id{{.entropy}} ( + run_id TIMESTAMP +); + +-- When base runs, it's always the first module. So it's safe to just truncate here. 
+TRUNCATE TABLE {{.scratch_schema}}.{{.model}}_metadata_run_id{{.entropy}}; + +INSERT INTO {{.scratch_schema}}.{{.model}}_metadata_run_id{{.entropy}} ( + SELECT + CURRENT_TIMESTAMP() +); + +-- Permanent metadata table +CREATE TABLE IF NOT EXISTS {{.output_schema}}.datamodel_metadata{{.entropy}} ( + run_id TIMESTAMP, + model_version STRING, + model STRING, + module STRING, + run_start_tstamp TIMESTAMP, + run_end_tstamp TIMESTAMP, + rows_this_run INT64, + distinct_key STRING, + distinct_key_count INT64, + time_key STRING, + min_time_key TIMESTAMP, + max_time_key TIMESTAMP, + duplicate_rows_removed INT64, + distinct_keys_removed INT64 +) +PARTITION BY DATE(run_start_tstamp); diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/01-main/01-functions-and-procedures.sql b/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/01-main/01-functions-and-procedures.sql new file mode 100644 index 00000000..2b380c13 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/01-main/01-functions-and-procedures.sql @@ -0,0 +1,328 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +-- Function to count mismatched columns between source and target tables: +CREATE OR REPLACE FUNCTION {{.output_schema}}.columnCheckQuery (sourceDataset STRING, + sourceTable STRING, + targetDataset STRING, + targetTable STRING) +AS( + (SELECT CONCAT("""SELECT + SUM(CASE WHEN a.column_name IS NULL THEN 1 ELSE 0 END) AS missing_in_source, + SUM(CASE WHEN b.column_name IS NULL THEN 1 ELSE 0 END) AS missing_in_target + + FROM + (SELECT column_name, data_type, ordinal_position FROM """, sourceDataset, + """.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '""", sourceTable, + """') a + FULL JOIN + (SELECT column_name, data_type, ordinal_position FROM """, targetDataset, + """.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '""", targetTable, + """') b + ON + a.column_name = b.column_name + AND a.ordinal_position = b.ordinal_position + """) + ) +); + +CREATE OR REPLACE PROCEDURE {{.output_schema}}.commit_table (sourceDataset STRING, + sourceTable STRING, + targetDataset STRING, + targetTable STRING, + joinKey STRING, + partitionKey STRING, + automigrate BOOLEAN) +BEGIN + DECLARE COLS_NOT_IN_SOURCE, COLS_NOT_IN_TARGET INT64; + DECLARE SOURCE_PATH, TARGET_PATH, DELETE_QUERY STRING; + DECLARE COLUMN_ADDITIONS ARRAY<STRING>; + DECLARE LOWER_LIMIT TIMESTAMP; + + SET (SOURCE_PATH, TARGET_PATH) = (CONCAT(sourceDataset, '.', sourceTable), CONCAT(targetDataset, '.', targetTable)); + + IF automigrate THEN + + EXECUTE IMMEDIATE + format("""CREATE TABLE IF NOT EXISTS %s + PARTITION BY DATE(%s) + AS (SELECT * FROM %s WHERE FALSE);""", TARGET_PATH, partitionKey, SOURCE_PATH); + + END IF; + + -- Check if any columns are missing from either source or target table + EXECUTE IMMEDIATE {{.output_schema}}.columnCheckQuery(sourceDataset, sourceTable, targetDataset, targetTable) INTO COLS_NOT_IN_SOURCE, COLS_NOT_IN_TARGET; + + -- If source is missing a column, throw.
+ IF COLS_NOT_IN_SOURCE > 0 THEN + RAISE USING MESSAGE = 'ERROR: Source table is missing column(s) which exist in target table.'; + + ELSEIF COLS_NOT_IN_TARGET > 0 AND NOT automigrate THEN + RAISE USING MESSAGE = 'ERROR: Target table is missing column(s), but automigrate is disabled.'; + + -- If target is missing a column, and automigrate is switched on, add the columns + ELSEIF COLS_NOT_IN_TARGET > 0 AND automigrate THEN + + -- Query information schema to produce an ordered array of strings for columns and their types. + EXECUTE IMMEDIATE + format(""" + WITH columns AS(SELECT + CONCAT(a.column_name, ' ', a.data_type) AS col_with_type, + a.ordinal_position + + FROM + (SELECT column_name, data_type, ordinal_position FROM %s.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '%s') a + LEFT JOIN + (SELECT column_name, data_type, ordinal_position FROM %s.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '%s') b + ON + a.column_name = b.column_name + AND a.ordinal_position = b.ordinal_position + + WHERE b.column_name IS NULL) + + SELECT ARRAY(SELECT col_with_type FROM columns ORDER BY ordinal_position); + """, sourceDataset, sourceTable, targetDataset, targetTable) INTO COLUMN_ADDITIONS; + + --- Execute add column statements + EXECUTE IMMEDIATE format( + """ALTER TABLE %s + ADD COLUMN IF NOT EXISTS %s""", TARGET_PATH, ARRAY_TO_STRING(COLUMN_ADDITIONS, ', ADD COLUMN IF NOT EXISTS ')); + + END IF; + + -- Get lower limit + EXECUTE IMMEDIATE + format("SELECT TIMESTAMP_SUB(MIN(%s), INTERVAL {{or .upsert_lookback_days 30}} DAY) FROM %s", partitionKey, TARGET_PATH) INTO LOWER_LIMIT; + + -- Perform DELETE <> INSERT transaction + BEGIN + + -- Weird way to do it but table names can't go in 'USING' variables, but the CONCAT is v messy with duplicated 'Key' variable + + -- TODO: See if there's a cleaner way to go about this + SET DELETE_QUERY = CONCAT("""DELETE FROM """, TARGET_PATH, """ WHERE """, joinKey, """ IN (SELECT """, joinKey, """ FROM """, SOURCE_PATH, + """) AND """,
partitionKey, """ >= @LowerLimit;"""); + + EXECUTE IMMEDIATE + DELETE_QUERY + USING LOWER_LIMIT AS LowerLimit; + + EXECUTE IMMEDIATE + format("""INSERT %s (SELECT * FROM %s);""", TARGET_PATH, SOURCE_PATH); + + END; +END; + +-- Extracts first element of a context array or an unstructured event struct. +-- Returns ARRAY of STRUCTs mapping fields to their paths. The path is a COALESCE across column versions, ordered by column version +-- e.g. COALESCE(contexts_com_snowplowanalytics_snowplow_mobile_context_1_0_1[SAFE_OFFSET(0)].device_model, contexts_com_snowplowanalytics_snowplow_mobile_context_1_0_0[SAFE_OFFSET(0)].device_model, NULL) +-- Currently only works if field names aren't duplicated, and all fields are top-level (ie no arrays and structs atm) + +CREATE OR REPLACE PROCEDURE {{.output_schema}}.combine_field_versions (source_schema STRING, + source_table STRING, + source_fields STRING, -- Array of fields to select. STRING to allow for concat. Use quoted array i.e. '["id"]' + columns_prefix STRING, -- Prefix of columns to concat across versions + rename_fields_yn BOOLEAN, -- Option to rename fields + renamed_fields STRING, -- Array of names to rename fields to. STRING to allow for concat. Use quoted array i.e. '["session_id"]' + OUT FIELDS ARRAY<STRUCT<field_name STRING, paths_coalesce STRING>>) -- Returns ARRAY of STRUCTs, mapping fields to paths.
+ +BEGIN + + DECLARE SELECTOR, FIELD_VERSIONS_QUERY STRING; + DECLARE NUM_SOURCE_FIELDS, NUM_RENAMED_FIELDS INT64; + + --If renaming fields, check the array lengths are the same between source_fields and renamed_fields + IF rename_fields_yn THEN + + SET NUM_SOURCE_FIELDS = (ARRAY_LENGTH(ARRAY(SELECT * FROM UNNEST(SPLIT(SUBSTR(source_fields, 2 , LENGTH(source_fields) - 2)))))); + SET NUM_RENAMED_FIELDS = (ARRAY_LENGTH(ARRAY(SELECT * FROM UNNEST(SPLIT(SUBSTR(renamed_fields, 2 , LENGTH(renamed_fields) - 2)))))); + + IF NUM_SOURCE_FIELDS != NUM_RENAMED_FIELDS THEN + RAISE USING MESSAGE = 'ERROR: Source field and renamed field arrays are not the same length'; + END IF; + + END IF; + + -- Determines DTYPE of column and therefore suitable method to select fields from column + SET SELECTOR = ( + SELECT + CASE WHEN columns_prefix LIKE 'contexts%' THEN "'[SAFE_OFFSET(0)].'" + WHEN columns_prefix LIKE 'unstruct%' THEN "'.'" END + ); + + IF SELECTOR IS NULL THEN + RAISE USING MESSAGE = 'ERROR: Unrecognized column type'; + END IF; + + SET FIELD_VERSIONS_QUERY = CONCAT( + """WITH field_mapping AS ( + SELECT + source_field, + IF(""",rename_fields_yn,""", """,renamed_fields,"""[OFFSET(source_field_offset)], source_field) AS target_field + + FROM UNNEST(""",source_fields,""") AS source_field WITH OFFSET AS source_field_offset + ) + + , column_field_paths AS ( + SELECT + field_path, + column_name + + FROM """,source_schema,""".INFORMATION_SCHEMA.COLUMN_FIELD_PATHS + WHERE table_name = '""",source_table,"""' + AND column_name LIKE CONCAT('""",columns_prefix,"""', '%') + AND ARRAY_LENGTH(SPLIT(field_path, '.')) = 2 + AND SPLIT(field_path, '.')[SAFE_OFFSET(1)] IN (SELECT * FROM UNNEST(""",source_fields,""")) + AND data_type NOT LIKE 'STRUCT%' + AND data_type NOT LIKE 'ARRAY%' + ) + + , fields AS ( + SELECT + SPLIT(field_path, '.')[SAFE_OFFSET(1)] AS field_name, + ARRAY_AGG(CONCAT(column_name,""", SELECTOR, """, SPLIT(field_path, '.')[SAFE_OFFSET(1)]) ORDER BY column_name DESC) AS paths
+ + FROM column_field_paths + GROUP BY 1 + ) + + , renamed_fields AS ( + SELECT + fm.target_field AS field_name, + CONCAT('COALESCE(', ARRAY_TO_STRING(f.paths, ', '), ', NULL) ') AS paths_coalesce + + FROM field_mapping fm -- using mapping as spine. This ensure the order of the output is the same as the input. + LEFT JOIN fields f + ON fm.source_field = f.field_name + WHERE f.paths IS NOT NULL -- ignore inputted field if no path present + ) + + SELECT ARRAY_AGG(STRUCT(field_name, paths_coalesce)) AS fields + + FROM renamed_fields + + """); + + EXECUTE IMMEDIATE FIELD_VERSIONS_QUERY INTO FIELDS; + +END; + +-- For every STRUCT in the input ARRAY, concats path to the field name to generate a column name. All columns are then concat together into a comma separated string. +CREATE OR REPLACE PROCEDURE {{.output_schema}}.concat_fields (FIELDS ARRAY<STRUCT<field_name STRING, paths_coalesce STRING>>, + OUT FIELDS_CONCAT STRING) + +BEGIN + + SET FIELDS_CONCAT = (SELECT ARRAY_TO_STRING(ARRAY_AGG(CONCAT(paths_coalesce, ' AS ', field_name)), ',') FROM UNNEST(FIELDS)); + +END; + +-- Used to dynamically select the correct mobile context fields from events table. +-- MOBILE_CONTEXT_COLUMNS = Coalesce of fields within context e.g. "COALESCE(contexts_com_snowplowanalytics_snowplow_mobile_context_1_0_1[SAFE_OFFSET(0)].device_manufacturer, contexts_com_snowplowanalytics_snowplow_mobile_context_1_0_0[SAFE_OFFSET(0)].device_manufacturer) AS device_manufacturer" +CREATE OR REPLACE PROCEDURE {{.output_schema}}.mobile_mobile_context_fields (context_enabled BOOLEAN, + OUT MOBILE_CONTEXT_COLUMNS STRING) + +BEGIN + + DECLARE MOBILE_FIELDS ARRAY<STRUCT<field_name STRING, paths_coalesce STRING>>; + --If mobile context enabled, find all fields across context schema versions, else return NULL fields.
+ IF context_enabled THEN + + CALL {{.output_schema}}.combine_field_versions( + '{{.input_schema}}', -- source_schema + 'events', -- source_table + '["device_manufacturer","device_model","os_type","os_version","android_idfa","apple_idfa","apple_idfv","carrier","open_idfa","network_technology","network_type"]', --source_fields. Quoted array to allow for concat + 'contexts_com_snowplowanalytics_snowplow_mobile_context_1_0', -- columns_prefix + false, -- rename_y_n + '[""]', -- rename_fields + MOBILE_FIELDS -- returns all fields in context + path + ); + + CALL {{.output_schema}}.concat_fields( + MOBILE_FIELDS, + MOBILE_CONTEXT_COLUMNS -- returned context coalesce columns + ); + ELSE + + SET MOBILE_CONTEXT_COLUMNS = ( + """CAST(NULL AS STRING) AS device_manufacturer, + CAST(NULL AS STRING) AS device_model, + CAST(NULL AS STRING) AS os_type, + CAST(NULL AS STRING) AS os_version, + CAST(NULL AS STRING) AS android_idfa, + CAST(NULL AS STRING) AS apple_idfa, + CAST(NULL AS STRING) AS apple_idfv, + CAST(NULL AS STRING) AS carrier, + CAST(NULL AS STRING) AS open_idfa, + CAST(NULL AS STRING) AS network_technology, + CAST(NULL AS STRING) AS network_type"""); + + END IF; + +END; + + +CREATE OR REPLACE PROCEDURE {{.output_schema}}.mobile_app_errors_fields (OUT APP_ERRORS_EVENTS_COLUMNS STRING) + +BEGIN + + DECLARE APP_ERROR_FIELDS ARRAY<STRUCT<field_name STRING, paths_coalesce STRING>>; + + CALL {{.output_schema}}.combine_field_versions( + '{{.scratch_schema}}', -- source_schema + 'mobile_events_staged', -- source_table + '["message","programming_language","class_name","exception_name","is_fatal","line_number","stack_trace","thread_id","thread_name"]', --source_fields.
Quoted array to allow for concat + 'unstruct_event_com_snowplowanalytics_snowplow_application_error_1_0', -- columns_prefix + false, -- rename_y_n + '[""]', -- rename_fields + APP_ERROR_FIELDS -- returns all fields in event + path + ); + + CALL {{.output_schema}}.concat_fields( + APP_ERROR_FIELDS, + APP_ERRORS_EVENTS_COLUMNS -- returned event coalesce columns + ); + +END; + + +CREATE OR REPLACE PROCEDURE {{.output_schema}}.mobile_session_context_fields (OUT SESSION_ID STRING, + OUT SESSION_CONTEXT_COLUMNS STRING) + +BEGIN + + DECLARE SESSION_FIELDS ARRAY<STRUCT<field_name STRING, paths_coalesce STRING>>; + --Mandatory context. Returns all fields across session context schema versions. + CALL {{.output_schema}}.combine_field_versions( + '{{.input_schema}}', -- source_schema + 'events', -- source_table + '["session_id","session_index","previous_session_id","user_id","first_event_id"]', --source_fields. Quoted array to allow for concat + 'contexts_com_snowplowanalytics_snowplow_client_session_1_0', -- columns_prefix + true, -- rename_y_n + '["session_id","session_index","previous_session_id","device_user_id","session_first_event_id"]', -- rename_fields + SESSION_FIELDS -- returns all fields in context + path + ); + + CALL {{.output_schema}}.concat_fields( + SESSION_FIELDS, + SESSION_CONTEXT_COLUMNS -- returned context coalesce columns + ); + + SET SESSION_ID = ( + SELECT paths_coalesce FROM UNNEST(SESSION_FIELDS) WHERE field_name = 'session_id' + ); + +END; diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/99-complete/01-cleanup-metadata.sql b/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/99-complete/01-cleanup-metadata.sql new file mode 100644 index 00000000..e707b3f5 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/99-complete/01-cleanup-metadata.sql @@ -0,0 +1,17 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_metadata_run_id{{.entropy}}; -- removes the scratch table holding this run's run_id
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/XX-destroy/XX-destroy-metadata.sql b/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/XX-destroy/XX-destroy-metadata.sql
new file mode 100644
index 00000000..15c68270
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/00-setup/XX-destroy/XX-destroy-metadata.sql
@@ -0,0 +1,17 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+DROP TABLE IF EXISTS {{.output_schema}}.datamodel_metadata{{.entropy}}; -- removes the permanent run-metadata table
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/00-setup-base.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/00-setup-base.sql
new file mode 100644
index 00000000..3fdff00b
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/00-setup-base.sql
@@ -0,0 +1,104 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+-- A table storing an identifier for this run of a model - used to identify runs of the model across multiple modules/steps (eg. base, page views share this id per run)
+CREATE TABLE IF NOT EXISTS {{.scratch_schema}}.{{.model}}_metadata_run_id{{.entropy}} (
+  run_id TIMESTAMP
+);
+
+-- When base runs, it's always the first module. So it's safe to just truncate here.
+TRUNCATE TABLE {{.scratch_schema}}.{{.model}}_metadata_run_id{{.entropy}};
+
+INSERT INTO {{.scratch_schema}}.{{.model}}_metadata_run_id{{.entropy}} (
+  SELECT
+    CURRENT_TIMESTAMP() -- the run_id is simply the timestamp at which base setup ran
+);
+
+-- Permanent metadata table
+CREATE TABLE IF NOT EXISTS {{.output_schema}}.datamodel_metadata{{.entropy}} (
+  run_id TIMESTAMP,
+  model_version STRING,
+  model STRING,
+  module STRING,
+  run_start_tstamp TIMESTAMP,
+  run_end_tstamp TIMESTAMP,
+  rows_this_run INT64,
+  distinct_key STRING,
+  distinct_key_count INT64,
+  time_key STRING,
+  min_time_key TIMESTAMP,
+  max_time_key TIMESTAMP,
+  duplicate_rows_removed INT64,
+  distinct_keys_removed INT64
+)
+PARTITION BY DATE(run_start_tstamp);
+
+-- Setup temp metadata tables for this run. Same columns as the permanent table plus a leading id column.
+CREATE OR REPLACE TABLE {{.scratch_schema}}.{{.model}}_base_metadata_this_run{{.entropy}} (
+  id STRING,
+  run_id TIMESTAMP,
+  model_version STRING,
+  model STRING,
+  module STRING,
+  run_start_tstamp TIMESTAMP,
+  run_end_tstamp TIMESTAMP,
+  rows_this_run INT64,
+  distinct_key STRING,
+  distinct_key_count INT64,
+  time_key STRING,
+  min_time_key TIMESTAMP,
+  max_time_key TIMESTAMP,
+  duplicate_rows_removed INT64,
+  distinct_keys_removed INT64
+);
+
+INSERT INTO {{.scratch_schema}}.{{.model}}_base_metadata_this_run{{.entropy}} (
+  SELECT
+    'run', -- id: constant key; the base-metadata step updates this row by matching on it
+    run_id,
+    '{{.model_version}}',
+    '{{.model}}',
+    'base', -- module
+    CURRENT_TIMESTAMP(), -- run_start_tstamp
+    NULL, -- run_end_tstamp and the remaining columns stay NULL at setup time
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+  FROM {{.scratch_schema}}.{{.model}}_metadata_run_id{{.entropy}}
+);
+
+
+-- Setup manifests. Each is created only if missing and seeded with a single 'seed' row stamped with start_date.
+CREATE TABLE IF NOT EXISTS {{.output_schema}}.{{.model}}_base_event_id_manifest{{.entropy}}
+PARTITION BY DATE(collector_tstamp)
+AS (
+  SELECT
+    'seed' AS event_id,
+    TIMESTAMP('{{.start_date}}') AS collector_tstamp
+);
+
+CREATE TABLE IF NOT EXISTS {{.output_schema}}.{{.model}}_base_session_id_manifest{{.entropy}}
+PARTITION BY DATE(min_tstamp)
+AS (
+  SELECT
+    'seed' AS session_id,
+    TIMESTAMP('{{.start_date}}') AS min_tstamp
+);
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/01-new-events-limits.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/01-new-events-limits.sql
new file mode 100644
index 00000000..93ba6047
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/01-new-events-limits.sql
@@ -0,0 +1,27 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+-- Create a limit for this run - single value table.
+CREATE OR REPLACE TABLE {{.scratch_schema}}.{{.model}}_base_new_events_limits{{.entropy}}
+AS(
+  SELECT
+    TIMESTAMP_SUB(MAX(collector_tstamp), INTERVAL {{or .lookback_window_hours 6}} HOUR) AS lower_limit, -- newest manifest event minus lookback_window_hours (6 when not supplied)
+    TIMESTAMP_ADD(MAX(collector_tstamp), INTERVAL {{or .update_cadence_days 7}} DAY) AS upper_limit, -- newest manifest event plus update_cadence_days (7 when not supplied)
+    TIMESTAMP_SUB(MAX(collector_tstamp), INTERVAL {{or .session_lookback_days 365}} DAY) AS session_limit -- newest manifest event minus session_lookback_days (365 when not supplied)
+
+  FROM
+    {{.output_schema}}.{{.model}}_base_event_id_manifest{{.entropy}}
+);
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/02-run-manifest.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/02-run-manifest.sql
new file mode 100644
index 00000000..cfb4e47a
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/02-run-manifest.sql
@@ -0,0 +1,46 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+-- Use variable to set scan limits
+DECLARE LOWER_LIMIT, SESSION_LIMIT TIMESTAMP;
+
+SET (LOWER_LIMIT, SESSION_LIMIT) = (SELECT AS STRUCT lower_limit, session_limit FROM {{.scratch_schema}}.{{.model}}_base_new_events_limits{{.entropy}}); -- reads the single-row limits table built by the previous step
+
+-- Subset the manifest for performance.
+CREATE OR REPLACE TABLE {{.scratch_schema}}.{{.model}}_base_run_manifest{{.entropy}}
+AS(
+  SELECT
+    *
+
+  FROM
+    {{.output_schema}}.{{.model}}_base_event_id_manifest{{.entropy}}
+
+  WHERE
+    collector_tstamp >= TIMESTAMP_SUB(LOWER_LIMIT, INTERVAL 7 DAY) -- keeps a fixed 7 extra days of manifest below the lower limit
+);
+
+-- Subset session manifest table - should be as long a timeframe as practical
+CREATE OR REPLACE TABLE {{.scratch_schema}}.{{.model}}_base_session_id_run_manifest{{.entropy}}
+AS(
+  SELECT
+    *
+
+  FROM
+    {{.output_schema}}.{{.model}}_base_session_id_manifest{{.entropy}}
+
+  WHERE
+    min_tstamp >= SESSION_LIMIT
+);
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/03-sessions-to-process.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/03-sessions-to-process.sql
new file mode 100644
index 00000000..bf74edac
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/03-sessions-to-process.sql
@@ -0,0 +1,73 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+-- Use variable to set scan limits
+DECLARE LOWER_LIMIT, UPPER_LIMIT TIMESTAMP;
+DECLARE SESSIONS_TO_PROCESS_QUERY STRING; -- holds the generated CREATE TABLE statement that is run with EXECUTE IMMEDIATE at the end
+{{if eq .model "mobile"}}
+  -- Session context schema evolved with time. Finding all versions of column.
+  DECLARE SESSION_ID, SESSION_CONTEXT_COLUMNS STRING;
+  CALL {{.output_schema}}.mobile_session_context_fields(SESSION_ID, SESSION_CONTEXT_COLUMNS); -- only SESSION_ID is used in this step; it is substituted into the query text below
+
+{{end}}
+
+SET (LOWER_LIMIT, UPPER_LIMIT) = (SELECT AS STRUCT lower_limit, upper_limit FROM {{.scratch_schema}}.{{.model}}_base_new_events_limits{{.entropy}});
+
+SET SESSIONS_TO_PROCESS_QUERY = format("""
+  -- Get sessionids for new events
+  CREATE OR REPLACE TABLE {{.scratch_schema}}.{{.model}}_base_sessions_to_process{{.entropy}}
+  AS(
+    SELECT
+      {{if eq .model "web"}} a.domain_sessionid {{else if eq .model "mobile"}} %s {{end}} AS session_id,
+      MIN(a.collector_tstamp) AS min_tstamp,
+      MAX(a.collector_tstamp) AS max_tstamp
+
+    FROM
+      {{.input_schema}}.events a
+    LEFT JOIN
+      {{.scratch_schema}}.{{.model}}_base_run_manifest{{.entropy}} b
+      ON a.event_id = b.event_id
+
+    WHERE
+      b.event_id IS NULL
+      AND a.collector_tstamp >= @lowerLimit
+      AND a.collector_tstamp <= @upperLimit
+      AND TIMESTAMP_DIFF(a.dvce_sent_tstamp, a.dvce_created_tstamp, DAY) <= {{or .days_late_allowed 3}}
+      -- don't process data that's too late
+      AND {{if eq .model "web"}} a.domain_sessionid {{else if eq .model "mobile"}} %s {{end}} IS NOT NULL
+      -- Filter by platform. Required.
+      AND a.platform IN (
+        {{range $i, $platform := .platform_filters}} {{if $i}}, {{end}} '{{$platform}}' -- User defined platforms if specified
+        {{else}}
+        {{if eq .model "web"}} 'web' {{else if eq .model "mobile"}} 'mob' {{end}} --default values
+        {{end}}
+        )
+      {{if .app_id_filters}}
+      -- Filter by app_id. Ignore if not specified.
+      AND a.app_id IN ( {{range $i, $app_id := .app_id_filters}} {{if $i}}, {{end}} '{{$app_id}}' {{end}} )
+      {{end}}
+
+      {{if eq (or .derived_tstamp_partitioned false) true}}
+
+      AND a.derived_tstamp >= @lowerLimit
+      AND a.derived_tstamp <= @upperLimit
+
+      {{end}}
+
+    GROUP BY 1
+  );""" {{if eq .model "mobile"}} , SESSION_ID, SESSION_ID {{end}}); --Only sub strings if mobile model.
+
+EXECUTE IMMEDIATE SESSIONS_TO_PROCESS_QUERY USING LOWER_LIMIT AS lowerLimit, UPPER_LIMIT AS upperLimit; -- the limits are bound as the named parameters @lowerLimit / @upperLimit rather than formatted into the text
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/04-sessions-to-include.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/04-sessions-to-include.sql
new file mode 100644
index 00000000..05a9eae6
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/04-sessions-to-include.sql
@@ -0,0 +1,35 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+-- Get only those session ids that we'd like to process in this run.
+CREATE OR REPLACE TABLE {{.scratch_schema}}.{{.model}}_base_sessions_to_include{{.entropy}}
+AS(
+  SELECT
+    a.session_id,
+    LEAST(a.min_tstamp, COALESCE(b.min_tstamp, a.min_tstamp)) AS min_tstamp
+    -- LEAST() returns null if one is null, hence the coalesce.
+
+  FROM
+    {{.scratch_schema}}.{{.model}}_base_sessions_to_process{{.entropy}} a
+  LEFT JOIN
+    {{.scratch_schema}}.{{.model}}_base_session_id_run_manifest{{.entropy}} b
+    ON a.session_id = b.session_id
+
+  WHERE
+    a.session_id IS NOT NULL
+    AND TIMESTAMP_DIFF(a.max_tstamp, COALESCE(b.min_tstamp, a.max_tstamp), DAY) <= {{or .days_late_allowed 3}}
+    -- Compares the max_tstamp of new data to the min_tstamp for its existing session, if one exists. With no manifest row the difference is 0, so the session is always kept.
+);
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/05-batch-limits.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/05-batch-limits.sql
new file mode 100644
index 00000000..f356b3dd
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/05-batch-limits.sql
@@ -0,0 +1,25 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+-- Create a new limit based on this data
+CREATE OR REPLACE TABLE {{.scratch_schema}}.{{.model}}_base_run_limits{{.entropy}} AS(
+  SELECT
+    MIN(min_tstamp) AS lower_limit, -- earliest start among the sessions selected for this run; NULL when none were selected
+    (SELECT upper_limit FROM {{.scratch_schema}}.{{.model}}_base_new_events_limits{{.entropy}}) AS upper_limit -- upper bound is carried over unchanged
+
+  FROM
+    {{.scratch_schema}}.{{.model}}_base_sessions_to_include{{.entropy}}
+);
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/06-events-this-run.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/06-events-this-run.sql
new file mode 100644
index 00000000..d59396b4
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/06-events-this-run.sql
@@ -0,0 +1,183 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+-- Use variable to set scan limits
+DECLARE LOWER_LIMIT, UPPER_LIMIT TIMESTAMP;
+{{if eq .model "mobile"}}
+  -- Session and mobile context schema evolved with time. Finding all versions of column.
+  DECLARE SESSION_ID, SESSION_CONTEXT_COLUMNS, MOBILE_CONTEXT_COLUMNS, MOBILE_EVENTS_QUERY STRING;
+  CALL {{.output_schema}}.mobile_session_context_fields(SESSION_ID, SESSION_CONTEXT_COLUMNS); -- both arguments are OUT parameters filled by the procedure
+  CALL {{.output_schema}}.mobile_mobile_context_fields({{.mobile_context}}, MOBILE_CONTEXT_COLUMNS); -- NOTE(review): the first argument is presumably the procedure's context_enabled switch (real columns vs. typed NULLs) - confirm against its signature
+
+{{end}}
+
+SET (LOWER_LIMIT, UPPER_LIMIT) = (SELECT AS STRUCT lower_limit, upper_limit FROM {{.scratch_schema}}.{{.model}}_base_run_limits{{.entropy}}); -- limits come from the batch-limits table, not the new-events limits
+
+  {{if eq .model "web"}}
+  CREATE OR REPLACE TABLE {{.scratch_schema}}.{{.model}}_events_this_run{{.entropy}}
+  AS(
+    -- Without downstream joins, it's safe to dedupe by picking the first event_id found.
+    SELECT
+      ARRAY_AGG(e ORDER BY e.collector_tstamp LIMIT 1)[OFFSET(0)].* -- per event_id, keep the row with the earliest collector_tstamp
+    FROM (
+      SELECT
+        a.contexts_com_snowplowanalytics_snowplow_web_page_1_0_0[SAFE_OFFSET(0)].id AS page_view_id,
+        a.* EXCEPT(contexts_com_snowplowanalytics_snowplow_web_page_1_0_0)
+
+      FROM
+        {{.input_schema}}.events a
+      INNER JOIN
+        {{.scratch_schema}}.{{.model}}_base_sessions_to_include{{.entropy}} b
+        ON a.domain_sessionid = b.session_id
+      WHERE
+        a.collector_tstamp >= LOWER_LIMIT
+        AND a.collector_tstamp <= UPPER_LIMIT
+        AND a.platform IN ( {{range $i, $platform := .platform_filters}} {{if $i}}, {{end}} '{{$platform}}' {{else}} 'web' {{end}} )
+        {{if .app_id_filters}}
+        -- Filter by app_id. Ignore if not specified.
+        AND a.app_id IN ( {{range $i, $app_id := .app_id_filters}} {{if $i}}, {{end}} '{{$app_id}}' {{end}} )
+        {{end}}
+
+        {{if eq (or .derived_tstamp_partitioned false) true}}
+
+        AND a.derived_tstamp >= LOWER_LIMIT
+        AND a.derived_tstamp <= UPPER_LIMIT
+
+        {{end}}
+
+    ) e
+    GROUP BY
+      e.event_id
+  );
+  {{end}}
+
+  {{if eq .model "mobile"}}
+
+  SET MOBILE_EVENTS_QUERY = format("""
+    CREATE OR REPLACE TABLE {{.scratch_schema}}.{{.model}}_events_this_run{{.entropy}}
+    PARTITION BY DATE(collector_tstamp)
+    AS(
+
+      WITH events AS (
+
+        SELECT
+          -- Screen view event
+          a.unstruct_event_com_snowplowanalytics_mobile_screen_view_1_0_0.id AS screen_view_id,
+          a.unstruct_event_com_snowplowanalytics_mobile_screen_view_1_0_0.name AS screen_view_name,
+          a.unstruct_event_com_snowplowanalytics_mobile_screen_view_1_0_0.previous_id AS screen_view_previous_id,
+          a.unstruct_event_com_snowplowanalytics_mobile_screen_view_1_0_0.previous_name AS screen_view_previous_name,
+          a.unstruct_event_com_snowplowanalytics_mobile_screen_view_1_0_0.previous_type AS screen_view_previous_type,
+          a.unstruct_event_com_snowplowanalytics_mobile_screen_view_1_0_0.transition_type AS screen_view_transition_type,
+          a.unstruct_event_com_snowplowanalytics_mobile_screen_view_1_0_0.type AS screen_view_type,
+          -- Session context
+          %s,
+          -- Mobile context
+          %s,
+          -- Geo context
+          {{if eq .geolocation_context true}}
+          a.contexts_com_snowplowanalytics_snowplow_geolocation_context_1_1_0[SAFE_OFFSET(0)].latitude AS device_latitude,
+          a.contexts_com_snowplowanalytics_snowplow_geolocation_context_1_1_0[SAFE_OFFSET(0)].longitude AS device_longitude,
+          a.contexts_com_snowplowanalytics_snowplow_geolocation_context_1_1_0[SAFE_OFFSET(0)].latitude_longitude_accuracy AS device_latitude_longitude_accuracy,
+          a.contexts_com_snowplowanalytics_snowplow_geolocation_context_1_1_0[SAFE_OFFSET(0)].altitude AS device_altitude,
+          a.contexts_com_snowplowanalytics_snowplow_geolocation_context_1_1_0[SAFE_OFFSET(0)].altitude_accuracy AS device_altitude_accuracy,
+          a.contexts_com_snowplowanalytics_snowplow_geolocation_context_1_1_0[SAFE_OFFSET(0)].bearing AS device_bearing,
+          a.contexts_com_snowplowanalytics_snowplow_geolocation_context_1_1_0[SAFE_OFFSET(0)].speed AS device_speed,
+          {{else}}
+          CAST(NULL AS FLOAT64) AS device_latitude,
+          CAST(NULL AS FLOAT64) AS device_longitude,
+          CAST(NULL AS FLOAT64) AS device_latitude_longitude_accuracy,
+          CAST(NULL AS FLOAT64) AS device_altitude,
+          CAST(NULL AS FLOAT64) AS device_altitude_accuracy,
+          CAST(NULL AS FLOAT64) AS device_bearing,
+          CAST(NULL AS FLOAT64) AS device_speed,
+          {{end}}
+          -- App context
+          {{if eq .application_context true}}
+          a.contexts_com_snowplowanalytics_mobile_application_1_0_0[SAFE_OFFSET(0)].build,
+          a.contexts_com_snowplowanalytics_mobile_application_1_0_0[SAFE_OFFSET(0)].version,
+          {{else}}
+          CAST(NULL AS STRING) AS build,
+          CAST(NULL AS STRING) AS version,
+          {{end}}
+          -- Screen context
+          {{if eq .screen_context true}}
+          a.contexts_com_snowplowanalytics_mobile_screen_1_0_0[SAFE_OFFSET(0)].id AS screen_id,
+          a.contexts_com_snowplowanalytics_mobile_screen_1_0_0[SAFE_OFFSET(0)].name AS screen_name,
+          a.contexts_com_snowplowanalytics_mobile_screen_1_0_0[SAFE_OFFSET(0)].activity AS screen_activity,
+          a.contexts_com_snowplowanalytics_mobile_screen_1_0_0[SAFE_OFFSET(0)].fragment AS screen_fragment,
+          a.contexts_com_snowplowanalytics_mobile_screen_1_0_0[SAFE_OFFSET(0)].top_view_controller AS screen_top_view_controller,
+          a.contexts_com_snowplowanalytics_mobile_screen_1_0_0[SAFE_OFFSET(0)].type AS screen_type,
+          a.contexts_com_snowplowanalytics_mobile_screen_1_0_0[SAFE_OFFSET(0)].view_controller AS screen_view_controller,
+          {{else}}
+          CAST(NULL AS STRING) AS screen_id,
+          CAST(NULL AS STRING) AS screen_name,
+          CAST(NULL AS STRING) AS screen_activity,
+          CAST(NULL AS STRING) AS screen_fragment,
+          CAST(NULL AS STRING) AS screen_top_view_controller,
+          CAST(NULL AS STRING) AS screen_type,
+          CAST(NULL AS STRING) AS screen_view_controller,
+          {{end}}
+          -- select a.* after contexts to allow for future additional columns to be added to events_staged during UDF commit_table migratation step.
+          -- leaving original context arrays in staged table. Future schema versions may contain fields that are interesting to customers but not the standard model.
+          a.*
+
+        FROM
+          {{.input_schema}}.events a
+        INNER JOIN
+          {{.scratch_schema}}.{{.model}}_base_sessions_to_include{{.entropy}} b
+          ON %s = b.session_id
+
+        WHERE
+          a.collector_tstamp >= @lowerLimit
+          AND a.collector_tstamp <= @upperLimit
+          AND a.platform IN ( {{range $i, $platform := .platform_filters}} {{if $i}}, {{end}} '{{$platform}}' {{else}} 'mob' {{end}} )
+          {{if .app_id_filters}}
+          -- Filter by app_id. Ignore if not specified.
+          AND a.app_id IN ( {{range $i, $app_id := .app_id_filters}} {{if $i}}, {{end}} '{{$app_id}}' {{end}} )
+          {{end}}
+
+          {{if eq (or .derived_tstamp_partitioned false) true}}
+
+          AND a.derived_tstamp >= @lowerLimit
+          AND a.derived_tstamp <= @upperLimit
+
+          {{end}}
+
+      )
+
+      , deduped_events AS (
+        -- Without downstream joins, it's safe to dedupe by picking the first event_id found.
+        SELECT
+          ARRAY_AGG(e ORDER BY e.collector_tstamp LIMIT 1)[OFFSET(0)].*
+
+        FROM
+          events AS e
+
+        GROUP BY
+          e.event_id
+      )
+
+      SELECT
+        *,
+        ROW_NUMBER() OVER(PARTITION BY d.session_id ORDER BY d.derived_tstamp) AS event_index_in_session
+
+      FROM
+        deduped_events AS d
+    );""", SESSION_CONTEXT_COLUMNS, MOBILE_CONTEXT_COLUMNS, SESSION_ID); -- fills the three %s placeholders in order: session columns, mobile columns, join key expression
+
+  EXECUTE IMMEDIATE MOBILE_EVENTS_QUERY USING LOWER_LIMIT AS lowerLimit, UPPER_LIMIT AS upperLimit; -- binds the @lowerLimit / @upperLimit named parameters used inside the generated statement
+
+  {{end}}
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/07-base-metadata.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/07-base-metadata.sql
new file mode 100644
index 00000000..1de18779
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/07-base-metadata.sql
@@ -0,0 +1,41 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+CREATE OR REPLACE TABLE {{.scratch_schema}}.{{.model}}_base_run_metadata_temp{{.entropy}} AS ( -- one-row summary of the events selected for this run
+  SELECT
+    'run' AS id, -- same constant id as the row in the metadata_this_run table, so the UPDATE below can match on it
+    count(*) AS rows_this_run,
+    'event_id' AS distinct_key,
+    count(DISTINCT event_id) AS distinct_key_count,
+    'collector_tstamp' AS time_key,
+    MIN(collector_tstamp) AS min_time_key,
+    MAX(collector_tstamp) AS max_time_key
+
+  FROM
+    {{.scratch_schema}}.{{.model}}_events_this_run{{.entropy}}
+);
+
+UPDATE {{.scratch_schema}}.{{.model}}_base_metadata_this_run{{.entropy}} a -- copy the summary into the run's metadata row and stamp the end time
+  SET
+    run_end_tstamp = CURRENT_TIMESTAMP(),
+    rows_this_run = b.rows_this_run,
+    distinct_key = b.distinct_key,
+    distinct_key_count = b.distinct_key_count,
+    time_key = b.time_key,
+    min_time_key = b.min_time_key,
+    max_time_key = b.max_time_key
+  FROM {{.scratch_schema}}.{{.model}}_base_run_metadata_temp{{.entropy}} b
+  WHERE a.id = b.id;
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/08-commit-base.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/08-commit-base.sql
new file mode 100644
index 00000000..4582e2cb
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/01-main/08-commit-base.sql
@@ -0,0 +1,52 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+BEGIN
+
+  {{if eq .stage_next true}}
+    -- Commit staging if enabled
+    -- Note: automigrate is hardcoded to true here on purpose
+    CALL {{.output_schema}}.commit_table('{{.scratch_schema}}',                       -- sourceDataset
+                                         '{{.model}}_events_this_run{{.entropy}}',    -- sourceTable
+                                         '{{.scratch_schema}}',                       -- targetDataset
+                                         '{{.model}}_events_staged{{.entropy}}',      -- targetTable
+                                         'event_id',                                  -- joinKey
+                                         'collector_tstamp',                          -- partitionKey
+                                         TRUE);                                       -- automigrate
+  {{end}}
+
+  -- Commit metadata: append this module's row from the scratch metadata table to the permanent one (the scratch-only id column is not copied).
+  INSERT {{.output_schema}}.datamodel_metadata{{.entropy}} (
+    SELECT
+      run_id,
+      model_version,
+      model,
+      module,
+      run_start_tstamp,
+      run_end_tstamp,
+      rows_this_run,
+      distinct_key,
+      distinct_key_count,
+      time_key,
+      min_time_key,
+      max_time_key,
+      duplicate_rows_removed,
+      distinct_keys_removed
+
+    FROM {{.scratch_schema}}.{{.model}}_base_metadata_this_run{{.entropy}}
+  );
+
+END;
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/99-complete/98-base-manifest.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/99-complete/98-base-manifest.sql
new file mode 100644
index 00000000..c0200ea6
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/99-complete/98-base-manifest.sql
@@ -0,0 +1,34 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+DECLARE LOWER_LIMIT TIMESTAMP;
+
+SET LOWER_LIMIT = (SELECT lower_limit FROM {{.scratch_schema}}.{{.model}}_base_run_limits{{.entropy}}); -- lower bound of this run, used to restrict the manifest DELETE below
+
+BEGIN
+  DELETE
+    FROM {{.output_schema}}.{{.model}}_base_event_id_manifest{{.entropy}}
+    WHERE event_id IN (SELECT event_id FROM {{.scratch_schema}}.{{.model}}_events_this_run{{.entropy}})
+      AND collector_tstamp >= LOWER_LIMIT; -- delete-then-insert replaces manifest rows for events processed in this run
+
+  INSERT INTO {{.output_schema}}.{{.model}}_base_event_id_manifest{{.entropy}} (SELECT event_id, collector_tstamp FROM {{.scratch_schema}}.{{.model}}_events_this_run{{.entropy}});
+
+  -- Commit session_id manifest
+  DELETE
+    FROM {{.output_schema}}.{{.model}}_base_session_id_manifest{{.entropy}}
+    WHERE session_id IN (SELECT session_id FROM {{.scratch_schema}}.{{.model}}_base_sessions_to_include{{.entropy}});
+
+  INSERT INTO {{.output_schema}}.{{.model}}_base_session_id_manifest{{.entropy}} (SELECT * FROM {{.scratch_schema}}.{{.model}}_base_sessions_to_include{{.entropy}}); -- SELECT * relies on sessions_to_include having exactly the manifest's columns (session_id, min_tstamp)
+END;
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/99-complete/99-base-cleanup.sql b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/99-complete/99-base-cleanup.sql
new file mode 100644
index 00000000..e4f06a8a
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/99-complete/99-base-cleanup.sql
@@ -0,0 +1,39 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+{{if eq .cleanup_mode "trace"}} SELECT 1; {{else}}
+  DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_base_new_events_limits{{.entropy}}; -- this group is dropped unless cleanup_mode is "trace"
+  DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_base_sessions_to_process{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_base_sessions_to_include{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_base_metadata_this_run{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_base_run_metadata_temp{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_base_session_id_run_manifest{{.entropy}};
+{{end}}
+
+{{if eq .cleanup_mode "debug" "trace"}} SELECT 1; {{else}}
+  DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_base_run_manifest{{.entropy}}; -- this group is dropped unless cleanup_mode is "debug" or "trace" (eq with several arguments matches any of them)
+  DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_events_this_run{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_base_run_limits{{.entropy}};
+  DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_field_versions;
+  DROP PROCEDURE IF EXISTS {{.output_schema}}.concat_fields;
+  DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_app_errors_fields;
+  DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_mobile_context_fields;
+  DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_session_context_fields;
+{{end}}
+
+{{if eq .ends_run true}}
+  DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_metadata_run_id{{.entropy}}; -- the shared run_id table is removed only when this module is marked as the last one of the run
+{{end}}
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/01-base/XX-destroy/XX-destroy-base.sql
b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/XX-destroy/XX-destroy-base.sql
new file mode 100644
index 00000000..2405a71e
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/01-base/XX-destroy/XX-destroy-base.sql
@@ -0,0 +1,28 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+-- Drop the base module's persistent objects: both manifests, the staged events table and the helper routines.
+DROP TABLE IF EXISTS {{.output_schema}}.{{.model}}_base_event_id_manifest{{.entropy}};
+DROP TABLE IF EXISTS {{.output_schema}}.{{.model}}_base_session_id_manifest{{.entropy}};
+DROP TABLE IF EXISTS {{.scratch_schema}}.{{.model}}_events_staged{{.entropy}};
+DROP FUNCTION IF EXISTS {{.output_schema}}.columnCheckQuery;
+DROP PROCEDURE IF EXISTS {{.output_schema}}.commit_table;
+DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_column_versions; -- NOTE(review): nothing visible in this model creates a procedure with this name; kept in case an earlier release did - confirm and remove if not.
+DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_field_versions; -- the base cleanup step leaves this and concat_fields in place in debug/trace mode, so destroy must remove them too
+DROP PROCEDURE IF EXISTS {{.output_schema}}.concat_fields;
+DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_app_errors_fields;
+DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_mobile_context_fields;
+DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_session_context_fields;
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/00-setup-screen-views.sql b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/00-setup-screen-views.sql
new file mode 100644
index 00000000..81e9fb00
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/00-setup-screen-views.sql
@@ -0,0 +1,225 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +DECLARE RUN_ID TIMESTAMP; + +-- A table storing an identifier for this run of a model - used to identify runs of the model across multiple modules/steps (eg. base, screen views share this id per run) +CREATE TABLE IF NOT EXISTS {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} ( + run_id TIMESTAMP +); + +-- Insert new run_id if one doesn't exist +SET RUN_ID = (SELECT run_id FROM {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} LIMIT 1); + +IF RUN_ID IS NULL THEN + INSERT INTO {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} ( + SELECT + CURRENT_TIMESTAMP() + ); +END IF; + +-- Permanent metadata table +CREATE TABLE IF NOT EXISTS {{.output_schema}}.datamodel_metadata{{.entropy}} ( + run_id TIMESTAMP, + model_version STRING, + model STRING, + module STRING, + run_start_tstamp TIMESTAMP, + run_end_tstamp TIMESTAMP, + rows_this_run INT64, + distinct_key STRING, + distinct_key_count INT64, + time_key STRING, + min_time_key TIMESTAMP, + max_time_key TIMESTAMP, + duplicate_rows_removed INT64, + distinct_keys_removed INT64 +) +PARTITION BY DATE(run_start_tstamp); + +-- Setup temp metadata tables for this run +CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_sv_metadata_this_run{{.entropy}} ( + id STRING, + run_id TIMESTAMP, + model_version STRING, + model STRING, + module STRING, + run_start_tstamp TIMESTAMP, + run_end_tstamp TIMESTAMP, + rows_this_run INT64, + distinct_key STRING, + distinct_key_count INT64, +
time_key STRING, + min_time_key TIMESTAMP, + max_time_key TIMESTAMP, + duplicate_rows_removed INT64, + distinct_keys_removed INT64 +); + +INSERT INTO {{.scratch_schema}}.mobile_sv_metadata_this_run{{.entropy}} ( + SELECT + 'run', + run_id, + '{{.model_version}}', + 'mobile', + 'screen-views', + CURRENT_TIMESTAMP(), + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + + FROM {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} +); + +-- Create screen views table if it doesn't exist +CREATE TABLE IF NOT EXISTS {{.output_schema}}.mobile_screen_views{{.entropy}} ( + + screen_view_id STRING NOT NULL, + event_id STRING, + app_id STRING, + user_id STRING, + device_user_id STRING, + network_userid STRING, + session_id STRING, + session_index INT64, + previous_session_id STRING, + session_first_event_id STRING, + screen_view_in_session_index INT64, + screen_views_in_session INT64, + dvce_created_tstamp TIMESTAMP, + collector_tstamp TIMESTAMP, + derived_tstamp TIMESTAMP, + model_tstamp TIMESTAMP, + screen_view_name STRING, + screen_view_transition_type STRING, + screen_view_type STRING, + screen_fragment STRING, + screen_top_view_controller STRING, + screen_view_controller STRING, + screen_view_previous_id STRING, + screen_view_previous_name STRING, + screen_view_previous_type STRING, + platform STRING, + dvce_screenwidth INT64, + dvce_screenheight INT64, + device_manufacturer STRING, + device_model STRING, + os_type STRING, + os_version STRING, + android_idfa STRING, + apple_idfa STRING, + apple_idfv STRING, + open_idfa STRING, + device_latitude FLOAT64, + device_longitude FLOAT64, + device_latitude_longitude_accuracy FLOAT64, + device_altitude FLOAT64, + device_altitude_accuracy FLOAT64, + device_bearing FLOAT64, + device_speed FLOAT64, + geo_country STRING, + geo_region STRING, + geo_city STRING, + geo_zipcode STRING, + geo_latitude FLOAT64, + geo_longitude FLOAT64, + geo_region_name STRING, + geo_timezone STRING, + user_ipaddress STRING, + useragent
STRING, + carrier STRING, + network_technology STRING, + network_type STRING, + build STRING, + version STRING + +) +PARTITION BY DATE(derived_tstamp) +CLUSTER BY {{range $i, $cluster_field := .cluster_by}} {{if lt $i 4}} {{if $i}}, {{end}} {{$cluster_field}} {{end}} {{else}} app_id,device_user_id,session_id {{end}}; +--Cluster using `.cluster_by` var, else use defaults. Max 4 cluster by fields allowed + +-- Create staging table - acts as input to sessions step +CREATE TABLE IF NOT EXISTS {{.scratch_schema}}.mobile_screen_views_staged{{.entropy}} ( + + screen_view_id STRING NOT NULL, + event_id STRING, + app_id STRING, + user_id STRING, + device_user_id STRING, + network_userid STRING, + session_id STRING, + session_index INT64, + previous_session_id STRING, + session_first_event_id STRING, + screen_view_in_session_index INT64, + screen_views_in_session INT64, + dvce_created_tstamp TIMESTAMP, + collector_tstamp TIMESTAMP, + derived_tstamp TIMESTAMP, + model_tstamp TIMESTAMP, + screen_view_name STRING, + screen_view_transition_type STRING, + screen_view_type STRING, + screen_fragment STRING, + screen_top_view_controller STRING, + screen_view_controller STRING, + screen_view_previous_id STRING, + screen_view_previous_name STRING, + screen_view_previous_type STRING, + platform STRING, + dvce_screenwidth INT64, + dvce_screenheight INT64, + device_manufacturer STRING, + device_model STRING, + os_type STRING, + os_version STRING, + android_idfa STRING, + apple_idfa STRING, + apple_idfv STRING, + open_idfa STRING, + device_latitude FLOAT64, + device_longitude FLOAT64, + device_latitude_longitude_accuracy FLOAT64, + device_altitude FLOAT64, + device_altitude_accuracy FLOAT64, + device_bearing FLOAT64, + device_speed FLOAT64, + geo_country STRING, + geo_region STRING, + geo_city STRING, + geo_zipcode STRING, + geo_latitude FLOAT64, + geo_longitude FLOAT64, + geo_region_name STRING, + geo_timezone STRING, + user_ipaddress STRING, + useragent STRING, + carrier STRING, + 
network_technology STRING, + network_type STRING, + build STRING, + version STRING + +) +PARTITION BY DATE(derived_tstamp) +CLUSTER BY {{range $i, $cluster_field := .cluster_by}} {{if lt $i 4}} {{if $i}}, {{end}} {{$cluster_field}} {{end}} {{else}} app_id,device_user_id,session_id {{end}}; +--Cluster using `.cluster_by` var, else use defaults. Max 4 cluster by fields allowed diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/01-screen-views.sql b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/01-screen-views.sql new file mode 100644 index 00000000..576439f2 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/01-screen-views.sql @@ -0,0 +1,177 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_screen_views_this_run{{.entropy}} +PARTITION BY DATE(derived_tstamp) +AS ( + WITH staging AS ( + SELECT + ev.screen_view_id, + ev.event_id, + + ev.app_id, + + ev.user_id, + ev.device_user_id, + ev.network_userid, + + ev.session_id, + ev.session_index, + ev.previous_session_id, + ev.session_first_event_id, + + ROW_NUMBER() OVER (PARTITION BY ev.session_id ORDER BY ev.derived_tstamp) AS screen_view_in_session_index, + + ev.dvce_created_tstamp, + ev.collector_tstamp, + ev.derived_tstamp, + + ev.screen_view_name, + ev.screen_view_transition_type, + ev.screen_view_type, + ev.screen_fragment, + ev.screen_top_view_controller, + ev.screen_view_controller, + ev.screen_view_previous_id, + ev.screen_view_previous_name, + ev.screen_view_previous_type, + + ev.platform, + ev.dvce_screenwidth, + ev.dvce_screenheight, + ev.device_manufacturer, + ev.device_model, + ev.os_type, + ev.os_version, + ev.android_idfa, + ev.apple_idfa, + ev.apple_idfv, + ev.open_idfa, + + ev.device_latitude, + ev.device_longitude, + ev.device_latitude_longitude_accuracy, + ev.device_altitude, + ev.device_altitude_accuracy, + ev.device_bearing, + ev.device_speed, + ev.geo_country, + ev.geo_region, + ev.geo_city, + ev.geo_zipcode, + ev.geo_latitude, + ev.geo_longitude, + ev.geo_region_name, + ev.geo_timezone, + + ev.user_ipaddress, + + ev.useragent, + + ev.carrier, + ev.network_technology, + ev.network_type, + + ev.build, + ev.version + + FROM ( + SELECT + ARRAY_AGG(e ORDER BY e.derived_tstamp LIMIT 1)[OFFSET(0)] AS ev + -- order by matters here since derived_tstamp determines parts of model logic + + FROM {{.scratch_schema}}.mobile_events_staged{{.entropy}} e + WHERE e.event_name = 'screen_view' + AND e.screen_view_id IS NOT NULL + GROUP BY e.screen_view_id + ) + ) + + SELECT + s.screen_view_id, + s.event_id, + + s.app_id, + + s.user_id, + s.device_user_id, + s.network_userid, + + s.session_id, + s.session_index, + s.previous_session_id, + 
s.session_first_event_id, + + s.screen_view_in_session_index, + MAX(s.screen_view_in_session_index) OVER (PARTITION BY s.session_id) AS screen_views_in_session, + + s.dvce_created_tstamp, + s.collector_tstamp, + s.derived_tstamp, + CURRENT_TIMESTAMP() AS model_tstamp, + + s.screen_view_name, + s.screen_view_transition_type, + s.screen_view_type, + s.screen_fragment, + s.screen_top_view_controller, + s.screen_view_controller, + s.screen_view_previous_id, + s.screen_view_previous_name, + s.screen_view_previous_type, + + s.platform, + s.dvce_screenwidth, + s.dvce_screenheight, + s.device_manufacturer, + s.device_model, + s.os_type, + s.os_version, + s.android_idfa, + s.apple_idfa, + s.apple_idfv, + s.open_idfa, + + s.device_latitude, + s.device_longitude, + s.device_latitude_longitude_accuracy, + s.device_altitude, + s.device_altitude_accuracy, + s.device_bearing, + s.device_speed, + s.geo_country, + s.geo_region, + s.geo_city, + s.geo_zipcode, + s.geo_latitude, + s.geo_longitude, + s.geo_region_name, + s.geo_timezone, + + s.user_ipaddress, + + s.useragent, + + s.carrier, + s.network_technology, + s.network_type, + + s.build, + s.version + + FROM + staging AS s +); diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/02-screen-views-metadata.sql b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/02-screen-views-metadata.sql new file mode 100644 index 00000000..6deab348 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/02-screen-views-metadata.sql @@ -0,0 +1,41 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_sv_run_metadata_temp{{.entropy}} AS ( + SELECT + 'run' AS id, + count(*) AS rows_this_run, + 'screen_view_id' AS distinct_key, + count(DISTINCT screen_view_id) AS distinct_key_count, + 'derived_tstamp' AS time_key, + MIN(derived_tstamp) AS min_time_key, + MAX(derived_tstamp) AS max_time_key + + FROM + {{.scratch_schema}}.mobile_screen_views_this_run{{.entropy}} +); + +UPDATE {{.scratch_schema}}.mobile_sv_metadata_this_run{{.entropy}} a + SET + rows_this_run = b.rows_this_run, + distinct_key = b.distinct_key, + distinct_key_count = b.distinct_key_count, + time_key = b.time_key, + min_time_key = b.min_time_key, + max_time_key = b.max_time_key + + FROM {{.scratch_schema}}.mobile_sv_run_metadata_temp{{.entropy}} b + WHERE a.id = b.id; diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/03-commit-screen-views.sql b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/03-commit-screen-views.sql new file mode 100644 index 00000000..1e5abd9f --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/01-main/03-commit-screen-views.sql @@ -0,0 +1,68 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +BEGIN + + {{if ne (or .skip_derived false) true}} + + -- Commit to production if enabled + -- Note: Automigrate hardcoded to false as all columns are to be explicitly defined in model. + CALL {{.output_schema}}.commit_table('{{.scratch_schema}}', -- sourceDataset + 'mobile_screen_views_this_run{{.entropy}}', -- sourceTable + '{{.output_schema}}', -- targetDataset + 'mobile_screen_views{{.entropy}}', -- targetTable + 'screen_view_id', -- joinKey + 'derived_tstamp', -- partitionKey + FALSE); -- automigrate + + {{end}} + + {{if eq .stage_next true}} + + + -- Commit staging table if enabled + -- Note: Automigrate hardcoded to false as all columns are to be explicitly defined in model. 
+ CALL {{.output_schema}}.commit_table('{{.scratch_schema}}', -- sourceDataset + 'mobile_screen_views_this_run{{.entropy}}', -- sourceTable + '{{.scratch_schema}}', -- targetDataset + 'mobile_screen_views_staged{{.entropy}}', -- targetTable + 'screen_view_id', -- joinKey + 'derived_tstamp', -- partitionKey + FALSE); -- automigrate + + {{end}} + + -- Commit metadata + INSERT {{.output_schema}}.datamodel_metadata{{.entropy}} ( + SELECT + run_id, + model_version, + model, + module, + run_start_tstamp, + CURRENT_TIMESTAMP() AS run_end_tstamp, + rows_this_run, + distinct_key, + distinct_key_count, + time_key, + min_time_key, + max_time_key, + duplicate_rows_removed, + distinct_keys_removed + FROM {{.scratch_schema}}.mobile_sv_metadata_this_run{{.entropy}} + ); + +END; diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/99-complete/99-screen-views-cleanup.sql b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/99-complete/99-screen-views-cleanup.sql new file mode 100644 index 00000000..cc62c34e --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/99-complete/99-screen-views-cleanup.sql @@ -0,0 +1,40 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +{{if eq .cleanup_mode "trace"}} SELECT 1; {{else}} + + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_sv_metadata_this_run{{.entropy}}; + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_sv_run_metadata_temp{{.entropy}}; + +{{end}} + + +{{if eq .cleanup_mode "debug" "trace"}} SELECT 1; {{else}} + + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_screen_views_this_run{{.entropy}}; + DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_field_versions; + DROP PROCEDURE IF EXISTS {{.output_schema}}.concat_fields; + DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_app_errors_fields; + DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_mobile_context_fields; + DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_session_context_fields; + +{{end}} + +{{if eq .ends_run true}} + + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}}; + +{{end}} diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/XX-destroy/XX-destroy-screen-views.sql b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/XX-destroy/XX-destroy-screen-views.sql new file mode 100644 index 00000000..aa6cb04d --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/02-screen-views/XX-destroy/XX-destroy-screen-views.sql @@ -0,0 +1,24 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +DROP TABLE IF EXISTS {{.output_schema}}.mobile_screen_views{{.entropy}}; +DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_screen_views_staged{{.entropy}}; +DROP FUNCTION IF EXISTS {{.output_schema}}.columnCheckQuery; +DROP PROCEDURE IF EXISTS {{.output_schema}}.commit_table; +DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_column_versions; +DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_app_errors_fields; +DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_mobile_context_fields; +DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_session_context_fields; diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/00-setup-app-errors.sql b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/00-setup-app-errors.sql new file mode 100644 index 00000000..b0d175d7 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/00-setup-app-errors.sql @@ -0,0 +1,246 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +{{if eq (or .enabled false) true}} + + DECLARE RUN_ID TIMESTAMP; + + -- A table storing an identifier for this run of a model - used to identify runs of the model across multiple modules/steps (eg. 
base, screen views share this id per run) + CREATE TABLE IF NOT EXISTS {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} ( + run_id TIMESTAMP + ); + + -- Insert new run_id if one doesn't exist + SET RUN_ID = (SELECT run_id FROM {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} LIMIT 1); + + IF RUN_ID IS NULL THEN + INSERT INTO {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} ( + SELECT + CURRENT_TIMESTAMP() + ); + END IF; + + -- Permanent metadata table + CREATE TABLE IF NOT EXISTS {{.output_schema}}.datamodel_metadata{{.entropy}} ( + run_id TIMESTAMP, + model_version STRING, + model STRING, + module STRING, + run_start_tstamp TIMESTAMP, + run_end_tstamp TIMESTAMP, + rows_this_run INT64, + distinct_key STRING, + distinct_key_count INT64, + time_key STRING, + min_time_key TIMESTAMP, + max_time_key TIMESTAMP, + duplicate_rows_removed INT64, + distinct_keys_removed INT64 + ) + PARTITION BY DATE(run_start_tstamp); + + -- Setup temp metadata tables for this run + CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_app_errors_metadata_this_run{{.entropy}} ( + id STRING, + run_id TIMESTAMP, + model_version STRING, + model STRING, + module STRING, + run_start_tstamp TIMESTAMP, + run_end_tstamp TIMESTAMP, + rows_this_run INT64, + distinct_key STRING, + distinct_key_count INT64, + time_key STRING, + min_time_key TIMESTAMP, + max_time_key TIMESTAMP, + duplicate_rows_removed INT64, + distinct_keys_removed INT64 + ); + + INSERT INTO {{.scratch_schema}}.mobile_app_errors_metadata_this_run{{.entropy}} ( + SELECT + 'run', + run_id, + '{{.model_version}}', + 'mobile', + 'app-errors', + CURRENT_TIMESTAMP(), + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + + FROM {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} + ); + +{{end}} + +-- Reversing usual order as derived output is optional but staging is not. +-- Staging table always created even if module disabled. This allows for joins downstream.
+CREATE TABLE IF NOT EXISTS {{.scratch_schema}}.mobile_app_errors_staged{{.entropy}} ( + + event_id STRING NOT NULL, + app_id STRING, + user_id STRING, + device_user_id STRING, + network_userid STRING, + session_id STRING, + session_index INT64, + previous_session_id STRING, + session_first_event_id STRING, + dvce_created_tstamp TIMESTAMP, + collector_tstamp TIMESTAMP, + derived_tstamp TIMESTAMP, + model_tstamp TIMESTAMP, + platform STRING, + dvce_screenwidth INT64, + dvce_screenheight INT64, + device_manufacturer STRING, + device_model STRING, + os_type STRING, + os_version STRING, + android_idfa STRING, + apple_idfa STRING, + apple_idfv STRING, + open_idfa STRING, + screen_id STRING, + screen_name STRING, + screen_activity STRING, + screen_fragment STRING, + screen_top_view_controller STRING, + screen_type STRING, + screen_view_controller STRING, + device_latitude FLOAT64, + device_longitude FLOAT64, + device_latitude_longitude_accuracy FLOAT64, + device_altitude FLOAT64, + device_altitude_accuracy FLOAT64, + device_bearing FLOAT64, + device_speed FLOAT64, + geo_country STRING, + geo_region STRING, + geo_city STRING, + geo_zipcode STRING, + geo_latitude FLOAT64, + geo_longitude FLOAT64, + geo_region_name STRING, + geo_timezone STRING, + user_ipaddress STRING, + useragent STRING, + carrier STRING, + network_technology STRING, + network_type STRING, + build STRING, + version STRING, + event_index_in_session INT64, + message STRING, + programming_language STRING, + class_name STRING, + exception_name STRING, + is_fatal BOOLEAN, + line_number INT64, + stack_trace STRING, + thread_id INT64, + thread_name STRING + +) +PARTITION BY DATE(derived_tstamp) +CLUSTER BY {{range $i, $cluster_field := .cluster_by}} {{if lt $i 4}} {{if $i}}, {{end}} {{$cluster_field}} {{end}} {{else}} app_id,device_user_id,session_id {{end}}; +--Cluster using `.cluster_by` var, else use defaults. 
Max 4 cluster by fields allowed + + +{{if eq (or .enabled false) true}} + {{if ne (or .skip_derived false) true}} + -- Create derived table + CREATE TABLE IF NOT EXISTS {{.output_schema}}.mobile_app_errors{{.entropy}} ( + + event_id STRING NOT NULL, + app_id STRING, + user_id STRING, + device_user_id STRING, + network_userid STRING, + session_id STRING, + session_index INT64, + previous_session_id STRING, + session_first_event_id STRING, + dvce_created_tstamp TIMESTAMP, + collector_tstamp TIMESTAMP, + derived_tstamp TIMESTAMP, + model_tstamp TIMESTAMP, + platform STRING, + dvce_screenwidth INT64, + dvce_screenheight INT64, + device_manufacturer STRING, + device_model STRING, + os_type STRING, + os_version STRING, + android_idfa STRING, + apple_idfa STRING, + apple_idfv STRING, + open_idfa STRING, + screen_id STRING, + screen_name STRING, + screen_activity STRING, + screen_fragment STRING, + screen_top_view_controller STRING, + screen_type STRING, + screen_view_controller STRING, + device_latitude FLOAT64, + device_longitude FLOAT64, + device_latitude_longitude_accuracy FLOAT64, + device_altitude FLOAT64, + device_altitude_accuracy FLOAT64, + device_bearing FLOAT64, + device_speed FLOAT64, + geo_country STRING, + geo_region STRING, + geo_city STRING, + geo_zipcode STRING, + geo_latitude FLOAT64, + geo_longitude FLOAT64, + geo_region_name STRING, + geo_timezone STRING, + user_ipaddress STRING, + useragent STRING, + carrier STRING, + network_technology STRING, + network_type STRING, + build STRING, + version STRING, + event_index_in_session INT64, + message STRING, + programming_language STRING, + class_name STRING, + exception_name STRING, + is_fatal BOOLEAN, + line_number INT64, + stack_trace STRING, + thread_id INT64, + thread_name STRING + + ) + PARTITION BY DATE(derived_tstamp) + CLUSTER BY {{range $i, $cluster_field := .cluster_by}} {{if lt $i 4}} {{if $i}}, {{end}} {{$cluster_field}} {{end}} {{else}} app_id,device_user_id,session_id {{end}}; + --Cluster using 
`.cluster_by` var, else use defaults. Max 4 cluster by fields allowed + + {{end}} +{{end}} diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/01-app-errors.sql b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/01-app-errors.sql new file mode 100644 index 00000000..68e4afde --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/01-app-errors.sql @@ -0,0 +1,111 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +{{if eq (or .enabled false) true}} + -- App errors events schema evolved over time. 
Finding all versions of the column + DECLARE APP_ERRORS_EVENTS_COLUMNS, APP_ERRORS_QUERY STRING; + CALL {{.output_schema}}.mobile_app_errors_fields(APP_ERRORS_EVENTS_COLUMNS); + + SET APP_ERRORS_QUERY = format(""" + CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_app_errors_this_run{{.entropy}} + PARTITION BY DATE(derived_tstamp) + AS ( + + SELECT + e.event_id, + + e.app_id, + + e.user_id, + e.device_user_id, + e.network_userid, + + e.session_id, + e.session_index, + e.previous_session_id, + e.session_first_event_id, + + e.dvce_created_tstamp, + e.collector_tstamp, + e.derived_tstamp, + CURRENT_TIMESTAMP() AS model_tstamp, + + e.platform, + e.dvce_screenwidth, + e.dvce_screenheight, + e.device_manufacturer, + e.device_model, + e.os_type, + e.os_version, + e.android_idfa, + e.apple_idfa, + e.apple_idfv, + e.open_idfa, + + e.screen_id, + e.screen_name, + e.screen_activity, + e.screen_fragment, + e.screen_top_view_controller, + e.screen_type, + e.screen_view_controller, + + e.device_latitude, + e.device_longitude, + e.device_latitude_longitude_accuracy, + e.device_altitude, + e.device_altitude_accuracy, + e.device_bearing, + e.device_speed, + e.geo_country, + e.geo_region, + e.geo_city, + e.geo_zipcode, + e.geo_latitude, + e.geo_longitude, + e.geo_region_name, + e.geo_timezone, + + e.user_ipaddress, + e.useragent, + + e.carrier, + e.network_technology, + e.network_type, + + e.build, + e.version, + e.event_index_in_session, + + --Error details + %s + + + FROM + {{.scratch_schema}}.mobile_events_staged{{.entropy}} e + + WHERE + e.event_name = 'application_error' + + );""", APP_ERRORS_EVENTS_COLUMNS); + + EXECUTE IMMEDIATE APP_ERRORS_QUERY; + +{{else}} + + SELECT 1; + +{{end}} diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/02-app-errors-metadata.sql b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/02-app-errors-metadata.sql new file mode 100644 index 00000000..f37efc4e --- 
/dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/02-app-errors-metadata.sql @@ -0,0 +1,49 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +{{if eq (or .enabled false) true}} + + CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_app_errors_run_metadata_temp{{.entropy}} AS ( + SELECT + 'run' AS id, + count(*) AS rows_this_run, + 'event_id' AS distinct_key, + count(DISTINCT event_id) AS distinct_key_count, + 'derived_tstamp' AS time_key, + MIN(derived_tstamp) AS min_time_key, + MAX(derived_tstamp) AS max_time_key + + FROM + {{.scratch_schema}}.mobile_app_errors_this_run{{.entropy}} + ); + + UPDATE {{.scratch_schema}}.mobile_app_errors_metadata_this_run{{.entropy}} a + SET + rows_this_run = b.rows_this_run, + distinct_key = b.distinct_key, + distinct_key_count = b.distinct_key_count, + time_key = b.time_key, + min_time_key = b.min_time_key, + max_time_key = b.max_time_key + + FROM {{.scratch_schema}}.mobile_app_errors_run_metadata_temp{{.entropy}} b + WHERE a.id = b.id; + +{{else}} + + SELECT 1; + +{{end}} diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/03-commit-app-errors.sql b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/03-commit-app-errors.sql new file mode 100644 index 00000000..d0ce31b6 --- /dev/null +++ 
b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/01-main/03-commit-app-errors.sql @@ -0,0 +1,76 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +{{if eq (or .enabled false) true}} + + BEGIN + + {{if ne (or .skip_derived false) true}} + + -- Commit to production if enabled + -- Note: Automigrate hardcoded to false as all columns are to be explicitly defined in model. + CALL {{.output_schema}}.commit_table('{{.scratch_schema}}', -- sourceDataset + 'mobile_app_errors_this_run{{.entropy}}', -- sourceTable + '{{.output_schema}}', -- targetDataset + 'mobile_app_errors{{.entropy}}', -- targetTable + 'event_id', -- joinKey + 'derived_tstamp', -- partitionKey + FALSE); -- automigrate + + {{end}} + + {{if eq .stage_next true}} + + + -- Commit staging table if enabled + -- Note: Automigrate hardcoded to false as all columns are to be explicitly defined in model. 
+ CALL {{.output_schema}}.commit_table('{{.scratch_schema}}', -- sourceDataset + 'mobile_app_errors_this_run{{.entropy}}', -- sourceTable + '{{.scratch_schema}}', -- targetDataset + 'mobile_app_errors_staged{{.entropy}}', -- targetTable + 'event_id', -- joinKey + 'derived_tstamp', -- partitionKey + FALSE); -- automigrate + + {{end}} + + -- Commit metadata + INSERT {{.output_schema}}.datamodel_metadata{{.entropy}} ( + SELECT + run_id, + model_version, + model, + module, + run_start_tstamp, + CURRENT_TIMESTAMP() AS run_end_tstamp, + rows_this_run, + distinct_key, + distinct_key_count, + time_key, + min_time_key, + max_time_key, + duplicate_rows_removed, + distinct_keys_removed + FROM {{.scratch_schema}}.mobile_app_errors_metadata_this_run{{.entropy}} + ); + + END; + +{{else}} + + SELECT 1; + +{{end}} diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/99-complete/99-app-errors-cleanup.sql b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/99-complete/99-app-errors-cleanup.sql new file mode 100644 index 00000000..919682ba --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/99-complete/99-app-errors-cleanup.sql @@ -0,0 +1,40 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +{{if eq .cleanup_mode "trace"}} SELECT 1; {{else}} -- any cleanup_mode other than "trace": drop this run's metadata scratch tables + + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_app_errors_metadata_this_run{{.entropy}}; + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_app_errors_run_metadata_temp{{.entropy}}; + +{{end}} + + +{{if eq .cleanup_mode "debug" "trace"}} SELECT 1; {{else}} -- cleanup_mode is neither "debug" nor "trace": also drop the this_run table and the helper procedures + + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_app_errors_this_run{{.entropy}}; + DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_field_versions; + DROP PROCEDURE IF EXISTS {{.output_schema}}.concat_fields; + DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_app_errors_fields; + DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_mobile_context_fields; + DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_session_context_fields; + +{{end}} + +{{if eq .ends_run true}} + + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}}; -- the setup steps recreate this table and insert a fresh run_id when none exists + +{{end}} diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/XX-destroy/XX-destroy-app-errors.sql b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/XX-destroy/XX-destroy-app-errors.sql new file mode 100644 index 00000000..18893f1b --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/03-optional-modules/01-app-errors/XX-destroy/XX-destroy-app-errors.sql @@ -0,0 +1,24 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +DROP TABLE IF EXISTS {{.output_schema}}.mobile_app_errors{{.entropy}}; -- production table targeted by 03-commit-app-errors +DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_app_errors_staged{{.entropy}}; -- staging table targeted by 03-commit-app-errors +DROP FUNCTION IF EXISTS {{.output_schema}}.columnCheckQuery; +DROP PROCEDURE IF EXISTS {{.output_schema}}.commit_table; -- procedure CALLed by the commit steps +DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_column_versions; -- NOTE(review): the cleanup scripts drop combine_field_versions instead - confirm which name the setup step creates +DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_app_errors_fields; +DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_mobile_context_fields; +DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_session_context_fields; diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/00-setup-sessions.sql b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/00-setup-sessions.sql new file mode 100644 index 00000000..d00c87af --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/00-setup-sessions.sql @@ -0,0 +1,169 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + + +DECLARE RUN_ID TIMESTAMP; + +-- A table storing an identifier for this run of a model - used to identify runs of the model across multiple modules/steps (e.g.
the sessions and users modules both copy their run_id from it) +CREATE TABLE IF NOT EXISTS {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} ( + run_id TIMESTAMP +); + +-- Insert new run_id (the current timestamp) if one doesn't exist +SET RUN_ID = (SELECT run_id FROM {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} LIMIT 1); + +IF RUN_ID IS NULL THEN + INSERT INTO {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} ( + SELECT + CURRENT_TIMESTAMP() + ); +END IF; + +-- Permanent metadata table +CREATE TABLE IF NOT EXISTS {{.output_schema}}.datamodel_metadata{{.entropy}} ( + run_id TIMESTAMP, + model_version STRING, + model STRING, + module STRING, + run_start_tstamp TIMESTAMP, + run_end_tstamp TIMESTAMP, + rows_this_run INT64, + distinct_key STRING, + distinct_key_count INT64, + time_key STRING, + min_time_key TIMESTAMP, + max_time_key TIMESTAMP, + duplicate_rows_removed INT64, + distinct_keys_removed INT64 +); + +-- Setup Metadata: scratch row for this run; the row counts and key/time columns are filled in later by 04-sessions-metadata +CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_sessions_metadata_this_run{{.entropy}} ( + id STRING, + run_id TIMESTAMP, + model_version STRING, + model STRING, + module STRING, + run_start_tstamp TIMESTAMP, + run_end_tstamp TIMESTAMP, + rows_this_run INT64, + distinct_key STRING, + distinct_key_count INT64, + time_key STRING, + min_time_key TIMESTAMP, + max_time_key TIMESTAMP, + duplicate_rows_removed INT64, + distinct_keys_removed INT64 +); + +INSERT {{.scratch_schema}}.mobile_sessions_metadata_this_run{{.entropy}} ( + SELECT + 'run', + run_id, + '{{.model_version}}', + 'mobile', + 'sessions', + CURRENT_TIMESTAMP(), + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + FROM {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} +); + +-- Setup Sessions table +CREATE TABLE IF NOT EXISTS {{.output_schema}}.mobile_sessions{{.entropy}} ( + + app_id STRING, + session_id STRING NOT NULL, + session_index INT64, + previous_session_id STRING, + session_first_event_id STRING, + session_last_event_id STRING, + start_tstamp TIMESTAMP, + end_tstamp
TIMESTAMP, + model_tstamp TIMESTAMP, + user_id STRING, + device_user_id STRING, + network_userid STRING, + session_duration_s INT64, + has_install BOOLEAN, + screen_views INT64, + screen_names_viewed INT64, + app_errors INT64, + fatal_app_errors INT64, + first_event_name STRING, + last_event_name STRING, + first_screen_view_name STRING, + first_screen_view_transition_type STRING, + first_screen_view_type STRING, + last_screen_view_name STRING, + last_screen_view_transition_type STRING, + last_screen_view_type STRING, + platform STRING, + dvce_screenwidth INT64, + dvce_screenheight INT64, + device_manufacturer STRING, + device_model STRING, + os_type STRING, + os_version STRING, + android_idfa STRING, + apple_idfa STRING, + apple_idfv STRING, + open_idfa STRING, + device_latitude FLOAT64, + device_longitude FLOAT64, + device_latitude_longitude_accuracy FLOAT64, + device_altitude FLOAT64, + device_altitude_accuracy FLOAT64, + device_bearing FLOAT64, + device_speed FLOAT64, + geo_country STRING, + geo_region STRING, + geo_city STRING, + geo_zipcode STRING, + geo_latitude FLOAT64, + geo_longitude FLOAT64, + geo_region_name STRING, + geo_timezone STRING, + user_ipaddress STRING, + useragent STRING, + name_tracker STRING, + v_tracker STRING, + carrier STRING, + network_technology STRING, + network_type STRING, + first_build STRING, + last_build STRING, + first_version STRING, + last_version STRING + +) +PARTITION BY DATE(start_tstamp) +CLUSTER BY {{range $i, $cluster_field := .cluster_by}} {{if lt $i 4}} {{if $i}}, {{end}} {{$cluster_field}} {{end}} {{else}} app_id,device_user_id,session_id {{end}}; +--Cluster using the first 4 fields of the `.cluster_by` var if it is non-empty, else use the defaults above.
Max 4 cluster by fields allowed + +-- Staged manifest table as input to users step +CREATE TABLE IF NOT EXISTS {{.scratch_schema}}.mobile_sessions_userid_manifest_staged{{.entropy}} ( + device_user_id STRING, + start_tstamp TIMESTAMP +) +PARTITION BY DATE(start_tstamp); diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/01-sessions-aggs.sql b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/01-sessions-aggs.sql new file mode 100644 index 00000000..577a6748 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/01-sessions-aggs.sql @@ -0,0 +1,85 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_sessions_aggregates{{.entropy}} +AS( + + WITH events AS ( + SELECT + es.session_id, + es.event_id, + es.event_name, + es.derived_tstamp, + es.build, + es.version, + es.event_index_in_session, + MAX(es.event_index_in_session) OVER (PARTITION BY es.session_id) AS events_in_session + + FROM + {{.scratch_schema}}.mobile_events_staged{{.entropy}} es + ) + + , session_aggs AS ( + SELECT + e.session_id, + --last dimensions: taken from the event whose index equals the highest event index in the session + MAX(CASE WHEN e.event_index_in_session = e.events_in_session THEN e.build END) AS last_build, + MAX(CASE WHEN e.event_index_in_session = e.events_in_session THEN e.version END) AS last_version, + MAX(CASE WHEN e.event_index_in_session = e.events_in_session THEN e.event_name END) AS last_event_name, + MAX(CASE WHEN e.event_index_in_session = e.events_in_session THEN e.event_id END) AS session_last_event_id, + -- time: session bounds from the earliest/latest derived_tstamp + MIN(e.derived_tstamp) AS start_tstamp, + MAX(e.derived_tstamp) AS end_tstamp, + LOGICAL_OR(e.event_name = 'application_install') AS has_install + + FROM + events e + + GROUP BY 1 + ) + + , app_errors AS ( + SELECT + ae.session_id, + COUNT(DISTINCT ae.event_id) AS app_errors, + COUNT(DISTINCT CASE WHEN ae.is_fatal THEN ae.event_id END) AS fatal_app_errors + + FROM + {{.scratch_schema}}.mobile_app_errors_staged{{.entropy}} ae + + GROUP BY 1 + ) + + SELECT + sa.session_id, + sa.last_build, + sa.last_version, + sa.last_event_name, + sa.session_last_event_id, + sa.start_tstamp, + sa.end_tstamp, + TIMESTAMP_DIFF(sa.end_tstamp, sa.start_tstamp, SECOND) AS session_duration_s, + sa.has_install, + ae.app_errors, + ae.fatal_app_errors -- both error counts are NULL (not 0) for sessions with no staged app errors, because of the LEFT JOIN + + FROM + session_aggs sa + LEFT JOIN + app_errors ae + ON sa.session_id = ae.session_id + +); diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/02-sessions-sv-details.sql b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/02-sessions-sv-details.sql new file mode 100644 index 00000000..91ae3b34 --- /dev/null +++
b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/02-sessions-sv-details.sql @@ -0,0 +1,37 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_sessions_screen_view_details{{.entropy}} +AS( + + SELECT + sv.session_id, + COUNT(DISTINCT sv.screen_view_id) AS screen_views, + COUNT(DISTINCT sv.screen_view_name) AS screen_names_viewed, + --First/last screen view dimensions. Could split below into first/last scratch tables. Trying to minimise joins to sessions.
+ MAX(CASE WHEN sv.screen_view_in_session_index = 1 THEN sv.screen_view_name END) AS first_screen_view_name, + MAX(CASE WHEN sv.screen_view_in_session_index = 1 THEN sv.screen_view_transition_type END) AS first_screen_view_transition_type, + MAX(CASE WHEN sv.screen_view_in_session_index = 1 THEN sv.screen_view_type END) AS first_screen_view_type, + MAX(CASE WHEN sv.screen_view_in_session_index = sv.screen_views_in_session THEN sv.screen_view_name END) AS last_screen_view_name, + MAX(CASE WHEN sv.screen_view_in_session_index = sv.screen_views_in_session THEN sv.screen_view_transition_type END) AS last_screen_view_transition_type, + MAX(CASE WHEN sv.screen_view_in_session_index = sv.screen_views_in_session THEN sv.screen_view_type END) AS last_screen_view_type + + FROM + {{.scratch_schema}}.mobile_screen_views_staged{{.entropy}} sv + + GROUP BY 1 + +); diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/03-sessions.sql b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/03-sessions.sql new file mode 100644 index 00000000..0088bacd --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/03-sessions.sql @@ -0,0 +1,110 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_sessions_this_run{{.entropy}} +PARTITION BY DATE(start_tstamp) +AS ( + + SELECT + es.app_id, + + es.session_id, + es.session_index, + es.previous_session_id, + es.session_first_event_id, + sa.session_last_event_id, + + sa.start_tstamp, + sa.end_tstamp, + CURRENT_TIMESTAMP() AS model_tstamp, + + es.user_id, + es.device_user_id, + es.network_userid, + + sa.session_duration_s, + sa.has_install, + sv.screen_views, + sv.screen_names_viewed, + sa.app_errors, + sa.fatal_app_errors, + + es.event_name AS first_event_name, + sa.last_event_name, + + sv.first_screen_view_name, + sv.first_screen_view_transition_type, + sv.first_screen_view_type, + + sv.last_screen_view_name, + sv.last_screen_view_transition_type, + sv.last_screen_view_type, + + es.platform, + es.dvce_screenwidth, + es.dvce_screenheight, + es.device_manufacturer, + es.device_model, + es.os_type, + es.os_version, + es.android_idfa, + es.apple_idfa, + es.apple_idfv, + es.open_idfa, + + es.device_latitude, + es.device_longitude, + es.device_latitude_longitude_accuracy, + es.device_altitude, + es.device_altitude_accuracy, + es.device_bearing, + es.device_speed, + es.geo_country, + es.geo_region, + es.geo_city, + es.geo_zipcode, + es.geo_latitude, + es.geo_longitude, + es.geo_region_name, + es.geo_timezone, + + es.user_ipaddress, + + es.useragent, + es.name_tracker, + es.v_tracker, + + es.carrier, + es.network_technology, + es.network_type, + --first/last build/version to measure app updates: first_* from the session's first event, last_* from the aggregates table. + es.build AS first_build, + sa.last_build, + es.version AS first_version, + sa.last_version + + FROM + {{.scratch_schema}}.mobile_events_staged{{.entropy}} es + INNER JOIN + {{.scratch_schema}}.mobile_sessions_aggregates{{.entropy}} sa + ON es.session_id = sa.session_id + LEFT JOIN --left join as a session might not have any screen views, e.g.
an app error on opening + {{.scratch_schema}}.mobile_sessions_screen_view_details{{.entropy}} sv + ON es.session_id = sv.session_id + + WHERE + es.event_index_in_session = 1 -- keep only each session's first event, so the es.* columns describe the start of the session +); diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/04-sessions-metadata.sql b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/04-sessions-metadata.sql new file mode 100644 index 00000000..e39f782c --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/04-sessions-metadata.sql @@ -0,0 +1,42 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_sessions_run_metadata_temp{{.entropy}}; -- rebuilt from scratch on every run + +CREATE TABLE {{.scratch_schema}}.mobile_sessions_run_metadata_temp{{.entropy}} AS ( + SELECT + 'run' AS id, -- constant key; matched against the metadata row's id in the UPDATE below + count(*) AS rows_this_run, + 'session_id' AS distinct_key, + count(DISTINCT session_id) AS distinct_key_count, + 'start_tstamp' AS time_key, + MIN(start_tstamp) AS min_time_key, + MAX(start_tstamp) AS max_time_key + + FROM + {{.scratch_schema}}.mobile_sessions_this_run{{.entropy}} +); + +UPDATE {{.scratch_schema}}.mobile_sessions_metadata_this_run{{.entropy}} a -- copy the row counts and key/time stats of this run onto its metadata row + SET + rows_this_run = b.rows_this_run, + distinct_key = b.distinct_key, + distinct_key_count = b.distinct_key_count, + time_key = b.time_key, + min_time_key = b.min_time_key, + max_time_key = b.max_time_key + FROM {{.scratch_schema}}.mobile_sessions_run_metadata_temp{{.entropy}} b + WHERE a.id = b.id; diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/05-sessions-prep-manifest.sql b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/05-sessions-prep-manifest.sql new file mode 100644 index 00000000..f8f011c3 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/05-sessions-prep-manifest.sql @@ -0,0 +1,28 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +-- Prep manifest data for users step: one row per device_user_id with the earliest session start in this run +CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_sessions_userid_manifest_this_run{{.entropy}} +AS( + SELECT + device_user_id, + MIN(start_tstamp) AS start_tstamp + + FROM + {{.scratch_schema}}.mobile_sessions_this_run{{.entropy}} + + GROUP BY 1 +); diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/06-commit-sessions.sql b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/06-commit-sessions.sql new file mode 100644 index 00000000..af2d192d --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/01-main/06-commit-sessions.sql @@ -0,0 +1,65 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +BEGIN + + {{if ne (or .skip_derived false) true}} + + -- Commit this run's sessions to the production table unless skip_derived is set + -- Note: Automigrate hardcoded to false as all columns are to be explicitly defined in model. + CALL {{.output_schema}}.commit_table('{{.scratch_schema}}', -- sourceDataset + 'mobile_sessions_this_run{{.entropy}}', -- sourceTable + '{{.output_schema}}', -- targetDataset + 'mobile_sessions{{.entropy}}', -- targetTable + 'session_id', -- joinKey + 'start_tstamp', -- partitionKey + FALSE); -- automigrate + + {{end}} + + {{if eq (or .stage_next false) true}} + -- TODO: Figure out if this should be handled via call to commit table, or just manual.
+ + -- Commit staging manifest when stage_next is set (an unset stage_next is treated as false): replace the staged rows of every device_user_id seen this run + DELETE FROM {{.scratch_schema}}.mobile_sessions_userid_manifest_staged{{.entropy}} + WHERE device_user_id IN (SELECT device_user_id FROM {{.scratch_schema}}.mobile_sessions_userid_manifest_this_run{{.entropy}}); + + INSERT INTO {{.scratch_schema}}.mobile_sessions_userid_manifest_staged{{.entropy}} + (SELECT * FROM {{.scratch_schema}}.mobile_sessions_userid_manifest_this_run{{.entropy}}); + {{end}} + + -- Commit metadata: append this run's scratch metadata row to the permanent table, stamping run_end_tstamp with the current time + INSERT INTO {{.output_schema}}.datamodel_metadata{{.entropy}} ( + SELECT + run_id, + model_version, + model, + module, + run_start_tstamp, + CURRENT_TIMESTAMP() AS run_end_tstamp, + rows_this_run, + distinct_key, + distinct_key_count, + time_key, + min_time_key, + max_time_key, + duplicate_rows_removed, + distinct_keys_removed + + FROM {{.scratch_schema}}.mobile_sessions_metadata_this_run{{.entropy}} + ); + +END; diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/99-complete/98-truncate-upstream-staged.sql b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/99-complete/98-truncate-upstream-staged.sql new file mode 100644 index 00000000..2d6669ac --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/99-complete/98-truncate-upstream-staged.sql @@ -0,0 +1,19 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +TRUNCATE TABLE {{.scratch_schema}}.mobile_events_staged{{.entropy}}; -- staged events read by 01-sessions-aggs and 03-sessions +TRUNCATE TABLE {{.scratch_schema}}.mobile_screen_views_staged{{.entropy}}; -- staged screen views read by 02-sessions-sv-details +TRUNCATE TABLE {{.scratch_schema}}.mobile_app_errors_staged{{.entropy}}; -- staged app errors read by 01-sessions-aggs diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/99-complete/99-sessions-cleanup.sql b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/99-complete/99-sessions-cleanup.sql new file mode 100644 index 00000000..195126fb --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/99-complete/99-sessions-cleanup.sql @@ -0,0 +1,42 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +{{if eq .cleanup_mode "trace"}} SELECT 1; {{else}} -- any cleanup_mode other than "trace": drop the intermediate and metadata scratch tables of the sessions step + + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_sessions_aggregates{{.entropy}}; + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_sessions_screen_view_details{{.entropy}}; + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_sessions_run_metadata_temp{{.entropy}}; + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_sessions_metadata_this_run{{.entropy}}; + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_sessions_userid_manifest_this_run{{.entropy}}; + +{{end}} + +{{if eq .cleanup_mode "debug" "trace"}} SELECT 1; {{else}} -- cleanup_mode is neither "debug" nor "trace": also drop the this_run table and the helper procedures + + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_sessions_this_run{{.entropy}}; + DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_field_versions; + DROP PROCEDURE IF EXISTS {{.output_schema}}.concat_fields; + DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_app_errors_fields; + DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_mobile_context_fields; + DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_session_context_fields; + +{{end}} + +{{if eq .ends_run true}} + + DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}}; -- the setup steps recreate this table and insert a fresh run_id when none exists + +{{end}} diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/XX-destroy/XX-destroy-sessions.sql b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/XX-destroy/XX-destroy-sessions.sql new file mode 100644 index 00000000..3b5a679f --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/04-sessions/XX-destroy/XX-destroy-sessions.sql @@ -0,0 +1,24 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +DROP TABLE IF EXISTS {{.output_schema}}.mobile_sessions{{.entropy}}; -- production table targeted by 06-commit-sessions +DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_sessions_userid_manifest_staged{{.entropy}}; -- staged manifest written by 06-commit-sessions +DROP FUNCTION IF EXISTS {{.output_schema}}.columnCheckQuery; +DROP PROCEDURE IF EXISTS {{.output_schema}}.commit_table; -- procedure CALLed by the commit steps +DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_column_versions; -- NOTE(review): the cleanup scripts drop combine_field_versions instead - confirm which name the setup step creates +DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_app_errors_fields; +DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_mobile_context_fields; +DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_session_context_fields; diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/00-setup-users.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/00-setup-users.sql new file mode 100644 index 00000000..100decb2 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/00-setup-users.sql @@ -0,0 +1,153 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + +DECLARE RUN_ID TIMESTAMP; + +-- A table storing an identifier for this run of a model - used to identify runs of the model across multiple modules/steps (e.g. the sessions and users modules both copy their run_id from it) +CREATE TABLE IF NOT EXISTS {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} ( + run_id TIMESTAMP +); + +-- Insert new run_id (the current timestamp) if one doesn't exist +SET RUN_ID = (SELECT run_id FROM {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} LIMIT 1); + +IF RUN_ID IS NULL THEN + INSERT INTO {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} ( + SELECT + CURRENT_TIMESTAMP() + ); +END IF; + +-- Permanent metadata table +CREATE TABLE IF NOT EXISTS {{.output_schema}}.datamodel_metadata{{.entropy}} ( + run_id TIMESTAMP, + model_version STRING, + model STRING, + module STRING, + run_start_tstamp TIMESTAMP, + run_end_tstamp TIMESTAMP, + rows_this_run INT64, + distinct_key STRING, + distinct_key_count INT64, + time_key STRING, + min_time_key TIMESTAMP, + max_time_key TIMESTAMP, + duplicate_rows_removed INT64, + distinct_keys_removed INT64 +); + +-- Setup Metadata: scratch metadata row for this run of the users module; measure columns start as NULL +CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_users_metadata_this_run{{.entropy}} ( + id STRING, + run_id TIMESTAMP, + model_version STRING, + model STRING, + module STRING, + run_start_tstamp TIMESTAMP, + run_end_tstamp TIMESTAMP, + rows_this_run INT64, + distinct_key STRING, + distinct_key_count INT64, + time_key STRING, + min_time_key TIMESTAMP, + max_time_key TIMESTAMP, + duplicate_rows_removed INT64, + distinct_keys_removed INT64 +); + +INSERT {{.scratch_schema}}.mobile_users_metadata_this_run{{.entropy}} ( + SELECT + 'run', + run_id, + '{{.model_version}}', + 'mobile', + 'users', + CURRENT_TIMESTAMP(), + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + + FROM {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}} +); + +CREATE TABLE IF NOT EXISTS {{.output_schema}}.mobile_users_manifest{{.entropy}} ( -- per-user start_tstamp manifest; left-joined to the staged sessions manifest in 01-userids-this-run + device_user_id STRING, + start_tstamp TIMESTAMP +) +PARTITION BY
DATE(start_tstamp) +CLUSTER BY device_user_id; + +CREATE TABLE IF NOT EXISTS {{.output_schema}}.mobile_users{{.entropy}} ( + + user_id STRING, + device_user_id STRING, + network_userid STRING, + + start_tstamp TIMESTAMP, + end_tstamp TIMESTAMP, + model_tstamp TIMESTAMP, + + screen_views INT64, + screen_names_viewed INT64, + sessions INT64, + sessions_duration_s INT64, + active_days INT64, + + app_errors INT64, + fatal_app_errors INT64, + + first_screen_view_name STRING, + first_screen_view_transition_type STRING, + first_screen_view_type STRING, + last_screen_view_name STRING, + last_screen_view_transition_type STRING, + last_screen_view_type STRING, + + platform STRING, + dvce_screenwidth INT64, + dvce_screenheight INT64, + device_manufacturer STRING, + device_model STRING, + os_type STRING, + first_os_version STRING, + last_os_version STRING, + android_idfa STRING, + apple_idfa STRING, + apple_idfv STRING, + open_idfa STRING, + + geo_country STRING, + geo_region STRING, + geo_city STRING, + geo_zipcode STRING, + geo_latitude FLOAT64, + geo_longitude FLOAT64, + geo_region_name STRING, + geo_timezone STRING, + + first_carrier STRING, + last_carrier STRING + +) +PARTITION BY DATE(start_tstamp) +CLUSTER BY {{range $i, $cluster_field := .cluster_by}} {{if lt $i 4}} {{if $i}}, {{end}} {{$cluster_field}} {{end}} {{else}} device_user_id {{end}}; +--Cluster using the first 4 fields of the `.cluster_by` var if it is non-empty, else use the default above. Max 4 cluster by fields allowed diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/01-userids-this-run.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/01-userids-this-run.sql new file mode 100644 index 00000000..e668ba7d --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/01-userids-this-run.sql @@ -0,0 +1,32 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +-- Device user ids to process this run, each with the earlier of its staged and users-manifest start_tstamp. +DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_users_userids_this_run{{.entropy}}; + +CREATE TABLE {{.scratch_schema}}.mobile_users_userids_this_run{{.entropy}} +AS( + SELECT + a.device_user_id, + -- LEAST produces NULL if any input value is null, hence the COALESCE for users with no row in the users manifest + LEAST(a.start_tstamp, COALESCE(b.start_tstamp, a.start_tstamp)) AS start_tstamp + + FROM + {{.scratch_schema}}.mobile_sessions_userid_manifest_staged{{.entropy}} a + LEFT JOIN + {{.output_schema}}.mobile_users_manifest{{.entropy}} b + ON a.device_user_id = b.device_user_id +); diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/02-users-limits.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/02-users-limits.sql new file mode 100644 index 00000000..cebe8164 --- /dev/null +++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/02-users-limits.sql @@ -0,0 +1,25 @@ +/* + Copyright 2021 Snowplow Analytics Ltd. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/
+
+-- Limits for this run: a single-row table with the earliest and latest start_tstamp across the users in this run.
+CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_users_limits{{.entropy}} AS(
+  SELECT
+    MIN(start_tstamp) AS lower_limit,
+    MAX(start_tstamp) AS upper_limit
+
+  FROM
+    {{.scratch_schema}}.mobile_users_userids_this_run{{.entropy}}
+);
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/03-users-sessions-this-run.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/03-users-sessions-this-run.sql
new file mode 100644
index 00000000..5b5d8bc0
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/03-users-sessions-this-run.sql
@@ -0,0 +1,32 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+DECLARE LOWER_LIMIT, UPPER_LIMIT TIMESTAMP; -- script variables holding this run's time window
+-- Read both limits from the single-row limits table in one assignment.
+SET (LOWER_LIMIT, UPPER_LIMIT) = (SELECT AS STRUCT lower_limit, upper_limit FROM {{.scratch_schema}}.mobile_users_limits{{.entropy}});
+
+-- Sessions of the users in this run whose start_tstamp lies between LOWER_LIMIT and UPPER_LIMIT (inclusive).
+CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_users_sessions_this_run{{.entropy}}
+AS(
+  SELECT
+    sess.*
+  FROM {{.output_schema}}.mobile_sessions{{.entropy}} AS sess
+  INNER JOIN {{.scratch_schema}}.mobile_users_userids_this_run{{.entropy}} AS ids
+    ON sess.device_user_id = ids.device_user_id
+  -- keep only sessions inside this run's window; both bounds are inclusive
+  WHERE sess.start_tstamp
+    BETWEEN LOWER_LIMIT AND UPPER_LIMIT
+);
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/04-users-aggs.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/04-users-aggs.sql
new file mode 100644
index 00000000..4327b05f
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/04-users-aggs.sql
@@ -0,0 +1,37 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+-- Per-user aggregates over the sessions selected for this run.
+CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_users_aggregates{{.entropy}}
+AS(
+  SELECT
+    sess.device_user_id,
+    -- time: first session start and last session end
+    MIN(sess.start_tstamp) AS start_tstamp,
+    MAX(sess.end_tstamp) AS end_tstamp,
+    -- engagement
+    SUM(sess.screen_views) AS screen_views,
+    SUM(sess.screen_names_viewed) AS screen_names_viewed,
+    COUNT(DISTINCT sess.session_id) AS sessions,
+    SUM(sess.session_duration_s) AS sessions_duration_s,
+    COUNT(DISTINCT DATE(sess.start_tstamp)) AS active_days,
+    -- errors
+    SUM(sess.app_errors) AS app_errors,
+    SUM(sess.fatal_app_errors) AS fatal_app_errors
+
+  FROM {{.scratch_schema}}.mobile_users_sessions_this_run{{.entropy}} AS sess
+
+  GROUP BY sess.device_user_id
+);
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/05-users-lasts.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/05-users-lasts.sql
new file mode 100644
index 00000000..43a33477
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/05-users-lasts.sql
@@ -0,0 +1,37 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_users_lasts{{.entropy}}
+AS(
+  -- Attributes of each user's last session: the session whose end_tstamp equals the user's aggregated end_tstamp.
+  SELECT
+    sess.device_user_id,
+    sess.last_screen_view_name,
+    sess.last_screen_view_transition_type,
+    sess.last_screen_view_type,
+
+    sess.carrier AS last_carrier,
+    sess.os_version AS last_os_version
+
+  FROM
+    {{.scratch_schema}}.mobile_users_sessions_this_run{{.entropy}} AS sess
+  -- NOTE(review): sessions tied on end_tstamp for one user would yield several rows here; confirm uniqueness upstream.
+  INNER JOIN
+    {{.scratch_schema}}.mobile_users_aggregates{{.entropy}} AS agg
+    ON sess.device_user_id = agg.device_user_id
+    AND sess.end_tstamp = agg.end_tstamp
+
+);
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/06-users.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/06-users.sql
new file mode 100644
index 00000000..419a62cb
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/06-users.sql
@@ -0,0 +1,86 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_users_this_run{{.entropy}}
+PARTITION BY DATE(start_tstamp)
+AS (
+  -- Combine per-user aggregates with the user's first session (start_tstamp match) and last-session attributes.
+  SELECT
+    -- user fields
+    first_sess.user_id,
+    first_sess.device_user_id,
+    first_sess.network_userid,
+
+    agg.start_tstamp,
+    agg.end_tstamp,
+    CURRENT_TIMESTAMP() AS model_tstamp,
+
+    -- engagement fields
+    agg.screen_views,
+    agg.screen_names_viewed,
+    agg.sessions,
+    agg.sessions_duration_s,
+    agg.active_days,
+    -- errors
+    agg.app_errors,
+    agg.fatal_app_errors,
+
+    -- screen fields
+    first_sess.first_screen_view_name,
+    first_sess.first_screen_view_transition_type,
+    first_sess.first_screen_view_type,
+
+    lasts.last_screen_view_name,
+    lasts.last_screen_view_transition_type,
+    lasts.last_screen_view_type,
+
+    -- device fields
+    first_sess.platform,
+    first_sess.dvce_screenwidth,
+    first_sess.dvce_screenheight,
+    first_sess.device_manufacturer,
+    first_sess.device_model,
+    first_sess.os_type,
+    first_sess.os_version AS first_os_version,
+    lasts.last_os_version,
+    first_sess.android_idfa,
+    first_sess.apple_idfa,
+    first_sess.apple_idfv,
+    first_sess.open_idfa,
+
+    -- geo fields
+    first_sess.geo_country,
+    first_sess.geo_region,
+    first_sess.geo_city,
+    first_sess.geo_zipcode,
+    first_sess.geo_latitude,
+    first_sess.geo_longitude,
+    first_sess.geo_region_name,
+    first_sess.geo_timezone,
+
+    first_sess.carrier AS first_carrier,
+    lasts.last_carrier
+
+  FROM {{.scratch_schema}}.mobile_users_aggregates{{.entropy}} AS agg
+
+  INNER JOIN {{.scratch_schema}}.mobile_users_sessions_this_run{{.entropy}} AS first_sess
+    ON first_sess.device_user_id = agg.device_user_id
+    AND first_sess.start_tstamp = agg.start_tstamp
+
+  INNER JOIN {{.scratch_schema}}.mobile_users_lasts{{.entropy}} AS lasts
+    ON agg.device_user_id = lasts.device_user_id
+
+);
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/07-users-metadata.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/07-users-metadata.sql
new file mode 100644
index 00000000..33341983
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/07-users-metadata.sql
@@ -0,0 +1,40 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+-- Summarise this run's users table, then copy the stats onto the matching 'run' row of the module metadata.
+CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_users_run_metadata_temp{{.entropy}} AS (
+  SELECT
+    'run' AS id,
+    COUNT(*) AS rows_this_run,
+    'device_user_id' AS distinct_key,
+    COUNT(DISTINCT device_user_id) AS distinct_key_count,
+    'start_tstamp' AS time_key,
+    MIN(start_tstamp) AS min_time_key,
+    MAX(start_tstamp) AS max_time_key
+
+  FROM
+    {{.scratch_schema}}.mobile_users_this_run{{.entropy}}
+);
+
+UPDATE {{.scratch_schema}}.mobile_users_metadata_this_run{{.entropy}} AS meta
+  SET
+    rows_this_run = stats.rows_this_run,
+    distinct_key = stats.distinct_key,
+    distinct_key_count = stats.distinct_key_count,
+    time_key = stats.time_key,
+    min_time_key = stats.min_time_key,
+    max_time_key = stats.max_time_key
+  FROM {{.scratch_schema}}.mobile_users_run_metadata_temp{{.entropy}} AS stats
+  WHERE meta.id = stats.id;
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/08-commit-users.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/08-commit-users.sql
new file mode 100644
index 00000000..79eef0b7
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/01-main/08-commit-users.sql
@@ -0,0 +1,53 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+BEGIN
+  -- The commit step below is rendered unless `skip_derived` is true (an unset value is treated as false).
+  {{if ne (or .skip_derived false) true}}
+
+    -- Commit this run's users (mobile_users_this_run) to the derived mobile_users table via commit_table.
+    -- Note: Automigrate hardcoded to false as all columns are to be explicitly defined in model.
+    CALL {{.output_schema}}.commit_table('{{.scratch_schema}}',               -- sourceDataset
+                                         'mobile_users_this_run{{.entropy}}', -- sourceTable
+                                         '{{.output_schema}}',                -- targetDataset
+                                         'mobile_users{{.entropy}}',          -- targetTable
+                                         'device_user_id',                    -- joinKey
+                                         'start_tstamp',                      -- partitionKey
+                                         FALSE);                              -- automigrate
+
+  {{end}}
+
+  -- Commit metadata: always runs, appending this module's run row with run_end_tstamp set to now.
+  INSERT INTO {{.output_schema}}.datamodel_metadata{{.entropy}} (
+    SELECT
+      run_id,
+      model_version,
+      model,
+      module,
+      run_start_tstamp,
+      CURRENT_TIMESTAMP() AS run_end_tstamp,
+      rows_this_run,
+      distinct_key,
+      distinct_key_count,
+      time_key,
+      min_time_key,
+      max_time_key,
+      duplicate_rows_removed,
+      distinct_keys_removed
+    FROM {{.scratch_schema}}.mobile_users_metadata_this_run{{.entropy}}
+  );
+
+END;
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/99-complete/98-manifest-and-truncate.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/99-complete/98-manifest-and-truncate.sql
new file mode 100644
index 00000000..42bc8c6c
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/99-complete/98-manifest-and-truncate.sql
@@ -0,0 +1,29 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+BEGIN
+
+  -- Update manifest: remove the existing rows of every user processed in this run, then re-insert them below.
+  DELETE
+  FROM {{.output_schema}}.mobile_users_manifest{{.entropy}}
+  WHERE device_user_id IN (SELECT device_user_id FROM {{.scratch_schema}}.mobile_users_userids_this_run{{.entropy}});
+  -- NOTE(review): SELECT * assumes userids_this_run has the same column order as the manifest; confirm against the manifest DDL.
+  INSERT INTO {{.output_schema}}.mobile_users_manifest{{.entropy}} (SELECT * FROM {{.scratch_schema}}.mobile_users_userids_this_run{{.entropy}});
+
+  -- Truncate input table just processed (the staged userid manifest read by 01-userids-this-run.sql)
+  TRUNCATE TABLE {{.scratch_schema}}.mobile_sessions_userid_manifest_staged{{.entropy}};
+
+END;
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/99-complete/99-users-cleanup.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/99-complete/99-users-cleanup.sql
new file mode 100644
index 00000000..c801c807
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/99-complete/99-users-cleanup.sql
@@ -0,0 +1,44 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+-- Scratch cleanup for the users module. The first group is dropped unless cleanup_mode is "trace"; SELECT 1 is rendered instead.
+{{if eq .cleanup_mode "trace"}} SELECT 1; {{else}}
+
+  DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_users_aggregates{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_users_lasts{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_users_run_metadata_temp{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_users_metadata_this_run{{.entropy}};
+
+{{end}}
+-- The second group is dropped unless cleanup_mode is "debug" or "trace".
+{{if eq .cleanup_mode "debug" "trace"}} SELECT 1; {{else}}
+
+  DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_users_userids_this_run{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_users_limits{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_users_this_run{{.entropy}};
+  DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_users_sessions_this_run{{.entropy}};
+  DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_field_versions;
+  DROP PROCEDURE IF EXISTS {{.output_schema}}.concat_fields;
+  DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_app_errors_fields;
+  DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_mobile_context_fields;
+  DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_session_context_fields;
+
+{{end}}
+-- Drop the run-id table only when `ends_run` is true; `or` makes an unset value count as false, matching the skip_derived/app_errors guards.
+{{if eq (or .ends_run false) true}}
+
+  DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_metadata_run_id{{.entropy}};
+
+{{end}}
diff --git a/mobile/v1/bigquery/sql-runner/sql/standard/05-users/XX-destroy/XX-destroy-users.sql b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/XX-destroy/XX-destroy-users.sql
new file mode 100644
index 00000000..c3db7544
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/standard/05-users/XX-destroy/XX-destroy-users.sql
@@ -0,0 +1,24 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+-- Destroy step: drops the users module's output tables and the listed routines from the output schema.
+DROP TABLE IF EXISTS {{.output_schema}}.mobile_users{{.entropy}}; -- derived users table
+DROP TABLE IF EXISTS {{.output_schema}}.mobile_users_manifest{{.entropy}}; -- users manifest
+DROP FUNCTION IF EXISTS {{.output_schema}}.columnCheckQuery;
+DROP PROCEDURE IF EXISTS {{.output_schema}}.commit_table; -- procedure called by 08-commit-users.sql
+DROP PROCEDURE IF EXISTS {{.output_schema}}.combine_column_versions;
+DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_app_errors_fields;
+DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_mobile_context_fields;
+DROP PROCEDURE IF EXISTS {{.output_schema}}.mobile_session_context_fields;
diff --git a/mobile/v1/bigquery/sql-runner/sql/tests/00-staging-reconciliation/01-main/00-staging-reconciliation.sql b/mobile/v1/bigquery/sql-runner/sql/tests/00-staging-reconciliation/01-main/00-staging-reconciliation.sql
new file mode 100644
index 00000000..65b2d869
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/tests/00-staging-reconciliation/01-main/00-staging-reconciliation.sql
@@ -0,0 +1,118 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+CREATE OR REPLACE TABLE {{.scratch_schema}}.mobile_staging_reconciliation{{.entropy}}
+AS (
+  -- Each CTE returns one row keyed by the constant _pk = '1' so the counts can be joined side by side.
+  WITH events AS (
+    SELECT
+      '1' AS _pk,
+      COUNT(DISTINCT event_id) AS distinct_event_ids,
+      SUM(CASE WHEN event_name = 'screen_view' THEN 1 END) AS screen_view_rows,
+      COUNT(DISTINCT CASE WHEN event_name = 'screen_view' THEN event_id END) AS distinct_sv_event_ids,
+      COUNT(DISTINCT session_id) AS distinct_session_ids,
+      SUM(CASE WHEN event_index_in_session = 1 THEN 1 END) AS sessions_rows,
+      COUNT(DISTINCT CASE WHEN event_name = 'screen_view' THEN session_id END) AS distinct_session_ids_w_screen_view,
+      COUNT(DISTINCT CASE WHEN event_name = 'application_error' THEN event_id END) AS app_error_distinct_event_ids,
+      SUM(CASE WHEN event_name = 'application_error' THEN 1 END) AS app_error_row_count
+
+    FROM {{.scratch_schema}}.mobile_events_staged{{.entropy}}
+    GROUP BY 1
+  )
+
+  , screen_views AS (
+    SELECT
+      '1' AS _pk,
+      COUNT(DISTINCT event_id) AS distinct_sv_event_ids,
+      COUNT(DISTINCT screen_view_id) AS distinct_screen_view_ids,
+      COUNT(DISTINCT session_id) AS distinct_session_ids,
+      COUNT(*) AS screen_view_rows
+
+    FROM {{.scratch_schema}}.mobile_screen_views_staged{{.entropy}}
+    GROUP BY 1
+  )
+
+  -- Screen view rows beyond the earliest derived_tstamp per screen_view_id. Not valid if screen views run multiple times to staging.
+  , screen_view_removed_dupes AS (
+    SELECT
+      '1' AS _pk,
+      COUNT(*) AS removed_screen_view_rows
+
+    FROM (
+      SELECT
+        e.screen_view_id,
+        ROW_NUMBER() OVER(PARTITION BY e.screen_view_id ORDER BY e.derived_tstamp) AS row_num
+
+      FROM {{.scratch_schema}}.mobile_events_staged{{.entropy}} e -- entropy suffix added so this reads the same table as the events CTE
+      WHERE e.event_name = 'screen_view'
+      AND e.screen_view_id IS NOT NULL)
+    WHERE row_num != 1
+    GROUP BY 1
+  )
+
+  , app_errors AS (
+    SELECT
+      '1' AS _pk,
+      COUNT(DISTINCT event_id) AS distinct_app_errors_event_id,
+      COUNT(*) AS app_error_rows
+
+    FROM {{.scratch_schema}}.mobile_app_errors_staged{{.entropy}}
+    GROUP BY 1
+  )
+
+  , sessions AS (
+    SELECT
+      '1' AS _pk,
+      COUNT(DISTINCT session_id) AS distinct_session_ids,
+      SUM(screen_views) AS distinct_screen_view_ids,
+      COUNT(*) AS sessions_rows,
+      SUM(app_errors) AS app_errors
+
+    FROM {{.scratch_schema}}.mobile_sessions_this_run{{.entropy}}
+    GROUP BY 1
+  )
+  -- Each output column is the difference between a source count and its downstream counterpart (NULL counts are treated as 0).
+  SELECT
+    e._pk,
+    IFNULL(e.screen_view_rows,0) - IFNULL(sv.screen_view_rows,0) - IFNULL(svd.removed_screen_view_rows,0) AS ev_to_sv_sv_rows,
+    IFNULL(e.distinct_sv_event_ids,0) - IFNULL(sv.distinct_sv_event_ids,0) AS ev_to_sv_distinct_event_ids,
+    IFNULL(e.sessions_rows,0) - IFNULL(s.sessions_rows,0) AS ev_to_sess_session_rows,
+    IFNULL(e.distinct_session_ids,0) - IFNULL(s.distinct_session_ids,0) AS ev_to_sess_distinct_session_ids,
+    IFNULL(e.distinct_session_ids_w_screen_view,0) - IFNULL(sv.distinct_session_ids,0) AS ev_to_sv_distinct_session_ids,
+    {{if eq (or .app_errors false) true}}
+    -- Only evaluate if the app errors module is enabled; otherwise both differences are fixed at 0
+    IFNULL(e.app_error_distinct_event_ids,0) - IFNULL(ae.distinct_app_errors_event_id,0) AS ev_to_ae_distinct_event_ids,
+    IFNULL(e.app_error_row_count,0) - IFNULL(ae.app_error_rows,0) AS ev_to_ae_row_count,
+    {{else}}
+    0 AS ev_to_ae_distinct_event_ids,
+    0 AS ev_to_ae_row_count,
+    {{end}}
+    IFNULL(sv.distinct_screen_view_ids,0) - IFNULL(s.distinct_screen_view_ids,0) AS sv_to_sess_sv_distinct_screen_view_ids,
+    IFNULL(ae.distinct_app_errors_event_id,0) - IFNULL(s.app_errors,0) AS ae_to_sess_app_errors
+
+  FROM events e
+  LEFT JOIN screen_views sv
+  ON e._pk = sv._pk
+  LEFT JOIN screen_view_removed_dupes svd
+  ON e._pk = svd._pk
+  LEFT JOIN app_errors ae
+  ON e._pk = ae._pk
+  LEFT JOIN sessions s
+  ON e._pk = s._pk
+
+);
+
+
diff --git a/mobile/v1/bigquery/sql-runner/sql/tests/00-staging-reconciliation/99-complete/99-staging-reconciliation-cleanup.sql b/mobile/v1/bigquery/sql-runner/sql/tests/00-staging-reconciliation/99-complete/99-staging-reconciliation-cleanup.sql
new file mode 100644
index 00000000..de8a7829
--- /dev/null
+++ b/mobile/v1/bigquery/sql-runner/sql/tests/00-staging-reconciliation/99-complete/99-staging-reconciliation-cleanup.sql
@@ -0,0 +1,20 @@
+/*
+   Copyright 2021 Snowplow Analytics Ltd. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+-- The reconciliation table is kept when cleanup_mode is "debug" or "trace" and dropped otherwise.
+{{if eq .cleanup_mode "debug" "trace"}} SELECT 1; {{else}}
+  DROP TABLE IF EXISTS {{.scratch_schema}}.mobile_staging_reconciliation{{.entropy}};
+{{end}}
+