From 24b83da7050d7acc681c4dcc0cb4f1031ed3ac51 Mon Sep 17 00:00:00 2001 From: KayUnkroth Date: Tue, 18 Jul 2017 14:47:58 -0700 Subject: [PATCH] Modelling script files for Using Azure Analysis Services on Top of Azure Data Lake Storage blog article. --- UsqlScripts/Modelling/call_center.usql | 75 ++++++++++++++++++ UsqlScripts/Modelling/catalog_page.usql | 31 ++++++++ UsqlScripts/Modelling/catalog_returns.usql | 54 +++++++++++++ UsqlScripts/Modelling/catalog_sales.usql | 76 +++++++++++++++++++ UsqlScripts/Modelling/customer.usql | 49 ++++++++++++ UsqlScripts/Modelling/customer_address.usql | 39 ++++++++++ .../Modelling/customer_demographics.usql | 31 ++++++++ UsqlScripts/Modelling/date_dim.usql | 69 +++++++++++++++++ UsqlScripts/Modelling/dbgen_version.usql | 21 +++++ .../Modelling/household_demographics.usql | 23 ++++++ UsqlScripts/Modelling/income_band.usql | 19 +++++ UsqlScripts/Modelling/inventory.usql | 21 +++++ UsqlScripts/Modelling/item.usql | 57 ++++++++++++++ UsqlScripts/Modelling/promotion.usql | 51 +++++++++++++ UsqlScripts/Modelling/reason.usql | 19 +++++ UsqlScripts/Modelling/ship_mode.usql | 25 ++++++ UsqlScripts/Modelling/store.usql | 71 +++++++++++++++++ UsqlScripts/Modelling/store_returns.usql | 44 +++++++++++ UsqlScripts/Modelling/store_sales.usql | 58 ++++++++++++++ UsqlScripts/Modelling/time_dim.usql | 33 ++++++++ UsqlScripts/Modelling/warehouse.usql | 41 ++++++++++ UsqlScripts/Modelling/web_page.usql | 41 ++++++++++ UsqlScripts/Modelling/web_returns.usql | 52 +++++++++++++ UsqlScripts/Modelling/web_sales.usql | 72 ++++++++++++++++++ UsqlScripts/Modelling/web_site.usql | 65 ++++++++++++++++ 25 files changed, 1137 insertions(+) create mode 100644 UsqlScripts/Modelling/call_center.usql create mode 100644 UsqlScripts/Modelling/catalog_page.usql create mode 100644 UsqlScripts/Modelling/catalog_returns.usql create mode 100644 UsqlScripts/Modelling/catalog_sales.usql create mode 100644 UsqlScripts/Modelling/customer.usql create mode 100644 UsqlScripts/Modelling/customer_address.usql create mode 100644 UsqlScripts/Modelling/customer_demographics.usql create mode 100644 UsqlScripts/Modelling/date_dim.usql create mode 100644 UsqlScripts/Modelling/dbgen_version.usql create mode 100644 UsqlScripts/Modelling/household_demographics.usql create mode 100644 UsqlScripts/Modelling/income_band.usql create mode 100644 UsqlScripts/Modelling/inventory.usql create mode 100644 UsqlScripts/Modelling/item.usql create mode 100644 UsqlScripts/Modelling/promotion.usql create mode 100644 UsqlScripts/Modelling/reason.usql create mode 100644 UsqlScripts/Modelling/ship_mode.usql create mode 100644 UsqlScripts/Modelling/store.usql create mode 100644 UsqlScripts/Modelling/store_returns.usql create mode 100644 UsqlScripts/Modelling/store_sales.usql create mode 100644 UsqlScripts/Modelling/time_dim.usql create mode 100644 UsqlScripts/Modelling/warehouse.usql create mode 100644 UsqlScripts/Modelling/web_page.usql create mode 100644 UsqlScripts/Modelling/web_returns.usql create mode 100644 UsqlScripts/Modelling/web_sales.usql create mode 100644 UsqlScripts/Modelling/web_site.usql diff --git a/UsqlScripts/Modelling/call_center.usql b/UsqlScripts/Modelling/call_center.usql new file mode 100644 index 0000000..a223af7 --- /dev/null +++ b/UsqlScripts/Modelling/call_center.usql @@ -0,0 +1,75 @@ +@raw_parsed = EXTRACT child_id int, + cc_call_center_sk string, + cc_call_center_id string, + cc_rec_start_date string, + cc_rec_end_date string, + cc_closed_date_sk string, + cc_open_date_sk string, + cc_name string, + cc_class string, + cc_employees string, + cc_sq_ft string, + cc_hours string, + cc_manager string, + cc_mkt_id string, + cc_mkt_class string, + cc_mkt_desc string, + cc_market_manager string, + cc_division string, + cc_division_name string, + cc_company string, + cc_company_name string, + cc_street_number string, + cc_street_name string, + cc_street_type string, + cc_suite_number string, + cc_city string, + cc_county string, + cc_state string, + cc_zip string, + cc_country string, + cc_gmt_offset string, + cc_tax_percentage string, + empty string +FROM "wasb://call-center@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cc_call_center_sk, + cc_call_center_id, + cc_rec_start_date, + cc_rec_end_date, + cc_closed_date_sk, + cc_open_date_sk, + cc_name, + cc_class, + cc_employees, + cc_sq_ft, + cc_hours, + cc_manager, + cc_mkt_id, + cc_mkt_class, + cc_mkt_desc, + cc_market_manager, + cc_division, + cc_division_name, + cc_company, + cc_company_name, + cc_street_number, + cc_street_name, + cc_street_type, + cc_suite_number, + cc_city, + cc_county, + cc_state, + cc_zip, + cc_country, + cc_gmt_offset, + cc_tax_percentage +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/call_center.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/catalog_page.usql b/UsqlScripts/Modelling/catalog_page.usql new file mode 100644 index 0000000..085e593 --- /dev/null +++ b/UsqlScripts/Modelling/catalog_page.usql @@ -0,0 +1,31 @@ +@raw_parsed = EXTRACT child_id int, + cp_catalog_page_sk string, + cp_catalog_page_id string, + cp_start_date_sk string, + cp_end_date_sk string, + cp_department string, + cp_catalog_number string, + cp_catalog_page_number string, + cp_description string, + cp_type string, + empty string +FROM "wasb://catalog-page@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cp_catalog_page_sk, + cp_catalog_page_id, + cp_start_date_sk, + cp_end_date_sk, + cp_department, + cp_catalog_number, + cp_catalog_page_number, + cp_description, + cp_type +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/catalog_page.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/catalog_returns.usql b/UsqlScripts/Modelling/catalog_returns.usql new file mode 100644 index 0000000..f0af0e9 --- /dev/null +++ b/UsqlScripts/Modelling/catalog_returns.usql @@ -0,0 +1,54 @@ +@raw_parsed = EXTRACT child_id int, + cr_returned_date_sk string, + cr_returned_time_sk string, + cr_item_sk string, + cr_refunded_customer_sk string, + cr_refunded_cdemo_sk string, + cr_refunded_hdemo_sk string, + cr_refunded_addr_sk string, + cr_returning_customer_sk string, + cr_returning_cdemo_sk string, + cr_returning_hdemo_sk string, + cr_returning_addr_sk string, + cr_call_center_sk string, + cr_catalog_page_sk string, + cr_ship_mode_sk string, + cr_warehouse_sk string, + cr_reason_sk string, + cr_order_number string, + cr_return_quantity string, + cr_return_amount string, + cr_return_tax string, + cr_return_amt_inc_tax string, + cr_fee string, + cr_return_ship_cost string, + cr_refunded_cash string, + cr_reversed_charge string, + cr_store_credit string, + cr_net_loss string, + empty string +FROM "wasb://catalog-returns@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cr_returned_date_sk, + cr_item_sk, + cr_returning_customer_sk, + cr_returning_addr_sk, + cr_call_center_sk, + cr_catalog_page_sk, + cr_order_number, + cr_return_quantity, + cr_return_amount, + cr_return_amt_inc_tax, + cr_refunded_cash, + cr_reversed_charge, + cr_store_credit, + cr_net_loss +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/catalog_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/catalog_sales.usql b/UsqlScripts/Modelling/catalog_sales.usql new file mode 100644 index 0000000..2ed2a35 --- /dev/null +++ b/UsqlScripts/Modelling/catalog_sales.usql @@ -0,0 +1,76 @@ +@raw_parsed = EXTRACT child_id int, + cs_sold_date_sk string, + cs_sold_time_sk string, + cs_ship_date_sk string, + cs_bill_customer_sk string, + cs_bill_cdemo_sk string, + cs_bill_hdemo_sk string, + cs_bill_addr_sk string, + cs_ship_customer_sk string, + cs_ship_cdemo_sk string, + cs_ship_hdemo_sk string, + cs_ship_addr_sk string, + cs_call_center_sk string, + cs_catalog_page_sk string, + cs_ship_mode_sk string, + cs_warehouse_sk string, + cs_item_sk string, + cs_promo_sk string, + cs_order_number string, + cs_quantity string, + cs_wholesale_cost string, + cs_list_price string, + cs_sales_price string, + cs_ext_discount_amt string, + cs_ext_sales_price string, + cs_ext_wholesale_cost string, + cs_ext_list_price string, + cs_ext_tax string, + cs_coupon_amt string, + cs_ext_ship_cost string, + cs_net_paid string, + cs_net_paid_inc_tax string, + cs_net_paid_inc_ship string, + cs_net_paid_inc_ship_tax string, + cs_net_profit string, + empty string +FROM "wasb://catalog-sales@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cs_sold_date_sk, + cs_sold_time_sk, + cs_ship_date_sk, + cs_bill_customer_sk, + cs_bill_cdemo_sk, + cs_bill_hdemo_sk, + cs_bill_addr_sk, + cs_ship_customer_sk, + cs_ship_hdemo_sk, + cs_ship_addr_sk, + cs_call_center_sk, + cs_catalog_page_sk, + cs_ship_mode_sk, + cs_warehouse_sk, + cs_item_sk, + cs_promo_sk, + cs_order_number, + cs_quantity, + cs_wholesale_cost, + cs_list_price, + cs_sales_price, + cs_ext_discount_amt, + cs_ext_sales_price, + cs_ext_wholesale_cost, + cs_ext_list_price, + cs_coupon_amt, + cs_ext_ship_cost, + cs_net_paid, + cs_net_profit +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/catalog_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/customer.usql b/UsqlScripts/Modelling/customer.usql new file mode 100644 index 0000000..350118c --- /dev/null +++ b/UsqlScripts/Modelling/customer.usql @@ -0,0 +1,49 @@ +@raw_parsed = EXTRACT child_id int, + c_customer_sk string, + c_customer_id string, + c_current_cdemo_sk string, + c_current_hdemo_sk string, + c_current_addr_sk string, + c_first_shipto_date_sk string, + c_first_sales_date_sk string, + c_salutation string, + c_first_name string, + c_last_name string, + c_preferred_cust_flag string, + c_birth_day string, + c_birth_month string, + c_birth_year string, + c_birth_country string, + c_login string, + c_email_address string, + c_last_review_date string, + empty string +FROM "wasb://customer@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT c_customer_sk, + c_customer_id, + c_current_cdemo_sk, + c_current_hdemo_sk, + c_current_addr_sk, + c_first_shipto_date_sk, + c_first_sales_date_sk, + c_salutation, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_day, + c_birth_month, + c_birth_year, + c_birth_country, + c_login, + c_email_address, + c_last_review_date +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/customer.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/customer_address.usql b/UsqlScripts/Modelling/customer_address.usql new file mode 100644 index 0000000..528df70 --- /dev/null +++ b/UsqlScripts/Modelling/customer_address.usql @@ -0,0 +1,39 @@ +@raw_parsed = EXTRACT child_id int, + ca_address_sk string, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset string, + ca_location_type string, + empty string +FROM "wasb://customer-address@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ca_address_sk, + ca_address_id, + ca_street_number, + ca_street_name, + ca_street_type, + ca_suite_number, + ca_city, + ca_county, + ca_state, + ca_zip, + ca_country, + ca_gmt_offset, + ca_location_type +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/customer_address.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/customer_demographics.usql b/UsqlScripts/Modelling/customer_demographics.usql new file mode 100644 index 0000000..97b9471 --- /dev/null +++ b/UsqlScripts/Modelling/customer_demographics.usql @@ -0,0 +1,31 @@ +@raw_parsed = EXTRACT child_id int, + cd_demo_sk string, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate string, + cd_credit_rating string, + cd_dep_count string, + cd_dep_employed_count string, + cd_dep_college_count string, + empty string +FROM "wasb://customer-demographics@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cd_demo_sk, + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/customer_demographics.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/date_dim.usql b/UsqlScripts/Modelling/date_dim.usql new file mode 100644 index 0000000..8a67947 --- /dev/null +++ b/UsqlScripts/Modelling/date_dim.usql @@ -0,0 +1,69 @@ +@raw_parsed = EXTRACT child_id int, + d_date_sk string, + d_date_id string, + d_date string, + d_month_seq string, + d_week_seq string, + d_quarter_seq string, + d_year string, + d_dow string, + d_moy string, + d_dom string, + d_qoy string, + d_fy_year string, + d_fy_quarter_seq string, + d_fy_week_seq string, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom string, + d_last_dom string, + d_same_day_ly string, + d_same_day_lq string, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string, + empty string +FROM "wasb://date-dim@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT d_date_sk, + d_date_id, + d_date, + d_month_seq, + d_week_seq, + d_quarter_seq, + d_year, + d_dow, + d_moy, + d_dom, + d_qoy, + d_fy_year, + d_fy_quarter_seq, + d_fy_week_seq, + d_day_name, + d_quarter_name, + d_holiday, + d_weekend, + d_following_holiday, + d_first_dom, + d_last_dom, + d_same_day_ly, + d_same_day_lq, + d_current_day, + d_current_week, + d_current_month, + d_current_quarter, + d_current_year +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/date_dim.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/dbgen_version.usql b/UsqlScripts/Modelling/dbgen_version.usql new file mode 100644 index 0000000..83fdad8 --- /dev/null +++ b/UsqlScripts/Modelling/dbgen_version.usql @@ -0,0 +1,21 @@ +@raw_parsed = EXTRACT child_id int, + dv_version string, + dv_create_date string, + dv_create_time string, + dv_cmdline_args string, + empty string +FROM "wasb://dbgen-version@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT dv_version, + dv_create_date, + dv_create_time, + dv_cmdline_args +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/dbgen_version.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/household_demographics.usql b/UsqlScripts/Modelling/household_demographics.usql new file mode 100644 index 0000000..4aaac96 --- /dev/null +++ b/UsqlScripts/Modelling/household_demographics.usql @@ -0,0 +1,23 @@ +@raw_parsed = EXTRACT child_id int, + hd_demo_sk string, + hd_income_band_sk string, + hd_buy_potential string, + hd_dep_count string, + hd_vehicle_count string, + empty string +FROM "wasb://household-demographics@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT hd_demo_sk, + hd_income_band_sk, + hd_buy_potential, + hd_dep_count, + hd_vehicle_count +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/household_demographics.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/income_band.usql b/UsqlScripts/Modelling/income_band.usql new file mode 100644 index 0000000..6be786f --- /dev/null +++ b/UsqlScripts/Modelling/income_band.usql @@ -0,0 +1,19 @@ +@raw_parsed = EXTRACT child_id int, + b_income_band_sk string, + b_lower_bound string, + b_upper_bound string, + empty string +FROM "wasb://income-band@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT b_income_band_sk, + b_lower_bound, + b_upper_bound +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/income_band.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/inventory.usql b/UsqlScripts/Modelling/inventory.usql new file mode 100644 index 0000000..7d204c7 --- /dev/null +++ b/UsqlScripts/Modelling/inventory.usql @@ -0,0 +1,21 @@ +@raw_parsed = EXTRACT child_id int, + nv_date_sk string, + nv_item_sk string, + nv_warehouse_sk string, + nv_quantity_on_hand string, + empty string +FROM "wasb://inventory@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT nv_date_sk, + nv_item_sk, + nv_warehouse_sk, + nv_quantity_on_hand +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/inventory.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/item.usql b/UsqlScripts/Modelling/item.usql new file mode 100644 index 0000000..769073a --- /dev/null +++ b/UsqlScripts/Modelling/item.usql @@ -0,0 +1,57 @@ +@raw_parsed = EXTRACT child_id int, + _item_sk string, + _item_id string, + _rec_start_date string, + _rec_end_date string, + _item_desc string, + _current_price string, + _wholesale_cost string, + _brand_id string, + _brand string, + _class_id string, + _class string, + _category_id string, + _category string, + _manufact_id string, + _manufact string, + _size string, + _formulation string, + _color string, + _units string, + _container string, + _manager_id string, + _product_name string, + empty string +FROM "wasb://item@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _item_sk, + _item_id, + _rec_start_date, + _rec_end_date, + _item_desc, + _current_price, + _wholesale_cost, + _brand_id, + _brand, + _class_id, + _class, + _category_id, + _category, + _manufact_id, + _manufact, + _size, + _formulation, + _color, + _units, + _container, + _manager_id, + _product_name +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/item.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/promotion.usql b/UsqlScripts/Modelling/promotion.usql new file mode 100644 index 0000000..27a4923 --- /dev/null +++ b/UsqlScripts/Modelling/promotion.usql @@ -0,0 +1,51 @@ +@raw_parsed = EXTRACT child_id int, + _promo_sk string, + _promo_id string, + _start_date_sk string, + _end_date_sk string, + _item_sk string, + _cost string, + _response_target string, + _promo_name string, + _channel_dmail string, + _channel_email string, + _channel_catalog string, + _channel_tv string, + _channel_radio string, + _channel_press string, + _channel_event string, + _channel_demo string, + _channel_details string, + _purpose string, + _discount_active string, + empty string +FROM "wasb://promotion@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _promo_sk, + _promo_id, + _start_date_sk, + _end_date_sk, + _item_sk, + _cost, + _response_target, + _promo_name, + _channel_dmail, + _channel_email, + _channel_catalog, + _channel_tv, + _channel_radio, + _channel_press, + _channel_event, + _channel_demo, + _channel_details, + _purpose, + _discount_active +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/promotion.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/reason.usql b/UsqlScripts/Modelling/reason.usql new file mode 100644 index 0000000..1a7028a --- /dev/null +++ b/UsqlScripts/Modelling/reason.usql @@ -0,0 +1,19 @@ +@raw_parsed = EXTRACT child_id int, + _reason_sk string, + _reason_id string, + _reason_desc string, + empty string +FROM "wasb://reason@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _reason_sk, + _reason_id, + _reason_desc +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/reason.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/ship_mode.usql b/UsqlScripts/Modelling/ship_mode.usql new file mode 100644 index 0000000..9de4e66 --- /dev/null +++ b/UsqlScripts/Modelling/ship_mode.usql @@ -0,0 +1,25 @@ +@raw_parsed = EXTRACT child_id int, + sm_ship_mode_sk string, + sm_ship_mode_id string, + sm_type string, + sm_code string, + sm_carrier string, + sm_contract string, + empty string +FROM "wasb://ship-mode@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sm_ship_mode_sk, + sm_ship_mode_id, + sm_type, + sm_code, + sm_carrier, + sm_contract +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/ship_mode.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/store.usql b/UsqlScripts/Modelling/store.usql new file mode 100644 index 0000000..05ec370 --- /dev/null +++ b/UsqlScripts/Modelling/store.usql @@ -0,0 +1,71 @@ +@raw_parsed = EXTRACT child_id int, + s_store_sk string, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk string, + s_store_name string, + s_number_employees string, + s_floor_space string, + s_hours string, + s_manager string, + s_market_id string, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id string, + s_division_name string, + s_company_id string, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset string, + s_tax_precentage string, + empty string +FROM "wasb://store@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT s_store_sk, + s_store_id, + s_rec_start_date, + s_rec_end_date, + s_closed_date_sk, + s_store_name, + s_number_employees, + s_floor_space, + s_hours, + s_manager, + s_market_id, + s_geography_class, + s_market_desc, + s_market_manager, + s_division_id, + s_division_name, + s_company_id, + s_company_name, + s_street_number, + s_street_name, + s_street_type, + s_suite_number, + s_city, + s_county, + s_state, + s_zip, + s_country, + s_gmt_offset, + s_tax_precentage +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/store.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/store_returns.usql b/UsqlScripts/Modelling/store_returns.usql new file mode 100644 index 0000000..44b0db9 --- /dev/null +++ b/UsqlScripts/Modelling/store_returns.usql @@ -0,0 +1,44 @@ +@raw_parsed = EXTRACT child_id int, + sr_returned_date_sk string, + sr_return_time_sk string, + sr_item_sk string, + sr_customer_sk string, + sr_cdemo_sk string, + sr_hdemo_sk string, + sr_addr_sk string, + sr_store_sk string, + sr_reason_sk string, + sr_ticket_number string, + sr_return_quantity string, + sr_return_amt string, + sr_return_tax string, + sr_return_amt_inc_tax string, + sr_fee string, + sr_return_ship_cost string, + sr_refunded_cash string, + sr_reversed_charge string, + sr_store_credit string, + sr_net_loss string, + empty string +FROM "wasb://store-returns@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sr_returned_date_sk, + sr_item_sk, + sr_customer_sk, + sr_cdemo_sk, + sr_store_sk, + sr_reason_sk, + sr_ticket_number, + sr_return_quantity, + sr_return_amt, + sr_return_amt_inc_tax, + sr_net_loss +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/store_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/store_sales.usql b/UsqlScripts/Modelling/store_sales.usql new file mode 100644 index 0000000..a14a1a5 --- /dev/null +++ b/UsqlScripts/Modelling/store_sales.usql @@ -0,0 +1,58 @@ +@raw_parsed = EXTRACT child_id int, + ss_sold_date_sk string, + ss_sold_time_sk string, + ss_item_sk string, + ss_customer_sk string, + ss_cdemo_sk string, + ss_hdemo_sk string, + ss_addr_sk string, + ss_store_sk string, + ss_promo_sk string, + ss_ticket_number string, + ss_quantity string, + ss_wholesale_cost string, + ss_list_price string, + ss_sales_price string, + ss_ext_discount_amt string, + ss_ext_sales_price string, + ss_ext_wholesale_cost string, + ss_ext_list_price string, + ss_ext_tax string, + ss_coupon_amt string, + ss_net_paid string, + ss_net_paid_inc_tax string, + ss_net_profit string, + empty string +FROM "wasb://store-sales@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ss_sold_date_sk, + ss_sold_time_sk, + ss_item_sk, + ss_customer_sk, + ss_cdemo_sk, + ss_hdemo_sk, + ss_addr_sk, + ss_store_sk, + ss_promo_sk, + ss_ticket_number, + ss_quantity, + ss_wholesale_cost, + ss_list_price, + ss_sales_price, + ss_ext_discount_amt, + ss_ext_sales_price, + ss_ext_wholesale_cost, + ss_ext_list_price, + ss_ext_tax, + ss_coupon_amt, + ss_net_paid, + ss_net_profit +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/store_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/time_dim.usql b/UsqlScripts/Modelling/time_dim.usql new file mode 100644 index 0000000..b631ab8 --- /dev/null +++ b/UsqlScripts/Modelling/time_dim.usql @@ -0,0 +1,33 @@ +@raw_parsed = EXTRACT child_id int, + t_time_sk string, + t_time_id string, + t_time string, + t_hour string, + t_minute string, + t_second string, + t_am_pm string, + t_shift string, + t_sub_shift string, + t_meal_time string, + empty string +FROM "wasb://time-dim@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT t_time_sk, + t_time_id, + t_time, + t_hour, + t_minute, + t_second, + t_am_pm, + t_shift, + t_sub_shift, + t_meal_time +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/time_dim.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/warehouse.usql b/UsqlScripts/Modelling/warehouse.usql new file mode 100644 index 0000000..c0a1145 --- /dev/null +++ b/UsqlScripts/Modelling/warehouse.usql @@ -0,0 +1,41 @@ +@raw_parsed = EXTRACT child_id int, + w_warehouse_sk string, + w_warehouse_id string, + w_warehouse_name string, + w_warehouse_sq_ft string, + w_street_number string, + w_street_name string, + w_street_type string, + w_suite_number string, + w_city string, + w_county string, + w_state string, + w_zip string, + w_country string, + w_gmt_offset string, + empty string +FROM "wasb://warehouse@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT w_warehouse_sk, + w_warehouse_id, + w_warehouse_name, + w_warehouse_sq_ft, + w_street_number, + w_street_name, + w_street_type, + w_suite_number, + w_city, + w_county, + w_state, + w_zip, + w_country, + w_gmt_offset +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/warehouse.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/web_page.usql b/UsqlScripts/Modelling/web_page.usql new file mode 100644 index 0000000..588c592 --- /dev/null +++ b/UsqlScripts/Modelling/web_page.usql @@ -0,0 +1,41 @@ +@raw_parsed = EXTRACT child_id int, + wp_web_page_sk string, + wp_web_page_id string, + wp_rec_start_date string, + wp_rec_end_date string, + wp_creation_date_sk string, + wp_access_date_sk string, + wp_autogen_flag string, + wp_customer_sk string, + wp_url string, + wp_type string, + wp_char_count string, + wp_link_count string, + wp_image_count string, + wp_max_ad_count string, + empty string +FROM "wasb://web-page@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wp_web_page_sk, + wp_web_page_id, + wp_rec_start_date, + wp_rec_end_date, + wp_creation_date_sk, + wp_access_date_sk, + wp_autogen_flag, + wp_customer_sk, + wp_url, + wp_type, + wp_char_count, + wp_link_count, + wp_image_count, + wp_max_ad_count +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/web_page.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/web_returns.usql b/UsqlScripts/Modelling/web_returns.usql new file mode 100644 index 0000000..15b74d3 --- /dev/null +++ b/UsqlScripts/Modelling/web_returns.usql @@ -0,0 +1,52 @@ +@raw_parsed = EXTRACT child_id int, + wr_returned_date_sk string, + wr_returned_time_sk string, + wr_item_sk string, + wr_refunded_customer_sk string, + wr_refunded_cdemo_sk string, + wr_refunded_hdemo_sk string, + wr_refunded_addr_sk string, + wr_returning_customer_sk string, + wr_returning_cdemo_sk string, + wr_returning_hdemo_sk string, + wr_returning_addr_sk string, + wr_web_page_sk string, + wr_reason_sk string, + wr_order_number string, + wr_return_quantity string, + wr_return_amt string, + wr_return_tax string, + wr_return_amt_inc_tax string, + wr_fee string, + wr_return_ship_cost string, + wr_refunded_cash string, + wr_reversed_charge string, + wr_account_credit string, + wr_net_loss string, + empty string +FROM "wasb://web-returns@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wr_returned_date_sk, + wr_item_sk, + wr_refunded_cdemo_sk, + wr_refunded_addr_sk, + wr_returning_customer_sk, + wr_returning_cdemo_sk, + wr_returning_addr_sk, + wr_web_page_sk, + wr_reason_sk, + wr_order_number, + wr_return_quantity, + wr_return_amt, + wr_fee, + wr_refunded_cash, + wr_net_loss +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/web_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/web_sales.usql b/UsqlScripts/Modelling/web_sales.usql new file mode 100644 index 0000000..a9ab339 --- /dev/null +++ b/UsqlScripts/Modelling/web_sales.usql @@ -0,0 +1,72 @@ +@raw_parsed = EXTRACT child_id int, + ws_sold_date_sk string, + ws_sold_time_sk string, + ws_ship_date_sk string, + ws_item_sk string, + ws_bill_customer_sk string, + ws_bill_cdemo_sk string, + ws_bill_hdemo_sk string, + ws_bill_addr_sk string, + ws_ship_customer_sk string, + ws_ship_cdemo_sk string, + ws_ship_hdemo_sk string, + ws_ship_addr_sk string, + ws_web_page_sk string, + ws_web_site_sk string, + ws_ship_mode_sk string, + ws_warehouse_sk string, + ws_promo_sk string, + ws_order_number string, + ws_quantity string, + ws_wholesale_cost string, + ws_list_price string, + ws_sales_price string, + ws_ext_discount_amt string, + ws_ext_sales_price string, + ws_ext_wholesale_cost string, + ws_ext_list_price string, + ws_ext_tax string, + ws_coupon_amt string, + ws_ext_ship_cost string, + ws_net_paid string, + ws_net_paid_inc_tax string, + ws_net_paid_inc_ship string, + ws_net_paid_inc_ship_tax string, + ws_net_profit string, + empty string +FROM "wasb://web-sales@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ws_sold_date_sk, + ws_sold_time_sk, + ws_ship_date_sk, + ws_item_sk, + ws_bill_customer_sk, + ws_bill_addr_sk, + ws_ship_hdemo_sk, + ws_ship_addr_sk, + ws_web_page_sk, + ws_web_site_sk, + ws_ship_mode_sk, + ws_warehouse_sk, + ws_promo_sk, + ws_order_number, + ws_quantity, + ws_wholesale_cost, + ws_list_price, + ws_sales_price, + ws_ext_discount_amt, + ws_ext_sales_price, + ws_ext_wholesale_cost, + ws_ext_list_price, + ws_ext_ship_cost, + ws_net_paid, + ws_net_profit +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/web_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/Modelling/web_site.usql b/UsqlScripts/Modelling/web_site.usql new file mode 100644 index 0000000..a8c32f4 --- /dev/null +++ b/UsqlScripts/Modelling/web_site.usql @@ -0,0 +1,65 @@ +@raw_parsed = EXTRACT child_id int, + web_site_sk string, + web_site_id string, + web_rec_start_date string, + web_rec_end_date string, + web_name string, + web_open_date_sk string, + web_close_date_sk string, + web_class string, + web_manager string, + web_mkt_id string, + web_mkt_class string, + web_mkt_desc string, + web_market_manager string, + web_company_id string, + web_company_name string, + web_street_number string, + web_street_name string, + web_street_type string, + web_suite_number string, + web_city string, + web_county string, + web_state string, + web_zip string, + web_country string, + web_gmt_offset string, + web_tax_percentage string, + empty string +FROM "wasb://web-site@aasuseast2/{*}_{child_id:*}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT web_site_sk, + web_site_id, + web_rec_start_date, + web_rec_end_date, + web_name, + web_open_date_sk, + web_close_date_sk, + web_class, + web_manager, + web_mkt_id, + web_mkt_class, + web_mkt_desc, + web_market_manager, + web_company_id, + web_company_name, + web_street_number, + web_street_name, + web_street_type, + web_suite_number, + web_city, + web_county, + web_state, + web_zip, + web_country, + web_gmt_offset, + web_tax_percentage +FROM @raw_parsed +ORDER BY child_id ASC +FETCH 100 ROWS; + +OUTPUT @filtered_results +TO "/modelling/web_site.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file