diff --git a/BismNormalizer/BismNormalizer.CommandLine/App.config b/BismNormalizer/BismNormalizer.CommandLine/App.config index 959051b..a375bc0 100644 --- a/BismNormalizer/BismNormalizer.CommandLine/App.config +++ b/BismNormalizer/BismNormalizer.CommandLine/App.config @@ -3,12 +3,4 @@ - - - - - - - - diff --git a/BismNormalizer/BismNormalizer.CommandLine/Properties/AssemblyInfo.cs b/BismNormalizer/BismNormalizer.CommandLine/Properties/AssemblyInfo.cs index 241b25f..84eaaef 100644 --- a/BismNormalizer/BismNormalizer.CommandLine/Properties/AssemblyInfo.cs +++ b/BismNormalizer/BismNormalizer.CommandLine/Properties/AssemblyInfo.cs @@ -32,5 +32,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("4.0.0.13")] -[assembly: AssemblyFileVersion("4.0.0.13")] +[assembly: AssemblyVersion("4.0.0.14")] +[assembly: AssemblyFileVersion("4.0.0.14")] diff --git a/BismNormalizer/BismNormalizer.IconSetup/Properties/AssemblyInfo.cs b/BismNormalizer/BismNormalizer.IconSetup/Properties/AssemblyInfo.cs index 864ffc8..811c429 100644 --- a/BismNormalizer/BismNormalizer.IconSetup/Properties/AssemblyInfo.cs +++ b/BismNormalizer/BismNormalizer.IconSetup/Properties/AssemblyInfo.cs @@ -32,5 +32,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("4.0.0.13")] -[assembly: AssemblyFileVersion("4.0.0.13")] +[assembly: AssemblyVersion("4.0.0.14")] +[assembly: AssemblyFileVersion("4.0.0.14")] diff --git a/BismNormalizer/BismNormalizer/BismNormalizer.csproj b/BismNormalizer/BismNormalizer/BismNormalizer.csproj index c1277a6..8c0a239 100644 --- a/BismNormalizer/BismNormalizer/BismNormalizer.csproj +++ 
b/BismNormalizer/BismNormalizer/BismNormalizer.csproj @@ -170,6 +170,9 @@ ..\packages\Microsoft.VisualStudio.Validation.14.1.111\lib\net45\Microsoft.VisualStudio.Validation.dll True + + ..\packages\Newtonsoft.Json.10.0.3\lib\net45\Newtonsoft.Json.dll + True diff --git a/BismNormalizer/BismNormalizer/Properties/AssemblyInfo.cs b/BismNormalizer/BismNormalizer/Properties/AssemblyInfo.cs index a951c85..917d3aa 100644 --- a/BismNormalizer/BismNormalizer/Properties/AssemblyInfo.cs +++ b/BismNormalizer/BismNormalizer/Properties/AssemblyInfo.cs @@ -29,5 +29,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("4.0.0.13")] -[assembly: AssemblyFileVersion("4.0.0.13")] +[assembly: AssemblyVersion("4.0.0.14")] +[assembly: AssemblyFileVersion("4.0.0.14")] diff --git a/BismNormalizer/BismNormalizer/TabularCompare/UI/BlobCredentials.cs b/BismNormalizer/BismNormalizer/TabularCompare/UI/BlobCredentials.cs index 6cee6ad..d6d593a 100644 --- a/BismNormalizer/BismNormalizer/TabularCompare/UI/BlobCredentials.cs +++ b/BismNormalizer/BismNormalizer/TabularCompare/UI/BlobCredentials.cs @@ -52,12 +52,22 @@ namespace BismNormalizer.TabularCompare.UI float dpiScaleFactorFudged = _dpiScaleFactor * HighDPIUtils.PrimaryFudgeFactor; this.Scale(new SizeF(dpiScaleFactorFudged * 0.44f, dpiScaleFactorFudged * 0.38f)); - this.Width = Convert.ToInt32(this.Width * dpiScaleFactorFudged * 0.6f); + this.Width = Convert.ToInt32(this.Width * dpiScaleFactorFudged * 0.8f); foreach (Control control in HighDPIUtils.GetChildInControl(this)) { - control.Font = new Font(control.Font.FontFamily, - control.Font.Size * dpiScaleFactorFudged * HighDPIUtils.PrimaryFudgeFactor, - control.Font.Style); + if (control is Button) + { + control.Font = new Font(control.Font.FontFamily, + control.Font.Size * dpiScaleFactorFudged * 1.1f * 
HighDPIUtils.PrimaryFudgeFactor, + control.Font.Style); + } + else + { + control.Font = new Font(control.Font.FontFamily, + //cbw todo check * 1.4f works on remote desktop setting + control.Font.Size * dpiScaleFactorFudged * 1.4f * HighDPIUtils.PrimaryFudgeFactor, + control.Font.Style); + } } } diff --git a/BismNormalizer/BismNormalizer/TabularCompare/UI/ImpersonationCredentials.cs b/BismNormalizer/BismNormalizer/TabularCompare/UI/ImpersonationCredentials.cs index c246e9f..6a60e58 100644 --- a/BismNormalizer/BismNormalizer/TabularCompare/UI/ImpersonationCredentials.cs +++ b/BismNormalizer/BismNormalizer/TabularCompare/UI/ImpersonationCredentials.cs @@ -56,14 +56,24 @@ namespace BismNormalizer.TabularCompare.UI { //DPI float dpiScaleFactorFudged = _dpiScaleFactor * HighDPIUtils.PrimaryFudgeFactor; - - this.Scale(new SizeF(dpiScaleFactorFudged * 0.44f, dpiScaleFactorFudged * 0.38f)); + this.Scale(new SizeF(dpiScaleFactorFudged * 0.44f, dpiScaleFactorFudged * 0.35f)); this.Width = Convert.ToInt32(this.Width * dpiScaleFactorFudged * 0.6f); + foreach (Control control in HighDPIUtils.GetChildInControl(this)) { - control.Font = new Font(control.Font.FontFamily, - control.Font.Size * dpiScaleFactorFudged * HighDPIUtils.PrimaryFudgeFactor, - control.Font.Style); + if (control is Button) + { + control.Font = new Font(control.Font.FontFamily, + control.Font.Size * dpiScaleFactorFudged * 1.1f * HighDPIUtils.PrimaryFudgeFactor, + control.Font.Style); + } + else + { + control.Font = new Font(control.Font.FontFamily, + //cbw todo check * 1.4f works on remote desktop setting + control.Font.Size * dpiScaleFactorFudged * 1.4f * HighDPIUtils.PrimaryFudgeFactor, + control.Font.Style); + } } } diff --git a/BismNormalizer/BismNormalizer/app.config b/BismNormalizer/BismNormalizer/app.config index f803b17..0de6272 100644 --- a/BismNormalizer/BismNormalizer/app.config +++ b/BismNormalizer/BismNormalizer/app.config @@ -80,7 +80,7 @@ - + diff --git 
a/BismNormalizer/BismNormalizer/packages.config b/BismNormalizer/BismNormalizer/packages.config index 38b501f..1fa392f 100644 --- a/BismNormalizer/BismNormalizer/packages.config +++ b/BismNormalizer/BismNormalizer/packages.config @@ -17,4 +17,5 @@ + \ No newline at end of file diff --git a/BismNormalizer/BismNormalizer/source.extension.vsixmanifest b/BismNormalizer/BismNormalizer/source.extension.vsixmanifest index d83f827..8eaeb7f 100644 --- a/BismNormalizer/BismNormalizer/source.extension.vsixmanifest +++ b/BismNormalizer/BismNormalizer/source.extension.vsixmanifest @@ -1,26 +1,26 @@  - - - BISM Normalizer - BISM Normalizer manages Analysis Services tabular models - http://bism-normalizer.com/ - Resources\LicenseTerms.txt - Resources\BismNormalizerLogo.png - Resources\BismNormalizerLogoText.png - - - - - - - - - - - - - - - + + + BISM Normalizer + BISM Normalizer manages Analysis Services tabular models + http://bism-normalizer.com/ + Resources\LicenseTerms.txt + Resources\BismNormalizerLogo.png + Resources\BismNormalizerLogoText.png + + + + + + + + + + + + + + + diff --git a/README.md b/README.md index 1c1950b..761fea8 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,9 @@ The AsXEventSample sample shows how to collect streaming xEvents ## [BismNormalizer](https://github.com/Microsoft/Analysis-Services/tree/master/BismNormalizer) BISM Normalizer is a schema diff tool for tabular models +## [UsqlScripts](https://github.com/Microsoft/Analysis-Services/tree/master/UsqlScripts) +Sample U-SQL scripts that demonstrate how to process a TPC-DS data set in Azure Data Lake. + ## Code of Conduct This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
diff --git a/UsqlScripts/Modelling/call_center.usql b/UsqlScripts/Modelling/call_center.usql index a223af7..178ccd1 100644 --- a/UsqlScripts/Modelling/call_center.usql +++ b/UsqlScripts/Modelling/call_center.usql @@ -31,7 +31,7 @@ cc_gmt_offset string, cc_tax_percentage string, empty string -FROM "wasb://call-center@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://call-center@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT cc_call_center_sk, diff --git a/UsqlScripts/Modelling/catalog_page.usql b/UsqlScripts/Modelling/catalog_page.usql index 085e593..a96d118 100644 --- a/UsqlScripts/Modelling/catalog_page.usql +++ b/UsqlScripts/Modelling/catalog_page.usql @@ -9,7 +9,7 @@ cp_description string, cp_type string, empty string -FROM "wasb://catalog-page@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://catalog-page@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT cp_catalog_page_sk, diff --git a/UsqlScripts/Modelling/catalog_returns.usql b/UsqlScripts/Modelling/catalog_returns.usql index f0af0e9..5cbbade 100644 --- a/UsqlScripts/Modelling/catalog_returns.usql +++ b/UsqlScripts/Modelling/catalog_returns.usql @@ -27,7 +27,7 @@ cr_store_credit string, cr_net_loss string, empty string -FROM "wasb://catalog-returns@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://catalog-returns@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT cr_returned_date_sk, diff --git a/UsqlScripts/Modelling/catalog_sales.usql b/UsqlScripts/Modelling/catalog_sales.usql index 2ed2a35..dcf887a 100644 --- a/UsqlScripts/Modelling/catalog_sales.usql +++ b/UsqlScripts/Modelling/catalog_sales.usql @@ -34,7 +34,7 @@ cs_net_paid_inc_ship_tax string, cs_net_profit string, empty string -FROM "wasb://catalog-sales@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://catalog-sales@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT cs_sold_date_sk, 
diff --git a/UsqlScripts/Modelling/customer.usql b/UsqlScripts/Modelling/customer.usql index 350118c..ef4146d 100644 --- a/UsqlScripts/Modelling/customer.usql +++ b/UsqlScripts/Modelling/customer.usql @@ -18,7 +18,7 @@ c_email_address string, c_last_review_date string, empty string -FROM "wasb://customer@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://customer@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT c_customer_sk, diff --git a/UsqlScripts/Modelling/customer_address.usql b/UsqlScripts/Modelling/customer_address.usql index 528df70..d2515b7 100644 --- a/UsqlScripts/Modelling/customer_address.usql +++ b/UsqlScripts/Modelling/customer_address.usql @@ -13,7 +13,7 @@ ca_gmt_offset string, ca_location_type string, empty string -FROM "wasb://customer-address@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://customer-address@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT ca_address_sk, diff --git a/UsqlScripts/Modelling/customer_demographics.usql b/UsqlScripts/Modelling/customer_demographics.usql index 97b9471..c074fee 100644 --- a/UsqlScripts/Modelling/customer_demographics.usql +++ b/UsqlScripts/Modelling/customer_demographics.usql @@ -9,7 +9,7 @@ cd_dep_employed_count string, cd_dep_college_count string, empty string -FROM "wasb://customer-demographics@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://customer-demographics@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT cd_demo_sk, diff --git a/UsqlScripts/Modelling/date_dim.usql b/UsqlScripts/Modelling/date_dim.usql index 8a67947..c216dcd 100644 --- a/UsqlScripts/Modelling/date_dim.usql +++ b/UsqlScripts/Modelling/date_dim.usql @@ -28,7 +28,7 @@ d_current_quarter string, d_current_year string, empty string -FROM "wasb://date-dim@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://date-dim@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT 
d_date_sk, diff --git a/UsqlScripts/Modelling/dbgen_version.usql b/UsqlScripts/Modelling/dbgen_version.usql index 83fdad8..61a3b82 100644 --- a/UsqlScripts/Modelling/dbgen_version.usql +++ b/UsqlScripts/Modelling/dbgen_version.usql @@ -4,7 +4,7 @@ dv_create_time string, dv_cmdline_args string, empty string -FROM "wasb://dbgen-version@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://dbgen-version@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT dv_version, diff --git a/UsqlScripts/Modelling/household_demographics.usql b/UsqlScripts/Modelling/household_demographics.usql index 4aaac96..6edfd77 100644 --- a/UsqlScripts/Modelling/household_demographics.usql +++ b/UsqlScripts/Modelling/household_demographics.usql @@ -5,7 +5,7 @@ hd_dep_count string, hd_vehicle_count string, empty string -FROM "wasb://household-demographics@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://household-demographics@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT hd_demo_sk, diff --git a/UsqlScripts/Modelling/income_band.usql b/UsqlScripts/Modelling/income_band.usql index 6be786f..e2dbc0c 100644 --- a/UsqlScripts/Modelling/income_band.usql +++ b/UsqlScripts/Modelling/income_band.usql @@ -3,7 +3,7 @@ b_lower_bound string, b_upper_bound string, empty string -FROM "wasb://income-band@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://income-band@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT b_income_band_sk, diff --git a/UsqlScripts/Modelling/inventory.usql b/UsqlScripts/Modelling/inventory.usql index 7d204c7..730c9c6 100644 --- a/UsqlScripts/Modelling/inventory.usql +++ b/UsqlScripts/Modelling/inventory.usql @@ -4,7 +4,7 @@ nv_warehouse_sk string, nv_quantity_on_hand string, empty string -FROM "wasb://inventory@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://inventory@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT 
nv_date_sk, diff --git a/UsqlScripts/Modelling/item.usql b/UsqlScripts/Modelling/item.usql index 769073a..6de0064 100644 --- a/UsqlScripts/Modelling/item.usql +++ b/UsqlScripts/Modelling/item.usql @@ -22,7 +22,7 @@ _manager_id string, _product_name string, empty string -FROM "wasb://item@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://item@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT _item_sk, diff --git a/UsqlScripts/Modelling/promotion.usql b/UsqlScripts/Modelling/promotion.usql index 27a4923..50ff856 100644 --- a/UsqlScripts/Modelling/promotion.usql +++ b/UsqlScripts/Modelling/promotion.usql @@ -19,7 +19,7 @@ _purpose string, _discount_active string, empty string -FROM "wasb://promotion@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://promotion@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT _promo_sk, diff --git a/UsqlScripts/Modelling/reason.usql b/UsqlScripts/Modelling/reason.usql index 1a7028a..ee2f717 100644 --- a/UsqlScripts/Modelling/reason.usql +++ b/UsqlScripts/Modelling/reason.usql @@ -3,7 +3,7 @@ _reason_id string, _reason_desc string, empty string -FROM "wasb://reason@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://reason@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT _reason_sk, diff --git a/UsqlScripts/Modelling/ship_mode.usql b/UsqlScripts/Modelling/ship_mode.usql index 9de4e66..e340a71 100644 --- a/UsqlScripts/Modelling/ship_mode.usql +++ b/UsqlScripts/Modelling/ship_mode.usql @@ -6,7 +6,7 @@ sm_carrier string, sm_contract string, empty string -FROM "wasb://ship-mode@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://ship-mode@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT sm_ship_mode_sk, diff --git a/UsqlScripts/Modelling/store.usql b/UsqlScripts/Modelling/store.usql index 05ec370..0367bb4 100644 --- a/UsqlScripts/Modelling/store.usql +++ 
b/UsqlScripts/Modelling/store.usql @@ -29,7 +29,7 @@ s_gmt_offset string, s_tax_precentage string, empty string -FROM "wasb://store@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://store@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT s_store_sk, diff --git a/UsqlScripts/Modelling/store_returns.usql b/UsqlScripts/Modelling/store_returns.usql index 44b0db9..1b308a0 100644 --- a/UsqlScripts/Modelling/store_returns.usql +++ b/UsqlScripts/Modelling/store_returns.usql @@ -20,7 +20,7 @@ sr_store_credit string, sr_net_loss string, empty string -FROM "wasb://store-returns@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://store-returns@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT sr_returned_date_sk, diff --git a/UsqlScripts/Modelling/store_sales.usql b/UsqlScripts/Modelling/store_sales.usql index a14a1a5..d8f6b35 100644 --- a/UsqlScripts/Modelling/store_sales.usql +++ b/UsqlScripts/Modelling/store_sales.usql @@ -23,7 +23,7 @@ ss_net_paid_inc_tax string, ss_net_profit string, empty string -FROM "wasb://store-sales@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://store-sales@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT ss_sold_date_sk, diff --git a/UsqlScripts/Modelling/time_dim.usql b/UsqlScripts/Modelling/time_dim.usql index b631ab8..575b75b 100644 --- a/UsqlScripts/Modelling/time_dim.usql +++ b/UsqlScripts/Modelling/time_dim.usql @@ -10,7 +10,7 @@ t_sub_shift string, t_meal_time string, empty string -FROM "wasb://time-dim@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://time-dim@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT t_time_sk, diff --git a/UsqlScripts/Modelling/warehouse.usql b/UsqlScripts/Modelling/warehouse.usql index c0a1145..195e896 100644 --- a/UsqlScripts/Modelling/warehouse.usql +++ b/UsqlScripts/Modelling/warehouse.usql @@ -14,7 +14,7 @@ w_country string, w_gmt_offset 
string, empty string -FROM "wasb://warehouse@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://warehouse@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT w_warehouse_sk, diff --git a/UsqlScripts/Modelling/web_page.usql b/UsqlScripts/Modelling/web_page.usql index 588c592..c453bd4 100644 --- a/UsqlScripts/Modelling/web_page.usql +++ b/UsqlScripts/Modelling/web_page.usql @@ -14,7 +14,7 @@ wp_image_count string, wp_max_ad_count string, empty string -FROM "wasb://web-page@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://web-page@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT wp_web_page_sk, diff --git a/UsqlScripts/Modelling/web_returns.usql b/UsqlScripts/Modelling/web_returns.usql index 15b74d3..2e30257 100644 --- a/UsqlScripts/Modelling/web_returns.usql +++ b/UsqlScripts/Modelling/web_returns.usql @@ -24,7 +24,7 @@ wr_account_credit string, wr_net_loss string, empty string -FROM "wasb://web-returns@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://web-returns@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT wr_returned_date_sk, diff --git a/UsqlScripts/Modelling/web_sales.usql b/UsqlScripts/Modelling/web_sales.usql index a9ab339..c9e9419 100644 --- a/UsqlScripts/Modelling/web_sales.usql +++ b/UsqlScripts/Modelling/web_sales.usql @@ -34,7 +34,7 @@ ws_net_paid_inc_ship_tax string, ws_net_profit string, empty string -FROM "wasb://web-sales@aasuseast2/{*}_{child_id:*}_100.dat" +FROM "wasb://web-sales@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT ws_sold_date_sk, diff --git a/UsqlScripts/Modelling/web_site.usql b/UsqlScripts/Modelling/web_site.usql index a8c32f4..c941c41 100644 --- a/UsqlScripts/Modelling/web_site.usql +++ b/UsqlScripts/Modelling/web_site.usql @@ -26,7 +26,7 @@ web_gmt_offset string, web_tax_percentage string, empty string -FROM "wasb://web-site@aasuseast2/{*}_{child_id:*}_100.dat" 
+FROM "wasb://web-site@/{*}_{child_id}_100.dat" USING Extractors.Text(delimiter: '|'); @filtered_results = SELECT web_site_sk, diff --git a/UsqlScripts/all_single/call_center.usql b/UsqlScripts/all_single/call_center.usql new file mode 100644 index 0000000..58bc5cd --- /dev/null +++ b/UsqlScripts/all_single/call_center.usql @@ -0,0 +1,73 @@ +@raw_parsed = EXTRACT child_id int, + cc_call_center_sk string, + cc_call_center_id string, + cc_rec_start_date string, + cc_rec_end_date string, + cc_closed_date_sk string, + cc_open_date_sk string, + cc_name string, + cc_class string, + cc_employees string, + cc_sq_ft string, + cc_hours string, + cc_manager string, + cc_mkt_id string, + cc_mkt_class string, + cc_mkt_desc string, + cc_market_manager string, + cc_division string, + cc_division_name string, + cc_company string, + cc_company_name string, + cc_street_number string, + cc_street_name string, + cc_street_type string, + cc_suite_number string, + cc_city string, + cc_county string, + cc_state string, + cc_zip string, + cc_country string, + cc_gmt_offset string, + cc_tax_percentage string, + empty string +FROM "wasb://call-center@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cc_call_center_sk, + cc_call_center_id, + cc_rec_start_date, + cc_rec_end_date, + cc_closed_date_sk, + cc_open_date_sk, + cc_name, + cc_class, + cc_employees, + cc_sq_ft, + cc_hours, + cc_manager, + cc_mkt_id, + cc_mkt_class, + cc_mkt_desc, + cc_market_manager, + cc_division, + cc_division_name, + cc_company, + cc_company_name, + cc_street_number, + cc_street_name, + cc_street_type, + cc_suite_number, + cc_city, + cc_county, + cc_state, + cc_zip, + cc_country, + cc_gmt_offset, + cc_tax_percentage +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/call_center.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/catalog_page.usql b/UsqlScripts/all_single/catalog_page.usql new file 
mode 100644 index 0000000..8acdc32 --- /dev/null +++ b/UsqlScripts/all_single/catalog_page.usql @@ -0,0 +1,29 @@ +@raw_parsed = EXTRACT child_id int, + cp_catalog_page_sk string, + cp_catalog_page_id string, + cp_start_date_sk string, + cp_end_date_sk string, + cp_department string, + cp_catalog_number string, + cp_catalog_page_number string, + cp_description string, + cp_type string, + empty string +FROM "wasb://catalog-page@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cp_catalog_page_sk, + cp_catalog_page_id, + cp_start_date_sk, + cp_end_date_sk, + cp_department, + cp_catalog_number, + cp_catalog_page_number, + cp_description, + cp_type +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/catalog_page.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/catalog_returns.usql b/UsqlScripts/all_single/catalog_returns.usql new file mode 100644 index 0000000..1ba4a27 --- /dev/null +++ b/UsqlScripts/all_single/catalog_returns.usql @@ -0,0 +1,52 @@ +@raw_parsed = EXTRACT child_id int, + cr_returned_date_sk string, + cr_returned_time_sk string, + cr_item_sk string, + cr_refunded_customer_sk string, + cr_refunded_cdemo_sk string, + cr_refunded_hdemo_sk string, + cr_refunded_addr_sk string, + cr_returning_customer_sk string, + cr_returning_cdemo_sk string, + cr_returning_hdemo_sk string, + cr_returning_addr_sk string, + cr_call_center_sk string, + cr_catalog_page_sk string, + cr_ship_mode_sk string, + cr_warehouse_sk string, + cr_reason_sk string, + cr_order_number string, + cr_return_quantity string, + cr_return_amount string, + cr_return_tax string, + cr_return_amt_inc_tax string, + cr_fee string, + cr_return_ship_cost string, + cr_refunded_cash string, + cr_reversed_charge string, + cr_store_credit string, + cr_net_loss string, + empty string +FROM "wasb://catalog-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + 
+@filtered_results = SELECT cr_returned_date_sk, + cr_item_sk, + cr_returning_customer_sk, + cr_returning_addr_sk, + cr_call_center_sk, + cr_catalog_page_sk, + cr_order_number, + cr_return_quantity, + cr_return_amount, + cr_return_amt_inc_tax, + cr_refunded_cash, + cr_reversed_charge, + cr_store_credit, + cr_net_loss +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/catalog_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/catalog_sales.usql b/UsqlScripts/all_single/catalog_sales.usql new file mode 100644 index 0000000..4dea2d7 --- /dev/null +++ b/UsqlScripts/all_single/catalog_sales.usql @@ -0,0 +1,74 @@ +@raw_parsed = EXTRACT child_id int, + cs_sold_date_sk string, + cs_sold_time_sk string, + cs_ship_date_sk string, + cs_bill_customer_sk string, + cs_bill_cdemo_sk string, + cs_bill_hdemo_sk string, + cs_bill_addr_sk string, + cs_ship_customer_sk string, + cs_ship_cdemo_sk string, + cs_ship_hdemo_sk string, + cs_ship_addr_sk string, + cs_call_center_sk string, + cs_catalog_page_sk string, + cs_ship_mode_sk string, + cs_warehouse_sk string, + cs_item_sk string, + cs_promo_sk string, + cs_order_number string, + cs_quantity string, + cs_wholesale_cost string, + cs_list_price string, + cs_sales_price string, + cs_ext_discount_amt string, + cs_ext_sales_price string, + cs_ext_wholesale_cost string, + cs_ext_list_price string, + cs_ext_tax string, + cs_coupon_amt string, + cs_ext_ship_cost string, + cs_net_paid string, + cs_net_paid_inc_tax string, + cs_net_paid_inc_ship string, + cs_net_paid_inc_ship_tax string, + cs_net_profit string, + empty string +FROM "wasb://catalog-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cs_sold_date_sk, + cs_sold_time_sk, + cs_ship_date_sk, + cs_bill_customer_sk, + cs_bill_cdemo_sk, + cs_bill_hdemo_sk, + cs_bill_addr_sk, + cs_ship_customer_sk, + cs_ship_hdemo_sk, + cs_ship_addr_sk, + 
cs_call_center_sk, + cs_catalog_page_sk, + cs_ship_mode_sk, + cs_warehouse_sk, + cs_item_sk, + cs_promo_sk, + cs_order_number, + cs_quantity, + cs_wholesale_cost, + cs_list_price, + cs_sales_price, + cs_ext_discount_amt, + cs_ext_sales_price, + cs_ext_wholesale_cost, + cs_ext_list_price, + cs_coupon_amt, + cs_ext_ship_cost, + cs_net_paid, + cs_net_profit +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/catalog_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/customer.usql b/UsqlScripts/all_single/customer.usql new file mode 100644 index 0000000..cb8a691 --- /dev/null +++ b/UsqlScripts/all_single/customer.usql @@ -0,0 +1,47 @@ +@raw_parsed = EXTRACT child_id int, + c_customer_sk string, + c_customer_id string, + c_current_cdemo_sk string, + c_current_hdemo_sk string, + c_current_addr_sk string, + c_first_shipto_date_sk string, + c_first_sales_date_sk string, + c_salutation string, + c_first_name string, + c_last_name string, + c_preferred_cust_flag string, + c_birth_day string, + c_birth_month string, + c_birth_year string, + c_birth_country string, + c_login string, + c_email_address string, + c_last_review_date string, + empty string +FROM "wasb://customer@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT c_customer_sk, + c_customer_id, + c_current_cdemo_sk, + c_current_hdemo_sk, + c_current_addr_sk, + c_first_shipto_date_sk, + c_first_sales_date_sk, + c_salutation, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_day, + c_birth_month, + c_birth_year, + c_birth_country, + c_login, + c_email_address, + c_last_review_date +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/customer.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/customer_address.usql b/UsqlScripts/all_single/customer_address.usql new file mode 100644 index 0000000..def5cd0 --- 
/dev/null +++ b/UsqlScripts/all_single/customer_address.usql @@ -0,0 +1,37 @@ +@raw_parsed = EXTRACT child_id int, + ca_address_sk string, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset string, + ca_location_type string, + empty string +FROM "wasb://customer-address@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ca_address_sk, + ca_address_id, + ca_street_number, + ca_street_name, + ca_street_type, + ca_suite_number, + ca_city, + ca_county, + ca_state, + ca_zip, + ca_country, + ca_gmt_offset, + ca_location_type +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/customer_address.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/customer_demographics.usql b/UsqlScripts/all_single/customer_demographics.usql new file mode 100644 index 0000000..8204a4f --- /dev/null +++ b/UsqlScripts/all_single/customer_demographics.usql @@ -0,0 +1,29 @@ +@raw_parsed = EXTRACT child_id int, + cd_demo_sk string, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate string, + cd_credit_rating string, + cd_dep_count string, + cd_dep_employed_count string, + cd_dep_college_count string, + empty string +FROM "wasb://customer-demographics@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cd_demo_sk, + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/customer_demographics.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/date_dim.usql 
b/UsqlScripts/all_single/date_dim.usql new file mode 100644 index 0000000..55ed3ac --- /dev/null +++ b/UsqlScripts/all_single/date_dim.usql @@ -0,0 +1,67 @@ +@raw_parsed = EXTRACT child_id int, + d_date_sk string, + d_date_id string, + d_date string, + d_month_seq string, + d_week_seq string, + d_quarter_seq string, + d_year string, + d_dow string, + d_moy string, + d_dom string, + d_qoy string, + d_fy_year string, + d_fy_quarter_seq string, + d_fy_week_seq string, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom string, + d_last_dom string, + d_same_day_ly string, + d_same_day_lq string, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string, + empty string +FROM "wasb://date-dim@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT d_date_sk, + d_date_id, + d_date, + d_month_seq, + d_week_seq, + d_quarter_seq, + d_year, + d_dow, + d_moy, + d_dom, + d_qoy, + d_fy_year, + d_fy_quarter_seq, + d_fy_week_seq, + d_day_name, + d_quarter_name, + d_holiday, + d_weekend, + d_following_holiday, + d_first_dom, + d_last_dom, + d_same_day_ly, + d_same_day_lq, + d_current_day, + d_current_week, + d_current_month, + d_current_quarter, + d_current_year +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/date_dim.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/dbgen_version.usql b/UsqlScripts/all_single/dbgen_version.usql new file mode 100644 index 0000000..c52adb2 --- /dev/null +++ b/UsqlScripts/all_single/dbgen_version.usql @@ -0,0 +1,19 @@ +@raw_parsed = EXTRACT child_id int, + dv_version string, + dv_create_date string, + dv_create_time string, + dv_cmdline_args string, + empty string +FROM "wasb://dbgen-version@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT 
dv_version, + dv_create_date, + dv_create_time, + dv_cmdline_args +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/dbgen_version.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/household_demographics.usql b/UsqlScripts/all_single/household_demographics.usql new file mode 100644 index 0000000..d3fcd24 --- /dev/null +++ b/UsqlScripts/all_single/household_demographics.usql @@ -0,0 +1,21 @@ +@raw_parsed = EXTRACT child_id int, + hd_demo_sk string, + hd_income_band_sk string, + hd_buy_potential string, + hd_dep_count string, + hd_vehicle_count string, + empty string +FROM "wasb://household-demographics@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT hd_demo_sk, + hd_income_band_sk, + hd_buy_potential, + hd_dep_count, + hd_vehicle_count +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/household_demographics.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/income_band.usql b/UsqlScripts/all_single/income_band.usql new file mode 100644 index 0000000..d0d8a21 --- /dev/null +++ b/UsqlScripts/all_single/income_band.usql @@ -0,0 +1,17 @@ +@raw_parsed = EXTRACT child_id int, + b_income_band_sk string, + b_lower_bound string, + b_upper_bound string, + empty string +FROM "wasb://income-band@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT b_income_band_sk, + b_lower_bound, + b_upper_bound +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/income_band.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/inventory.usql b/UsqlScripts/all_single/inventory.usql new file mode 100644 index 0000000..b6d5413 --- /dev/null +++ b/UsqlScripts/all_single/inventory.usql @@ -0,0 +1,19 @@ +@raw_parsed = EXTRACT child_id int, + nv_date_sk string, + nv_item_sk string, + 
nv_warehouse_sk string, + nv_quantity_on_hand string, + empty string +FROM "wasb://inventory@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT nv_date_sk, + nv_item_sk, + nv_warehouse_sk, + nv_quantity_on_hand +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/inventory.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/item.usql b/UsqlScripts/all_single/item.usql new file mode 100644 index 0000000..c8ec584 --- /dev/null +++ b/UsqlScripts/all_single/item.usql @@ -0,0 +1,55 @@ +@raw_parsed = EXTRACT child_id int, + _item_sk string, + _item_id string, + _rec_start_date string, + _rec_end_date string, + _item_desc string, + _current_price string, + _wholesale_cost string, + _brand_id string, + _brand string, + _class_id string, + _class string, + _category_id string, + _category string, + _manufact_id string, + _manufact string, + _size string, + _formulation string, + _color string, + _units string, + _container string, + _manager_id string, + _product_name string, + empty string +FROM "wasb://item@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _item_sk, + _item_id, + _rec_start_date, + _rec_end_date, + _item_desc, + _current_price, + _wholesale_cost, + _brand_id, + _brand, + _class_id, + _class, + _category_id, + _category, + _manufact_id, + _manufact, + _size, + _formulation, + _color, + _units, + _container, + _manager_id, + _product_name +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/item.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/promotion.usql b/UsqlScripts/all_single/promotion.usql new file mode 100644 index 0000000..ab0d6fb --- /dev/null +++ b/UsqlScripts/all_single/promotion.usql @@ -0,0 +1,49 @@ +@raw_parsed = EXTRACT child_id int, + _promo_sk string, + _promo_id string, + _start_date_sk string, + 
_end_date_sk string, + _item_sk string, + _cost string, + _response_target string, + _promo_name string, + _channel_dmail string, + _channel_email string, + _channel_catalog string, + _channel_tv string, + _channel_radio string, + _channel_press string, + _channel_event string, + _channel_demo string, + _channel_details string, + _purpose string, + _discount_active string, + empty string +FROM "wasb://promotion@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _promo_sk, + _promo_id, + _start_date_sk, + _end_date_sk, + _item_sk, + _cost, + _response_target, + _promo_name, + _channel_dmail, + _channel_email, + _channel_catalog, + _channel_tv, + _channel_radio, + _channel_press, + _channel_event, + _channel_demo, + _channel_details, + _purpose, + _discount_active +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/promotion.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/reason.usql b/UsqlScripts/all_single/reason.usql new file mode 100644 index 0000000..f496835 --- /dev/null +++ b/UsqlScripts/all_single/reason.usql @@ -0,0 +1,17 @@ +@raw_parsed = EXTRACT child_id int, + _reason_sk string, + _reason_id string, + _reason_desc string, + empty string +FROM "wasb://reason@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _reason_sk, + _reason_id, + _reason_desc +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/reason.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/ship_mode.usql b/UsqlScripts/all_single/ship_mode.usql new file mode 100644 index 0000000..589201d --- /dev/null +++ b/UsqlScripts/all_single/ship_mode.usql @@ -0,0 +1,23 @@ +@raw_parsed = EXTRACT child_id int, + sm_ship_mode_sk string, + sm_ship_mode_id string, + sm_type string, + sm_code string, + sm_carrier string, + sm_contract string, + empty string +FROM 
"wasb://ship-mode@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sm_ship_mode_sk, + sm_ship_mode_id, + sm_type, + sm_code, + sm_carrier, + sm_contract +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/ship_mode.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/store.usql b/UsqlScripts/all_single/store.usql new file mode 100644 index 0000000..1143315 --- /dev/null +++ b/UsqlScripts/all_single/store.usql @@ -0,0 +1,69 @@ +@raw_parsed = EXTRACT child_id int, + s_store_sk string, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk string, + s_store_name string, + s_number_employees string, + s_floor_space string, + s_hours string, + s_manager string, + s_market_id string, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id string, + s_division_name string, + s_company_id string, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset string, + s_tax_precentage string, + empty string +FROM "wasb://store@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT s_store_sk, + s_store_id, + s_rec_start_date, + s_rec_end_date, + s_closed_date_sk, + s_store_name, + s_number_employees, + s_floor_space, + s_hours, + s_manager, + s_market_id, + s_geography_class, + s_market_desc, + s_market_manager, + s_division_id, + s_division_name, + s_company_id, + s_company_name, + s_street_number, + s_street_name, + s_street_type, + s_suite_number, + s_city, + s_county, + s_state, + s_zip, + s_country, + s_gmt_offset, + s_tax_precentage +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/store.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of 
file diff --git a/UsqlScripts/all_single/store_returns.usql b/UsqlScripts/all_single/store_returns.usql new file mode 100644 index 0000000..1e52c65 --- /dev/null +++ b/UsqlScripts/all_single/store_returns.usql @@ -0,0 +1,42 @@ +@raw_parsed = EXTRACT child_id int, + sr_returned_date_sk string, + sr_return_time_sk string, + sr_item_sk string, + sr_customer_sk string, + sr_cdemo_sk string, + sr_hdemo_sk string, + sr_addr_sk string, + sr_store_sk string, + sr_reason_sk string, + sr_ticket_number string, + sr_return_quantity string, + sr_return_amt string, + sr_return_tax string, + sr_return_amt_inc_tax string, + sr_fee string, + sr_return_ship_cost string, + sr_refunded_cash string, + sr_reversed_charge string, + sr_store_credit string, + sr_net_loss string, + empty string +FROM "wasb://store-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sr_returned_date_sk, + sr_item_sk, + sr_customer_sk, + sr_cdemo_sk, + sr_store_sk, + sr_reason_sk, + sr_ticket_number, + sr_return_quantity, + sr_return_amt, + sr_return_amt_inc_tax, + sr_net_loss +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/store_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/store_sales.usql b/UsqlScripts/all_single/store_sales.usql new file mode 100644 index 0000000..7684b2c --- /dev/null +++ b/UsqlScripts/all_single/store_sales.usql @@ -0,0 +1,56 @@ +@raw_parsed = EXTRACT child_id int, + ss_sold_date_sk string, + ss_sold_time_sk string, + ss_item_sk string, + ss_customer_sk string, + ss_cdemo_sk string, + ss_hdemo_sk string, + ss_addr_sk string, + ss_store_sk string, + ss_promo_sk string, + ss_ticket_number string, + ss_quantity string, + ss_wholesale_cost string, + ss_list_price string, + ss_sales_price string, + ss_ext_discount_amt string, + ss_ext_sales_price string, + ss_ext_wholesale_cost string, + ss_ext_list_price string, + ss_ext_tax string, + ss_coupon_amt 
string, + ss_net_paid string, + ss_net_paid_inc_tax string, + ss_net_profit string, + empty string +FROM "wasb://store-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ss_sold_date_sk, + ss_sold_time_sk, + ss_item_sk, + ss_customer_sk, + ss_cdemo_sk, + ss_hdemo_sk, + ss_addr_sk, + ss_store_sk, + ss_promo_sk, + ss_ticket_number, + ss_quantity, + ss_wholesale_cost, + ss_list_price, + ss_sales_price, + ss_ext_discount_amt, + ss_ext_sales_price, + ss_ext_wholesale_cost, + ss_ext_list_price, + ss_ext_tax, + ss_coupon_amt, + ss_net_paid, + ss_net_profit +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/store_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/time_dim.usql b/UsqlScripts/all_single/time_dim.usql new file mode 100644 index 0000000..9f1514d --- /dev/null +++ b/UsqlScripts/all_single/time_dim.usql @@ -0,0 +1,31 @@ +@raw_parsed = EXTRACT child_id int, + t_time_sk string, + t_time_id string, + t_time string, + t_hour string, + t_minute string, + t_second string, + t_am_pm string, + t_shift string, + t_sub_shift string, + t_meal_time string, + empty string +FROM "wasb://time-dim@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT t_time_sk, + t_time_id, + t_time, + t_hour, + t_minute, + t_second, + t_am_pm, + t_shift, + t_sub_shift, + t_meal_time +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/time_dim.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/warehouse.usql b/UsqlScripts/all_single/warehouse.usql new file mode 100644 index 0000000..2cee3b0 --- /dev/null +++ b/UsqlScripts/all_single/warehouse.usql @@ -0,0 +1,39 @@ +@raw_parsed = EXTRACT child_id int, + w_warehouse_sk string, + w_warehouse_id string, + w_warehouse_name string, + w_warehouse_sq_ft string, + w_street_number string, + w_street_name 
string, + w_street_type string, + w_suite_number string, + w_city string, + w_county string, + w_state string, + w_zip string, + w_country string, + w_gmt_offset string, + empty string +FROM "wasb://warehouse@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT w_warehouse_sk, + w_warehouse_id, + w_warehouse_name, + w_warehouse_sq_ft, + w_street_number, + w_street_name, + w_street_type, + w_suite_number, + w_city, + w_county, + w_state, + w_zip, + w_country, + w_gmt_offset +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/warehouse.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/web_page.usql b/UsqlScripts/all_single/web_page.usql new file mode 100644 index 0000000..c4c02a4 --- /dev/null +++ b/UsqlScripts/all_single/web_page.usql @@ -0,0 +1,39 @@ +@raw_parsed = EXTRACT child_id int, + wp_web_page_sk string, + wp_web_page_id string, + wp_rec_start_date string, + wp_rec_end_date string, + wp_creation_date_sk string, + wp_access_date_sk string, + wp_autogen_flag string, + wp_customer_sk string, + wp_url string, + wp_type string, + wp_char_count string, + wp_link_count string, + wp_image_count string, + wp_max_ad_count string, + empty string +FROM "wasb://web-page@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wp_web_page_sk, + wp_web_page_id, + wp_rec_start_date, + wp_rec_end_date, + wp_creation_date_sk, + wp_access_date_sk, + wp_autogen_flag, + wp_customer_sk, + wp_url, + wp_type, + wp_char_count, + wp_link_count, + wp_image_count, + wp_max_ad_count +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/web_page.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/web_returns.usql b/UsqlScripts/all_single/web_returns.usql new file mode 100644 index 0000000..cb01ef6 --- /dev/null +++ b/UsqlScripts/all_single/web_returns.usql @@ -0,0 
+1,50 @@ +@raw_parsed = EXTRACT child_id int, + wr_returned_date_sk string, + wr_returned_time_sk string, + wr_item_sk string, + wr_refunded_customer_sk string, + wr_refunded_cdemo_sk string, + wr_refunded_hdemo_sk string, + wr_refunded_addr_sk string, + wr_returning_customer_sk string, + wr_returning_cdemo_sk string, + wr_returning_hdemo_sk string, + wr_returning_addr_sk string, + wr_web_page_sk string, + wr_reason_sk string, + wr_order_number string, + wr_return_quantity string, + wr_return_amt string, + wr_return_tax string, + wr_return_amt_inc_tax string, + wr_fee string, + wr_return_ship_cost string, + wr_refunded_cash string, + wr_reversed_charge string, + wr_account_credit string, + wr_net_loss string, + empty string +FROM "wasb://web-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wr_returned_date_sk, + wr_item_sk, + wr_refunded_cdemo_sk, + wr_refunded_addr_sk, + wr_returning_customer_sk, + wr_returning_cdemo_sk, + wr_returning_addr_sk, + wr_web_page_sk, + wr_reason_sk, + wr_order_number, + wr_return_quantity, + wr_return_amt, + wr_fee, + wr_refunded_cash, + wr_net_loss +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/web_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/web_sales.usql b/UsqlScripts/all_single/web_sales.usql new file mode 100644 index 0000000..c26d2e1 --- /dev/null +++ b/UsqlScripts/all_single/web_sales.usql @@ -0,0 +1,70 @@ +@raw_parsed = EXTRACT child_id int, + ws_sold_date_sk string, + ws_sold_time_sk string, + ws_ship_date_sk string, + ws_item_sk string, + ws_bill_customer_sk string, + ws_bill_cdemo_sk string, + ws_bill_hdemo_sk string, + ws_bill_addr_sk string, + ws_ship_customer_sk string, + ws_ship_cdemo_sk string, + ws_ship_hdemo_sk string, + ws_ship_addr_sk string, + ws_web_page_sk string, + ws_web_site_sk string, + ws_ship_mode_sk string, + ws_warehouse_sk string, + ws_promo_sk string, + 
ws_order_number string, + ws_quantity string, + ws_wholesale_cost string, + ws_list_price string, + ws_sales_price string, + ws_ext_discount_amt string, + ws_ext_sales_price string, + ws_ext_wholesale_cost string, + ws_ext_list_price string, + ws_ext_tax string, + ws_coupon_amt string, + ws_ext_ship_cost string, + ws_net_paid string, + ws_net_paid_inc_tax string, + ws_net_paid_inc_ship string, + ws_net_paid_inc_ship_tax string, + ws_net_profit string, + empty string +FROM "wasb://web-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ws_sold_date_sk, + ws_sold_time_sk, + ws_ship_date_sk, + ws_item_sk, + ws_bill_customer_sk, + ws_bill_addr_sk, + ws_ship_hdemo_sk, + ws_ship_addr_sk, + ws_web_page_sk, + ws_web_site_sk, + ws_ship_mode_sk, + ws_warehouse_sk, + ws_promo_sk, + ws_order_number, + ws_quantity, + ws_wholesale_cost, + ws_list_price, + ws_sales_price, + ws_ext_discount_amt, + ws_ext_sales_price, + ws_ext_wholesale_cost, + ws_ext_list_price, + ws_ext_ship_cost, + ws_net_paid, + ws_net_profit +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/web_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/all_single/web_site.usql b/UsqlScripts/all_single/web_site.usql new file mode 100644 index 0000000..b9ef2c9 --- /dev/null +++ b/UsqlScripts/all_single/web_site.usql @@ -0,0 +1,63 @@ +@raw_parsed = EXTRACT child_id int, + web_site_sk string, + web_site_id string, + web_rec_start_date string, + web_rec_end_date string, + web_name string, + web_open_date_sk string, + web_close_date_sk string, + web_class string, + web_manager string, + web_mkt_id string, + web_mkt_class string, + web_mkt_desc string, + web_market_manager string, + web_company_id string, + web_company_name string, + web_street_number string, + web_street_name string, + web_street_type string, + web_suite_number string, + web_city string, + web_county string, + web_state string, + web_zip 
string, + web_country string, + web_gmt_offset string, + web_tax_percentage string, + empty string +FROM "wasb://web-site@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT web_site_sk, + web_site_id, + web_rec_start_date, + web_rec_end_date, + web_name, + web_open_date_sk, + web_close_date_sk, + web_class, + web_manager, + web_mkt_id, + web_mkt_class, + web_mkt_desc, + web_market_manager, + web_company_id, + web_company_name, + web_street_number, + web_street_name, + web_street_type, + web_suite_number, + web_city, + web_county, + web_state, + web_zip, + web_country, + web_gmt_offset, + web_tax_percentage +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/all_single/web_site.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/call_center.usql b/UsqlScripts/large_multiple/call_center.usql new file mode 100644 index 0000000..3ea9bc1 --- /dev/null +++ b/UsqlScripts/large_multiple/call_center.usql @@ -0,0 +1,74 @@ +@raw_parsed = EXTRACT child_id int, + cc_call_center_sk string, + cc_call_center_id string, + cc_rec_start_date string, + cc_rec_end_date string, + cc_closed_date_sk string, + cc_open_date_sk string, + cc_name string, + cc_class string, + cc_employees string, + cc_sq_ft string, + cc_hours string, + cc_manager string, + cc_mkt_id string, + cc_mkt_class string, + cc_mkt_desc string, + cc_market_manager string, + cc_division string, + cc_division_name string, + cc_company string, + cc_company_name string, + cc_street_number string, + cc_street_name string, + cc_street_type string, + cc_suite_number string, + cc_city string, + cc_county string, + cc_state string, + cc_zip string, + cc_country string, + cc_gmt_offset string, + cc_tax_percentage string, + empty string +FROM "wasb://call-center@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cc_call_center_sk, + cc_call_center_id, + cc_rec_start_date, + 
cc_rec_end_date, + cc_closed_date_sk, + cc_open_date_sk, + cc_name, + cc_class, + cc_employees, + cc_sq_ft, + cc_hours, + cc_manager, + cc_mkt_id, + cc_mkt_class, + cc_mkt_desc, + cc_market_manager, + cc_division, + cc_division_name, + cc_company, + cc_company_name, + cc_street_number, + cc_street_name, + cc_street_type, + cc_suite_number, + cc_city, + cc_county, + cc_state, + cc_zip, + cc_country, + cc_gmt_offset, + cc_tax_percentage +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/call_center.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/catalog_page.usql b/UsqlScripts/large_multiple/catalog_page.usql new file mode 100644 index 0000000..d6ef22c --- /dev/null +++ b/UsqlScripts/large_multiple/catalog_page.usql @@ -0,0 +1,30 @@ +@raw_parsed = EXTRACT child_id int, + cp_catalog_page_sk string, + cp_catalog_page_id string, + cp_start_date_sk string, + cp_end_date_sk string, + cp_department string, + cp_catalog_number string, + cp_catalog_page_number string, + cp_description string, + cp_type string, + empty string +FROM "wasb://catalog-page@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cp_catalog_page_sk, + cp_catalog_page_id, + cp_start_date_sk, + cp_end_date_sk, + cp_department, + cp_catalog_number, + cp_catalog_page_number, + cp_description, + cp_type +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/catalog_page.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/catalog_returns_1-25.usql b/UsqlScripts/large_multiple/catalog_returns_1-25.usql new file mode 100644 index 0000000..fe993e4 --- /dev/null +++ b/UsqlScripts/large_multiple/catalog_returns_1-25.usql @@ -0,0 +1,53 @@ +@raw_parsed = EXTRACT child_id int, + cr_returned_date_sk string, + cr_returned_time_sk string, + 
cr_item_sk string, + cr_refunded_customer_sk string, + cr_refunded_cdemo_sk string, + cr_refunded_hdemo_sk string, + cr_refunded_addr_sk string, + cr_returning_customer_sk string, + cr_returning_cdemo_sk string, + cr_returning_hdemo_sk string, + cr_returning_addr_sk string, + cr_call_center_sk string, + cr_catalog_page_sk string, + cr_ship_mode_sk string, + cr_warehouse_sk string, + cr_reason_sk string, + cr_order_number string, + cr_return_quantity string, + cr_return_amount string, + cr_return_tax string, + cr_return_amt_inc_tax string, + cr_fee string, + cr_return_ship_cost string, + cr_refunded_cash string, + cr_reversed_charge string, + cr_store_credit string, + cr_net_loss string, + empty string +FROM "wasb://catalog-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cr_returned_date_sk, + cr_item_sk, + cr_returning_customer_sk, + cr_returning_addr_sk, + cr_call_center_sk, + cr_catalog_page_sk, + cr_order_number, + cr_return_quantity, + cr_return_amount, + cr_return_amt_inc_tax, + cr_refunded_cash, + cr_reversed_charge, + cr_store_credit, + cr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 25; + +OUTPUT @filtered_results +TO "/large_multiple/catalog_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/catalog_returns_26-50.usql b/UsqlScripts/large_multiple/catalog_returns_26-50.usql new file mode 100644 index 0000000..564b928 --- /dev/null +++ b/UsqlScripts/large_multiple/catalog_returns_26-50.usql @@ -0,0 +1,53 @@ +@raw_parsed = EXTRACT child_id int, + cr_returned_date_sk string, + cr_returned_time_sk string, + cr_item_sk string, + cr_refunded_customer_sk string, + cr_refunded_cdemo_sk string, + cr_refunded_hdemo_sk string, + cr_refunded_addr_sk string, + cr_returning_customer_sk string, + cr_returning_cdemo_sk string, + cr_returning_hdemo_sk string, + cr_returning_addr_sk string, + cr_call_center_sk string, + 
cr_catalog_page_sk string, + cr_ship_mode_sk string, + cr_warehouse_sk string, + cr_reason_sk string, + cr_order_number string, + cr_return_quantity string, + cr_return_amount string, + cr_return_tax string, + cr_return_amt_inc_tax string, + cr_fee string, + cr_return_ship_cost string, + cr_refunded_cash string, + cr_reversed_charge string, + cr_store_credit string, + cr_net_loss string, + empty string +FROM "wasb://catalog-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cr_returned_date_sk, + cr_item_sk, + cr_returning_customer_sk, + cr_returning_addr_sk, + cr_call_center_sk, + cr_catalog_page_sk, + cr_order_number, + cr_return_quantity, + cr_return_amount, + cr_return_amt_inc_tax, + cr_refunded_cash, + cr_reversed_charge, + cr_store_credit, + cr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 26 AND 50; + +OUTPUT @filtered_results +TO "/large_multiple/catalog_returns_2.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/catalog_returns_51-75.usql b/UsqlScripts/large_multiple/catalog_returns_51-75.usql new file mode 100644 index 0000000..6012873 --- /dev/null +++ b/UsqlScripts/large_multiple/catalog_returns_51-75.usql @@ -0,0 +1,53 @@ +@raw_parsed = EXTRACT child_id int, + cr_returned_date_sk string, + cr_returned_time_sk string, + cr_item_sk string, + cr_refunded_customer_sk string, + cr_refunded_cdemo_sk string, + cr_refunded_hdemo_sk string, + cr_refunded_addr_sk string, + cr_returning_customer_sk string, + cr_returning_cdemo_sk string, + cr_returning_hdemo_sk string, + cr_returning_addr_sk string, + cr_call_center_sk string, + cr_catalog_page_sk string, + cr_ship_mode_sk string, + cr_warehouse_sk string, + cr_reason_sk string, + cr_order_number string, + cr_return_quantity string, + cr_return_amount string, + cr_return_tax string, + cr_return_amt_inc_tax string, + cr_fee string, + cr_return_ship_cost string, + cr_refunded_cash string, + 
cr_reversed_charge string, + cr_store_credit string, + cr_net_loss string, + empty string +FROM "wasb://catalog-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cr_returned_date_sk, + cr_item_sk, + cr_returning_customer_sk, + cr_returning_addr_sk, + cr_call_center_sk, + cr_catalog_page_sk, + cr_order_number, + cr_return_quantity, + cr_return_amount, + cr_return_amt_inc_tax, + cr_refunded_cash, + cr_reversed_charge, + cr_store_credit, + cr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 51 AND 75; + +OUTPUT @filtered_results +TO "/large_multiple/catalog_returns_3.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/catalog_returns_76-100.usql b/UsqlScripts/large_multiple/catalog_returns_76-100.usql new file mode 100644 index 0000000..83c1c10 --- /dev/null +++ b/UsqlScripts/large_multiple/catalog_returns_76-100.usql @@ -0,0 +1,53 @@ +@raw_parsed = EXTRACT child_id int, + cr_returned_date_sk string, + cr_returned_time_sk string, + cr_item_sk string, + cr_refunded_customer_sk string, + cr_refunded_cdemo_sk string, + cr_refunded_hdemo_sk string, + cr_refunded_addr_sk string, + cr_returning_customer_sk string, + cr_returning_cdemo_sk string, + cr_returning_hdemo_sk string, + cr_returning_addr_sk string, + cr_call_center_sk string, + cr_catalog_page_sk string, + cr_ship_mode_sk string, + cr_warehouse_sk string, + cr_reason_sk string, + cr_order_number string, + cr_return_quantity string, + cr_return_amount string, + cr_return_tax string, + cr_return_amt_inc_tax string, + cr_fee string, + cr_return_ship_cost string, + cr_refunded_cash string, + cr_reversed_charge string, + cr_store_credit string, + cr_net_loss string, + empty string +FROM "wasb://catalog-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cr_returned_date_sk, + cr_item_sk, + cr_returning_customer_sk, + cr_returning_addr_sk, + 
cr_call_center_sk, + cr_catalog_page_sk, + cr_order_number, + cr_return_quantity, + cr_return_amount, + cr_return_amt_inc_tax, + cr_refunded_cash, + cr_reversed_charge, + cr_store_credit, + cr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 76 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/catalog_returns_4.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/catalog_sales_1-25.usql b/UsqlScripts/large_multiple/catalog_sales_1-25.usql new file mode 100644 index 0000000..2d498ba --- /dev/null +++ b/UsqlScripts/large_multiple/catalog_sales_1-25.usql @@ -0,0 +1,75 @@ +@raw_parsed = EXTRACT child_id int, + cs_sold_date_sk string, + cs_sold_time_sk string, + cs_ship_date_sk string, + cs_bill_customer_sk string, + cs_bill_cdemo_sk string, + cs_bill_hdemo_sk string, + cs_bill_addr_sk string, + cs_ship_customer_sk string, + cs_ship_cdemo_sk string, + cs_ship_hdemo_sk string, + cs_ship_addr_sk string, + cs_call_center_sk string, + cs_catalog_page_sk string, + cs_ship_mode_sk string, + cs_warehouse_sk string, + cs_item_sk string, + cs_promo_sk string, + cs_order_number string, + cs_quantity string, + cs_wholesale_cost string, + cs_list_price string, + cs_sales_price string, + cs_ext_discount_amt string, + cs_ext_sales_price string, + cs_ext_wholesale_cost string, + cs_ext_list_price string, + cs_ext_tax string, + cs_coupon_amt string, + cs_ext_ship_cost string, + cs_net_paid string, + cs_net_paid_inc_tax string, + cs_net_paid_inc_ship string, + cs_net_paid_inc_ship_tax string, + cs_net_profit string, + empty string +FROM "wasb://catalog-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cs_sold_date_sk, + cs_sold_time_sk, + cs_ship_date_sk, + cs_bill_customer_sk, + cs_bill_cdemo_sk, + cs_bill_hdemo_sk, + cs_bill_addr_sk, + cs_ship_customer_sk, + cs_ship_hdemo_sk, + cs_ship_addr_sk, + cs_call_center_sk, + cs_catalog_page_sk, + cs_ship_mode_sk, + 
cs_warehouse_sk, + cs_item_sk, + cs_promo_sk, + cs_order_number, + cs_quantity, + cs_wholesale_cost, + cs_list_price, + cs_sales_price, + cs_ext_discount_amt, + cs_ext_sales_price, + cs_ext_wholesale_cost, + cs_ext_list_price, + cs_coupon_amt, + cs_ext_ship_cost, + cs_net_paid, + cs_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 25; + +OUTPUT @filtered_results +TO "/large_multiple/catalog_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/catalog_sales_26-50.usql b/UsqlScripts/large_multiple/catalog_sales_26-50.usql new file mode 100644 index 0000000..4607f57 --- /dev/null +++ b/UsqlScripts/large_multiple/catalog_sales_26-50.usql @@ -0,0 +1,75 @@ +@raw_parsed = EXTRACT child_id int, + cs_sold_date_sk string, + cs_sold_time_sk string, + cs_ship_date_sk string, + cs_bill_customer_sk string, + cs_bill_cdemo_sk string, + cs_bill_hdemo_sk string, + cs_bill_addr_sk string, + cs_ship_customer_sk string, + cs_ship_cdemo_sk string, + cs_ship_hdemo_sk string, + cs_ship_addr_sk string, + cs_call_center_sk string, + cs_catalog_page_sk string, + cs_ship_mode_sk string, + cs_warehouse_sk string, + cs_item_sk string, + cs_promo_sk string, + cs_order_number string, + cs_quantity string, + cs_wholesale_cost string, + cs_list_price string, + cs_sales_price string, + cs_ext_discount_amt string, + cs_ext_sales_price string, + cs_ext_wholesale_cost string, + cs_ext_list_price string, + cs_ext_tax string, + cs_coupon_amt string, + cs_ext_ship_cost string, + cs_net_paid string, + cs_net_paid_inc_tax string, + cs_net_paid_inc_ship string, + cs_net_paid_inc_ship_tax string, + cs_net_profit string, + empty string +FROM "wasb://catalog-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cs_sold_date_sk, + cs_sold_time_sk, + cs_ship_date_sk, + cs_bill_customer_sk, + cs_bill_cdemo_sk, + cs_bill_hdemo_sk, + cs_bill_addr_sk, + cs_ship_customer_sk, + 
cs_ship_hdemo_sk, + cs_ship_addr_sk, + cs_call_center_sk, + cs_catalog_page_sk, + cs_ship_mode_sk, + cs_warehouse_sk, + cs_item_sk, + cs_promo_sk, + cs_order_number, + cs_quantity, + cs_wholesale_cost, + cs_list_price, + cs_sales_price, + cs_ext_discount_amt, + cs_ext_sales_price, + cs_ext_wholesale_cost, + cs_ext_list_price, + cs_coupon_amt, + cs_ext_ship_cost, + cs_net_paid, + cs_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 26 AND 50; + +OUTPUT @filtered_results +TO "/large_multiple/catalog_sales_2.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/catalog_sales_51-75.usql b/UsqlScripts/large_multiple/catalog_sales_51-75.usql new file mode 100644 index 0000000..b95de38 --- /dev/null +++ b/UsqlScripts/large_multiple/catalog_sales_51-75.usql @@ -0,0 +1,75 @@ +@raw_parsed = EXTRACT child_id int, + cs_sold_date_sk string, + cs_sold_time_sk string, + cs_ship_date_sk string, + cs_bill_customer_sk string, + cs_bill_cdemo_sk string, + cs_bill_hdemo_sk string, + cs_bill_addr_sk string, + cs_ship_customer_sk string, + cs_ship_cdemo_sk string, + cs_ship_hdemo_sk string, + cs_ship_addr_sk string, + cs_call_center_sk string, + cs_catalog_page_sk string, + cs_ship_mode_sk string, + cs_warehouse_sk string, + cs_item_sk string, + cs_promo_sk string, + cs_order_number string, + cs_quantity string, + cs_wholesale_cost string, + cs_list_price string, + cs_sales_price string, + cs_ext_discount_amt string, + cs_ext_sales_price string, + cs_ext_wholesale_cost string, + cs_ext_list_price string, + cs_ext_tax string, + cs_coupon_amt string, + cs_ext_ship_cost string, + cs_net_paid string, + cs_net_paid_inc_tax string, + cs_net_paid_inc_ship string, + cs_net_paid_inc_ship_tax string, + cs_net_profit string, + empty string +FROM "wasb://catalog-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cs_sold_date_sk, + cs_sold_time_sk, + cs_ship_date_sk, + 
cs_bill_customer_sk, + cs_bill_cdemo_sk, + cs_bill_hdemo_sk, + cs_bill_addr_sk, + cs_ship_customer_sk, + cs_ship_hdemo_sk, + cs_ship_addr_sk, + cs_call_center_sk, + cs_catalog_page_sk, + cs_ship_mode_sk, + cs_warehouse_sk, + cs_item_sk, + cs_promo_sk, + cs_order_number, + cs_quantity, + cs_wholesale_cost, + cs_list_price, + cs_sales_price, + cs_ext_discount_amt, + cs_ext_sales_price, + cs_ext_wholesale_cost, + cs_ext_list_price, + cs_coupon_amt, + cs_ext_ship_cost, + cs_net_paid, + cs_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 51 AND 75; + +OUTPUT @filtered_results +TO "/large_multiple/catalog_sales_3.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/catalog_sales_76-100.usql b/UsqlScripts/large_multiple/catalog_sales_76-100.usql new file mode 100644 index 0000000..197de53 --- /dev/null +++ b/UsqlScripts/large_multiple/catalog_sales_76-100.usql @@ -0,0 +1,75 @@ +@raw_parsed = EXTRACT child_id int, + cs_sold_date_sk string, + cs_sold_time_sk string, + cs_ship_date_sk string, + cs_bill_customer_sk string, + cs_bill_cdemo_sk string, + cs_bill_hdemo_sk string, + cs_bill_addr_sk string, + cs_ship_customer_sk string, + cs_ship_cdemo_sk string, + cs_ship_hdemo_sk string, + cs_ship_addr_sk string, + cs_call_center_sk string, + cs_catalog_page_sk string, + cs_ship_mode_sk string, + cs_warehouse_sk string, + cs_item_sk string, + cs_promo_sk string, + cs_order_number string, + cs_quantity string, + cs_wholesale_cost string, + cs_list_price string, + cs_sales_price string, + cs_ext_discount_amt string, + cs_ext_sales_price string, + cs_ext_wholesale_cost string, + cs_ext_list_price string, + cs_ext_tax string, + cs_coupon_amt string, + cs_ext_ship_cost string, + cs_net_paid string, + cs_net_paid_inc_tax string, + cs_net_paid_inc_ship string, + cs_net_paid_inc_ship_tax string, + cs_net_profit string, + empty string +FROM "wasb://catalog-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: 
'|'); + +@filtered_results = SELECT cs_sold_date_sk, + cs_sold_time_sk, + cs_ship_date_sk, + cs_bill_customer_sk, + cs_bill_cdemo_sk, + cs_bill_hdemo_sk, + cs_bill_addr_sk, + cs_ship_customer_sk, + cs_ship_hdemo_sk, + cs_ship_addr_sk, + cs_call_center_sk, + cs_catalog_page_sk, + cs_ship_mode_sk, + cs_warehouse_sk, + cs_item_sk, + cs_promo_sk, + cs_order_number, + cs_quantity, + cs_wholesale_cost, + cs_list_price, + cs_sales_price, + cs_ext_discount_amt, + cs_ext_sales_price, + cs_ext_wholesale_cost, + cs_ext_list_price, + cs_coupon_amt, + cs_ext_ship_cost, + cs_net_paid, + cs_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 76 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/catalog_sales_4.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/customer.usql b/UsqlScripts/large_multiple/customer.usql new file mode 100644 index 0000000..f083dac --- /dev/null +++ b/UsqlScripts/large_multiple/customer.usql @@ -0,0 +1,48 @@ +@raw_parsed = EXTRACT child_id int, + c_customer_sk string, + c_customer_id string, + c_current_cdemo_sk string, + c_current_hdemo_sk string, + c_current_addr_sk string, + c_first_shipto_date_sk string, + c_first_sales_date_sk string, + c_salutation string, + c_first_name string, + c_last_name string, + c_preferred_cust_flag string, + c_birth_day string, + c_birth_month string, + c_birth_year string, + c_birth_country string, + c_login string, + c_email_address string, + c_last_review_date string, + empty string +FROM "wasb://customer@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT c_customer_sk, + c_customer_id, + c_current_cdemo_sk, + c_current_hdemo_sk, + c_current_addr_sk, + c_first_shipto_date_sk, + c_first_sales_date_sk, + c_salutation, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_day, + c_birth_month, + c_birth_year, + c_birth_country, + c_login, + c_email_address, + c_last_review_date +FROM 
@raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/customer.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/customer_address.usql b/UsqlScripts/large_multiple/customer_address.usql new file mode 100644 index 0000000..8a08d92 --- /dev/null +++ b/UsqlScripts/large_multiple/customer_address.usql @@ -0,0 +1,38 @@ +@raw_parsed = EXTRACT child_id int, + ca_address_sk string, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset string, + ca_location_type string, + empty string +FROM "wasb://customer-address@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ca_address_sk, + ca_address_id, + ca_street_number, + ca_street_name, + ca_street_type, + ca_suite_number, + ca_city, + ca_county, + ca_state, + ca_zip, + ca_country, + ca_gmt_offset, + ca_location_type +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/customer_address.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/customer_demographics.usql b/UsqlScripts/large_multiple/customer_demographics.usql new file mode 100644 index 0000000..244e798 --- /dev/null +++ b/UsqlScripts/large_multiple/customer_demographics.usql @@ -0,0 +1,30 @@ +@raw_parsed = EXTRACT child_id int, + cd_demo_sk string, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate string, + cd_credit_rating string, + cd_dep_count string, + cd_dep_employed_count string, + cd_dep_college_count string, + empty string +FROM "wasb://customer-demographics@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cd_demo_sk, + 
cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/customer_demographics.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/date_dim.usql b/UsqlScripts/large_multiple/date_dim.usql new file mode 100644 index 0000000..62c5904 --- /dev/null +++ b/UsqlScripts/large_multiple/date_dim.usql @@ -0,0 +1,68 @@ +@raw_parsed = EXTRACT child_id int, + d_date_sk string, + d_date_id string, + d_date string, + d_month_seq string, + d_week_seq string, + d_quarter_seq string, + d_year string, + d_dow string, + d_moy string, + d_dom string, + d_qoy string, + d_fy_year string, + d_fy_quarter_seq string, + d_fy_week_seq string, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom string, + d_last_dom string, + d_same_day_ly string, + d_same_day_lq string, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string, + empty string +FROM "wasb://date-dim@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT d_date_sk, + d_date_id, + d_date, + d_month_seq, + d_week_seq, + d_quarter_seq, + d_year, + d_dow, + d_moy, + d_dom, + d_qoy, + d_fy_year, + d_fy_quarter_seq, + d_fy_week_seq, + d_day_name, + d_quarter_name, + d_holiday, + d_weekend, + d_following_holiday, + d_first_dom, + d_last_dom, + d_same_day_ly, + d_same_day_lq, + d_current_day, + d_current_week, + d_current_month, + d_current_quarter, + d_current_year +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/date_dim.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git 
a/UsqlScripts/large_multiple/dbgen_version.usql b/UsqlScripts/large_multiple/dbgen_version.usql new file mode 100644 index 0000000..e3d7518 --- /dev/null +++ b/UsqlScripts/large_multiple/dbgen_version.usql @@ -0,0 +1,20 @@ +@raw_parsed = EXTRACT child_id int, + dv_version string, + dv_create_date string, + dv_create_time string, + dv_cmdline_args string, + empty string +FROM "wasb://dbgen-version@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT dv_version, + dv_create_date, + dv_create_time, + dv_cmdline_args +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/dbgen_version.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/household_demographics.usql b/UsqlScripts/large_multiple/household_demographics.usql new file mode 100644 index 0000000..fd6dddf --- /dev/null +++ b/UsqlScripts/large_multiple/household_demographics.usql @@ -0,0 +1,22 @@ +@raw_parsed = EXTRACT child_id int, + hd_demo_sk string, + hd_income_band_sk string, + hd_buy_potential string, + hd_dep_count string, + hd_vehicle_count string, + empty string +FROM "wasb://household-demographics@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT hd_demo_sk, + hd_income_band_sk, + hd_buy_potential, + hd_dep_count, + hd_vehicle_count +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/household_demographics.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/income_band.usql b/UsqlScripts/large_multiple/income_band.usql new file mode 100644 index 0000000..e42720e --- /dev/null +++ b/UsqlScripts/large_multiple/income_band.usql @@ -0,0 +1,18 @@ +@raw_parsed = EXTRACT child_id int, + b_income_band_sk string, + b_lower_bound string, + b_upper_bound string, + empty string +FROM 
"wasb://income-band@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT b_income_band_sk, + b_lower_bound, + b_upper_bound +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/income_band.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/inventory_1-25.usql b/UsqlScripts/large_multiple/inventory_1-25.usql new file mode 100644 index 0000000..484fc7d --- /dev/null +++ b/UsqlScripts/large_multiple/inventory_1-25.usql @@ -0,0 +1,20 @@ +@raw_parsed = EXTRACT child_id int, + nv_date_sk string, + nv_item_sk string, + nv_warehouse_sk string, + nv_quantity_on_hand string, + empty string +FROM "wasb://inventory@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT nv_date_sk, + nv_item_sk, + nv_warehouse_sk, + nv_quantity_on_hand +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 25; + +OUTPUT @filtered_results +TO "/large_multiple/inventory.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/inventory_26-50.usql b/UsqlScripts/large_multiple/inventory_26-50.usql new file mode 100644 index 0000000..83e0d09 --- /dev/null +++ b/UsqlScripts/large_multiple/inventory_26-50.usql @@ -0,0 +1,20 @@ +@raw_parsed = EXTRACT child_id int, + nv_date_sk string, + nv_item_sk string, + nv_warehouse_sk string, + nv_quantity_on_hand string, + empty string +FROM "wasb://inventory@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT nv_date_sk, + nv_item_sk, + nv_warehouse_sk, + nv_quantity_on_hand +FROM @raw_parsed +WHERE child_id BETWEEN 26 AND 50; + +OUTPUT @filtered_results +TO "/large_multiple/inventory_2.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/inventory_51-75.usql b/UsqlScripts/large_multiple/inventory_51-75.usql 
new file mode 100644 index 0000000..ae35891 --- /dev/null +++ b/UsqlScripts/large_multiple/inventory_51-75.usql @@ -0,0 +1,20 @@ +@raw_parsed = EXTRACT child_id int, + nv_date_sk string, + nv_item_sk string, + nv_warehouse_sk string, + nv_quantity_on_hand string, + empty string +FROM "wasb://inventory@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT nv_date_sk, + nv_item_sk, + nv_warehouse_sk, + nv_quantity_on_hand +FROM @raw_parsed +WHERE child_id BETWEEN 51 AND 75; + +OUTPUT @filtered_results +TO "/large_multiple/inventory_3.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/inventory_76-100.usql b/UsqlScripts/large_multiple/inventory_76-100.usql new file mode 100644 index 0000000..2afb8ad --- /dev/null +++ b/UsqlScripts/large_multiple/inventory_76-100.usql @@ -0,0 +1,20 @@ +@raw_parsed = EXTRACT child_id int, + nv_date_sk string, + nv_item_sk string, + nv_warehouse_sk string, + nv_quantity_on_hand string, + empty string +FROM "wasb://inventory@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT nv_date_sk, + nv_item_sk, + nv_warehouse_sk, + nv_quantity_on_hand +FROM @raw_parsed +WHERE child_id BETWEEN 76 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/inventory_4.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/item.usql b/UsqlScripts/large_multiple/item.usql new file mode 100644 index 0000000..2c6f866 --- /dev/null +++ b/UsqlScripts/large_multiple/item.usql @@ -0,0 +1,56 @@ +@raw_parsed = EXTRACT child_id int, + _item_sk string, + _item_id string, + _rec_start_date string, + _rec_end_date string, + _item_desc string, + _current_price string, + _wholesale_cost string, + _brand_id string, + _brand string, + _class_id string, + _class string, + _category_id string, + _category string, + _manufact_id string, + _manufact string, + 
_size string, + _formulation string, + _color string, + _units string, + _container string, + _manager_id string, + _product_name string, + empty string +FROM "wasb://item@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _item_sk, + _item_id, + _rec_start_date, + _rec_end_date, + _item_desc, + _current_price, + _wholesale_cost, + _brand_id, + _brand, + _class_id, + _class, + _category_id, + _category, + _manufact_id, + _manufact, + _size, + _formulation, + _color, + _units, + _container, + _manager_id, + _product_name +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/item.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/promotion.usql b/UsqlScripts/large_multiple/promotion.usql new file mode 100644 index 0000000..46e8654 --- /dev/null +++ b/UsqlScripts/large_multiple/promotion.usql @@ -0,0 +1,50 @@ +@raw_parsed = EXTRACT child_id int, + _promo_sk string, + _promo_id string, + _start_date_sk string, + _end_date_sk string, + _item_sk string, + _cost string, + _response_target string, + _promo_name string, + _channel_dmail string, + _channel_email string, + _channel_catalog string, + _channel_tv string, + _channel_radio string, + _channel_press string, + _channel_event string, + _channel_demo string, + _channel_details string, + _purpose string, + _discount_active string, + empty string +FROM "wasb://promotion@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _promo_sk, + _promo_id, + _start_date_sk, + _end_date_sk, + _item_sk, + _cost, + _response_target, + _promo_name, + _channel_dmail, + _channel_email, + _channel_catalog, + _channel_tv, + _channel_radio, + _channel_press, + _channel_event, + _channel_demo, + _channel_details, + _purpose, + _discount_active +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO 
"/large_multiple/promotion.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/reason.usql b/UsqlScripts/large_multiple/reason.usql new file mode 100644 index 0000000..df74223 --- /dev/null +++ b/UsqlScripts/large_multiple/reason.usql @@ -0,0 +1,18 @@ +@raw_parsed = EXTRACT child_id int, + _reason_sk string, + _reason_id string, + _reason_desc string, + empty string +FROM "wasb://reason@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _reason_sk, + _reason_id, + _reason_desc +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/reason.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/ship_mode.usql b/UsqlScripts/large_multiple/ship_mode.usql new file mode 100644 index 0000000..c3a2c61 --- /dev/null +++ b/UsqlScripts/large_multiple/ship_mode.usql @@ -0,0 +1,24 @@ +@raw_parsed = EXTRACT child_id int, + sm_ship_mode_sk string, + sm_ship_mode_id string, + sm_type string, + sm_code string, + sm_carrier string, + sm_contract string, + empty string +FROM "wasb://ship-mode@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sm_ship_mode_sk, + sm_ship_mode_id, + sm_type, + sm_code, + sm_carrier, + sm_contract +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/ship_mode.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/store.usql b/UsqlScripts/large_multiple/store.usql new file mode 100644 index 0000000..09504fa --- /dev/null +++ b/UsqlScripts/large_multiple/store.usql @@ -0,0 +1,70 @@ +@raw_parsed = EXTRACT child_id int, + s_store_sk string, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk string, + s_store_name string, + s_number_employees string, 
+ s_floor_space string, + s_hours string, + s_manager string, + s_market_id string, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id string, + s_division_name string, + s_company_id string, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset string, + s_tax_precentage string, + empty string +FROM "wasb://store@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT s_store_sk, + s_store_id, + s_rec_start_date, + s_rec_end_date, + s_closed_date_sk, + s_store_name, + s_number_employees, + s_floor_space, + s_hours, + s_manager, + s_market_id, + s_geography_class, + s_market_desc, + s_market_manager, + s_division_id, + s_division_name, + s_company_id, + s_company_name, + s_street_number, + s_street_name, + s_street_type, + s_suite_number, + s_city, + s_county, + s_state, + s_zip, + s_country, + s_gmt_offset, + s_tax_precentage +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/store.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/store_returns_1-25.usql b/UsqlScripts/large_multiple/store_returns_1-25.usql new file mode 100644 index 0000000..7c52154 --- /dev/null +++ b/UsqlScripts/large_multiple/store_returns_1-25.usql @@ -0,0 +1,43 @@ +@raw_parsed = EXTRACT child_id int, + sr_returned_date_sk string, + sr_return_time_sk string, + sr_item_sk string, + sr_customer_sk string, + sr_cdemo_sk string, + sr_hdemo_sk string, + sr_addr_sk string, + sr_store_sk string, + sr_reason_sk string, + sr_ticket_number string, + sr_return_quantity string, + sr_return_amt string, + sr_return_tax string, + sr_return_amt_inc_tax string, + sr_fee string, + sr_return_ship_cost string, + sr_refunded_cash string, + 
sr_reversed_charge string, + sr_store_credit string, + sr_net_loss string, + empty string +FROM "wasb://store-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sr_returned_date_sk, + sr_item_sk, + sr_customer_sk, + sr_cdemo_sk, + sr_store_sk, + sr_reason_sk, + sr_ticket_number, + sr_return_quantity, + sr_return_amt, + sr_return_amt_inc_tax, + sr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 25; + +OUTPUT @filtered_results +TO "/large_multiple/store_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/store_returns_26-50.usql b/UsqlScripts/large_multiple/store_returns_26-50.usql new file mode 100644 index 0000000..5bf9f2d --- /dev/null +++ b/UsqlScripts/large_multiple/store_returns_26-50.usql @@ -0,0 +1,43 @@ +@raw_parsed = EXTRACT child_id int, + sr_returned_date_sk string, + sr_return_time_sk string, + sr_item_sk string, + sr_customer_sk string, + sr_cdemo_sk string, + sr_hdemo_sk string, + sr_addr_sk string, + sr_store_sk string, + sr_reason_sk string, + sr_ticket_number string, + sr_return_quantity string, + sr_return_amt string, + sr_return_tax string, + sr_return_amt_inc_tax string, + sr_fee string, + sr_return_ship_cost string, + sr_refunded_cash string, + sr_reversed_charge string, + sr_store_credit string, + sr_net_loss string, + empty string +FROM "wasb://store-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sr_returned_date_sk, + sr_item_sk, + sr_customer_sk, + sr_cdemo_sk, + sr_store_sk, + sr_reason_sk, + sr_ticket_number, + sr_return_quantity, + sr_return_amt, + sr_return_amt_inc_tax, + sr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 26 AND 50; + +OUTPUT @filtered_results +TO "/large_multiple/store_returns_2.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/store_returns_51-75.usql 
b/UsqlScripts/large_multiple/store_returns_51-75.usql new file mode 100644 index 0000000..5e9923a --- /dev/null +++ b/UsqlScripts/large_multiple/store_returns_51-75.usql @@ -0,0 +1,43 @@ +@raw_parsed = EXTRACT child_id int, + sr_returned_date_sk string, + sr_return_time_sk string, + sr_item_sk string, + sr_customer_sk string, + sr_cdemo_sk string, + sr_hdemo_sk string, + sr_addr_sk string, + sr_store_sk string, + sr_reason_sk string, + sr_ticket_number string, + sr_return_quantity string, + sr_return_amt string, + sr_return_tax string, + sr_return_amt_inc_tax string, + sr_fee string, + sr_return_ship_cost string, + sr_refunded_cash string, + sr_reversed_charge string, + sr_store_credit string, + sr_net_loss string, + empty string +FROM "wasb://store-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sr_returned_date_sk, + sr_item_sk, + sr_customer_sk, + sr_cdemo_sk, + sr_store_sk, + sr_reason_sk, + sr_ticket_number, + sr_return_quantity, + sr_return_amt, + sr_return_amt_inc_tax, + sr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 51 AND 75; + +OUTPUT @filtered_results +TO "/large_multiple/store_returns_3.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/store_returns_76-100.usql b/UsqlScripts/large_multiple/store_returns_76-100.usql new file mode 100644 index 0000000..cd6390f --- /dev/null +++ b/UsqlScripts/large_multiple/store_returns_76-100.usql @@ -0,0 +1,43 @@ +@raw_parsed = EXTRACT child_id int, + sr_returned_date_sk string, + sr_return_time_sk string, + sr_item_sk string, + sr_customer_sk string, + sr_cdemo_sk string, + sr_hdemo_sk string, + sr_addr_sk string, + sr_store_sk string, + sr_reason_sk string, + sr_ticket_number string, + sr_return_quantity string, + sr_return_amt string, + sr_return_tax string, + sr_return_amt_inc_tax string, + sr_fee string, + sr_return_ship_cost string, + sr_refunded_cash string, + sr_reversed_charge 
string, + sr_store_credit string, + sr_net_loss string, + empty string +FROM "wasb://store-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sr_returned_date_sk, + sr_item_sk, + sr_customer_sk, + sr_cdemo_sk, + sr_store_sk, + sr_reason_sk, + sr_ticket_number, + sr_return_quantity, + sr_return_amt, + sr_return_amt_inc_tax, + sr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 76 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/store_returns_4.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/store_sales_1-25.usql b/UsqlScripts/large_multiple/store_sales_1-25.usql new file mode 100644 index 0000000..be62fae --- /dev/null +++ b/UsqlScripts/large_multiple/store_sales_1-25.usql @@ -0,0 +1,57 @@ +@raw_parsed = EXTRACT child_id int, + ss_sold_date_sk string, + ss_sold_time_sk string, + ss_item_sk string, + ss_customer_sk string, + ss_cdemo_sk string, + ss_hdemo_sk string, + ss_addr_sk string, + ss_store_sk string, + ss_promo_sk string, + ss_ticket_number string, + ss_quantity string, + ss_wholesale_cost string, + ss_list_price string, + ss_sales_price string, + ss_ext_discount_amt string, + ss_ext_sales_price string, + ss_ext_wholesale_cost string, + ss_ext_list_price string, + ss_ext_tax string, + ss_coupon_amt string, + ss_net_paid string, + ss_net_paid_inc_tax string, + ss_net_profit string, + empty string +FROM "wasb://store-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ss_sold_date_sk, + ss_sold_time_sk, + ss_item_sk, + ss_customer_sk, + ss_cdemo_sk, + ss_hdemo_sk, + ss_addr_sk, + ss_store_sk, + ss_promo_sk, + ss_ticket_number, + ss_quantity, + ss_wholesale_cost, + ss_list_price, + ss_sales_price, + ss_ext_discount_amt, + ss_ext_sales_price, + ss_ext_wholesale_cost, + ss_ext_list_price, + ss_ext_tax, + ss_coupon_amt, + ss_net_paid, + ss_net_profit +FROM @raw_parsed +WHERE child_id 
BETWEEN 1 AND 25; + +OUTPUT @filtered_results +TO "/large_multiple/store_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/store_sales_26-50.usql b/UsqlScripts/large_multiple/store_sales_26-50.usql new file mode 100644 index 0000000..d64cebc --- /dev/null +++ b/UsqlScripts/large_multiple/store_sales_26-50.usql @@ -0,0 +1,57 @@ +@raw_parsed = EXTRACT child_id int, + ss_sold_date_sk string, + ss_sold_time_sk string, + ss_item_sk string, + ss_customer_sk string, + ss_cdemo_sk string, + ss_hdemo_sk string, + ss_addr_sk string, + ss_store_sk string, + ss_promo_sk string, + ss_ticket_number string, + ss_quantity string, + ss_wholesale_cost string, + ss_list_price string, + ss_sales_price string, + ss_ext_discount_amt string, + ss_ext_sales_price string, + ss_ext_wholesale_cost string, + ss_ext_list_price string, + ss_ext_tax string, + ss_coupon_amt string, + ss_net_paid string, + ss_net_paid_inc_tax string, + ss_net_profit string, + empty string +FROM "wasb://store-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ss_sold_date_sk, + ss_sold_time_sk, + ss_item_sk, + ss_customer_sk, + ss_cdemo_sk, + ss_hdemo_sk, + ss_addr_sk, + ss_store_sk, + ss_promo_sk, + ss_ticket_number, + ss_quantity, + ss_wholesale_cost, + ss_list_price, + ss_sales_price, + ss_ext_discount_amt, + ss_ext_sales_price, + ss_ext_wholesale_cost, + ss_ext_list_price, + ss_ext_tax, + ss_coupon_amt, + ss_net_paid, + ss_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 26 AND 50; + +OUTPUT @filtered_results +TO "/large_multiple/store_sales_2.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/store_sales_51-75.usql b/UsqlScripts/large_multiple/store_sales_51-75.usql new file mode 100644 index 0000000..47821d0 --- /dev/null +++ b/UsqlScripts/large_multiple/store_sales_51-75.usql @@ -0,0 +1,57 @@ +@raw_parsed = EXTRACT 
child_id int, + ss_sold_date_sk string, + ss_sold_time_sk string, + ss_item_sk string, + ss_customer_sk string, + ss_cdemo_sk string, + ss_hdemo_sk string, + ss_addr_sk string, + ss_store_sk string, + ss_promo_sk string, + ss_ticket_number string, + ss_quantity string, + ss_wholesale_cost string, + ss_list_price string, + ss_sales_price string, + ss_ext_discount_amt string, + ss_ext_sales_price string, + ss_ext_wholesale_cost string, + ss_ext_list_price string, + ss_ext_tax string, + ss_coupon_amt string, + ss_net_paid string, + ss_net_paid_inc_tax string, + ss_net_profit string, + empty string +FROM "wasb://store-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ss_sold_date_sk, + ss_sold_time_sk, + ss_item_sk, + ss_customer_sk, + ss_cdemo_sk, + ss_hdemo_sk, + ss_addr_sk, + ss_store_sk, + ss_promo_sk, + ss_ticket_number, + ss_quantity, + ss_wholesale_cost, + ss_list_price, + ss_sales_price, + ss_ext_discount_amt, + ss_ext_sales_price, + ss_ext_wholesale_cost, + ss_ext_list_price, + ss_ext_tax, + ss_coupon_amt, + ss_net_paid, + ss_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 51 AND 75; + +OUTPUT @filtered_results +TO "/large_multiple/store_sales_3.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/store_sales_76-100.usql b/UsqlScripts/large_multiple/store_sales_76-100.usql new file mode 100644 index 0000000..84fbd01 --- /dev/null +++ b/UsqlScripts/large_multiple/store_sales_76-100.usql @@ -0,0 +1,57 @@ +@raw_parsed = EXTRACT child_id int, + ss_sold_date_sk string, + ss_sold_time_sk string, + ss_item_sk string, + ss_customer_sk string, + ss_cdemo_sk string, + ss_hdemo_sk string, + ss_addr_sk string, + ss_store_sk string, + ss_promo_sk string, + ss_ticket_number string, + ss_quantity string, + ss_wholesale_cost string, + ss_list_price string, + ss_sales_price string, + ss_ext_discount_amt string, + ss_ext_sales_price string, + 
ss_ext_wholesale_cost string, + ss_ext_list_price string, + ss_ext_tax string, + ss_coupon_amt string, + ss_net_paid string, + ss_net_paid_inc_tax string, + ss_net_profit string, + empty string +FROM "wasb://store-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ss_sold_date_sk, + ss_sold_time_sk, + ss_item_sk, + ss_customer_sk, + ss_cdemo_sk, + ss_hdemo_sk, + ss_addr_sk, + ss_store_sk, + ss_promo_sk, + ss_ticket_number, + ss_quantity, + ss_wholesale_cost, + ss_list_price, + ss_sales_price, + ss_ext_discount_amt, + ss_ext_sales_price, + ss_ext_wholesale_cost, + ss_ext_list_price, + ss_ext_tax, + ss_coupon_amt, + ss_net_paid, + ss_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 76 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/store_sales_4.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/time_dim.usql b/UsqlScripts/large_multiple/time_dim.usql new file mode 100644 index 0000000..0038237 --- /dev/null +++ b/UsqlScripts/large_multiple/time_dim.usql @@ -0,0 +1,32 @@ +@raw_parsed = EXTRACT child_id int, + t_time_sk string, + t_time_id string, + t_time string, + t_hour string, + t_minute string, + t_second string, + t_am_pm string, + t_shift string, + t_sub_shift string, + t_meal_time string, + empty string +FROM "wasb://time-dim@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT t_time_sk, + t_time_id, + t_time, + t_hour, + t_minute, + t_second, + t_am_pm, + t_shift, + t_sub_shift, + t_meal_time +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/time_dim.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/warehouse.usql b/UsqlScripts/large_multiple/warehouse.usql new file mode 100644 index 0000000..0e9a3da --- /dev/null +++ b/UsqlScripts/large_multiple/warehouse.usql @@ -0,0 
+1,40 @@ +@raw_parsed = EXTRACT child_id int, + w_warehouse_sk string, + w_warehouse_id string, + w_warehouse_name string, + w_warehouse_sq_ft string, + w_street_number string, + w_street_name string, + w_street_type string, + w_suite_number string, + w_city string, + w_county string, + w_state string, + w_zip string, + w_country string, + w_gmt_offset string, + empty string +FROM "wasb://warehouse@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT w_warehouse_sk, + w_warehouse_id, + w_warehouse_name, + w_warehouse_sq_ft, + w_street_number, + w_street_name, + w_street_type, + w_suite_number, + w_city, + w_county, + w_state, + w_zip, + w_country, + w_gmt_offset +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/warehouse.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/web_page.usql b/UsqlScripts/large_multiple/web_page.usql new file mode 100644 index 0000000..7c6d176 --- /dev/null +++ b/UsqlScripts/large_multiple/web_page.usql @@ -0,0 +1,40 @@ +@raw_parsed = EXTRACT child_id int, + wp_web_page_sk string, + wp_web_page_id string, + wp_rec_start_date string, + wp_rec_end_date string, + wp_creation_date_sk string, + wp_access_date_sk string, + wp_autogen_flag string, + wp_customer_sk string, + wp_url string, + wp_type string, + wp_char_count string, + wp_link_count string, + wp_image_count string, + wp_max_ad_count string, + empty string +FROM "wasb://web-page@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wp_web_page_sk, + wp_web_page_id, + wp_rec_start_date, + wp_rec_end_date, + wp_creation_date_sk, + wp_access_date_sk, + wp_autogen_flag, + wp_customer_sk, + wp_url, + wp_type, + wp_char_count, + wp_link_count, + wp_image_count, + wp_max_ad_count +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO 
"/large_multiple/web_page.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/web_returns_1-25.usql b/UsqlScripts/large_multiple/web_returns_1-25.usql new file mode 100644 index 0000000..b67ae7e --- /dev/null +++ b/UsqlScripts/large_multiple/web_returns_1-25.usql @@ -0,0 +1,51 @@ +@raw_parsed = EXTRACT child_id int, + wr_returned_date_sk string, + wr_returned_time_sk string, + wr_item_sk string, + wr_refunded_customer_sk string, + wr_refunded_cdemo_sk string, + wr_refunded_hdemo_sk string, + wr_refunded_addr_sk string, + wr_returning_customer_sk string, + wr_returning_cdemo_sk string, + wr_returning_hdemo_sk string, + wr_returning_addr_sk string, + wr_web_page_sk string, + wr_reason_sk string, + wr_order_number string, + wr_return_quantity string, + wr_return_amt string, + wr_return_tax string, + wr_return_amt_inc_tax string, + wr_fee string, + wr_return_ship_cost string, + wr_refunded_cash string, + wr_reversed_charge string, + wr_account_credit string, + wr_net_loss string, + empty string +FROM "wasb://web-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wr_returned_date_sk, + wr_item_sk, + wr_refunded_cdemo_sk, + wr_refunded_addr_sk, + wr_returning_customer_sk, + wr_returning_cdemo_sk, + wr_returning_addr_sk, + wr_web_page_sk, + wr_reason_sk, + wr_order_number, + wr_return_quantity, + wr_return_amt, + wr_fee, + wr_refunded_cash, + wr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 25; + +OUTPUT @filtered_results +TO "/large_multiple/web_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/web_returns_26-50.usql b/UsqlScripts/large_multiple/web_returns_26-50.usql new file mode 100644 index 0000000..c615ac0 --- /dev/null +++ b/UsqlScripts/large_multiple/web_returns_26-50.usql @@ -0,0 +1,51 @@ +@raw_parsed = EXTRACT child_id int, + wr_returned_date_sk string, + 
wr_returned_time_sk string, + wr_item_sk string, + wr_refunded_customer_sk string, + wr_refunded_cdemo_sk string, + wr_refunded_hdemo_sk string, + wr_refunded_addr_sk string, + wr_returning_customer_sk string, + wr_returning_cdemo_sk string, + wr_returning_hdemo_sk string, + wr_returning_addr_sk string, + wr_web_page_sk string, + wr_reason_sk string, + wr_order_number string, + wr_return_quantity string, + wr_return_amt string, + wr_return_tax string, + wr_return_amt_inc_tax string, + wr_fee string, + wr_return_ship_cost string, + wr_refunded_cash string, + wr_reversed_charge string, + wr_account_credit string, + wr_net_loss string, + empty string +FROM "wasb://web-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wr_returned_date_sk, + wr_item_sk, + wr_refunded_cdemo_sk, + wr_refunded_addr_sk, + wr_returning_customer_sk, + wr_returning_cdemo_sk, + wr_returning_addr_sk, + wr_web_page_sk, + wr_reason_sk, + wr_order_number, + wr_return_quantity, + wr_return_amt, + wr_fee, + wr_refunded_cash, + wr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 26 AND 50; + +OUTPUT @filtered_results +TO "/large_multiple/web_returns_2.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/web_returns_51-75.usql b/UsqlScripts/large_multiple/web_returns_51-75.usql new file mode 100644 index 0000000..dfea397 --- /dev/null +++ b/UsqlScripts/large_multiple/web_returns_51-75.usql @@ -0,0 +1,51 @@ +@raw_parsed = EXTRACT child_id int, + wr_returned_date_sk string, + wr_returned_time_sk string, + wr_item_sk string, + wr_refunded_customer_sk string, + wr_refunded_cdemo_sk string, + wr_refunded_hdemo_sk string, + wr_refunded_addr_sk string, + wr_returning_customer_sk string, + wr_returning_cdemo_sk string, + wr_returning_hdemo_sk string, + wr_returning_addr_sk string, + wr_web_page_sk string, + wr_reason_sk string, + wr_order_number string, + wr_return_quantity string, + 
wr_return_amt string, + wr_return_tax string, + wr_return_amt_inc_tax string, + wr_fee string, + wr_return_ship_cost string, + wr_refunded_cash string, + wr_reversed_charge string, + wr_account_credit string, + wr_net_loss string, + empty string +FROM "wasb://web-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wr_returned_date_sk, + wr_item_sk, + wr_refunded_cdemo_sk, + wr_refunded_addr_sk, + wr_returning_customer_sk, + wr_returning_cdemo_sk, + wr_returning_addr_sk, + wr_web_page_sk, + wr_reason_sk, + wr_order_number, + wr_return_quantity, + wr_return_amt, + wr_fee, + wr_refunded_cash, + wr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 51 AND 75; + +OUTPUT @filtered_results +TO "/large_multiple/web_returns_3.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/web_returns_76-100.usql b/UsqlScripts/large_multiple/web_returns_76-100.usql new file mode 100644 index 0000000..1ad0907 --- /dev/null +++ b/UsqlScripts/large_multiple/web_returns_76-100.usql @@ -0,0 +1,51 @@ +@raw_parsed = EXTRACT child_id int, + wr_returned_date_sk string, + wr_returned_time_sk string, + wr_item_sk string, + wr_refunded_customer_sk string, + wr_refunded_cdemo_sk string, + wr_refunded_hdemo_sk string, + wr_refunded_addr_sk string, + wr_returning_customer_sk string, + wr_returning_cdemo_sk string, + wr_returning_hdemo_sk string, + wr_returning_addr_sk string, + wr_web_page_sk string, + wr_reason_sk string, + wr_order_number string, + wr_return_quantity string, + wr_return_amt string, + wr_return_tax string, + wr_return_amt_inc_tax string, + wr_fee string, + wr_return_ship_cost string, + wr_refunded_cash string, + wr_reversed_charge string, + wr_account_credit string, + wr_net_loss string, + empty string +FROM "wasb://web-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wr_returned_date_sk, + wr_item_sk, + 
wr_refunded_cdemo_sk, + wr_refunded_addr_sk, + wr_returning_customer_sk, + wr_returning_cdemo_sk, + wr_returning_addr_sk, + wr_web_page_sk, + wr_reason_sk, + wr_order_number, + wr_return_quantity, + wr_return_amt, + wr_fee, + wr_refunded_cash, + wr_net_loss +FROM @raw_parsed +WHERE child_id BETWEEN 76 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/web_returns_4.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/web_sales_1-25.usql b/UsqlScripts/large_multiple/web_sales_1-25.usql new file mode 100644 index 0000000..1a722e6 --- /dev/null +++ b/UsqlScripts/large_multiple/web_sales_1-25.usql @@ -0,0 +1,71 @@ +@raw_parsed = EXTRACT child_id int, + ws_sold_date_sk string, + ws_sold_time_sk string, + ws_ship_date_sk string, + ws_item_sk string, + ws_bill_customer_sk string, + ws_bill_cdemo_sk string, + ws_bill_hdemo_sk string, + ws_bill_addr_sk string, + ws_ship_customer_sk string, + ws_ship_cdemo_sk string, + ws_ship_hdemo_sk string, + ws_ship_addr_sk string, + ws_web_page_sk string, + ws_web_site_sk string, + ws_ship_mode_sk string, + ws_warehouse_sk string, + ws_promo_sk string, + ws_order_number string, + ws_quantity string, + ws_wholesale_cost string, + ws_list_price string, + ws_sales_price string, + ws_ext_discount_amt string, + ws_ext_sales_price string, + ws_ext_wholesale_cost string, + ws_ext_list_price string, + ws_ext_tax string, + ws_coupon_amt string, + ws_ext_ship_cost string, + ws_net_paid string, + ws_net_paid_inc_tax string, + ws_net_paid_inc_ship string, + ws_net_paid_inc_ship_tax string, + ws_net_profit string, + empty string +FROM "wasb://web-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ws_sold_date_sk, + ws_sold_time_sk, + ws_ship_date_sk, + ws_item_sk, + ws_bill_customer_sk, + ws_bill_addr_sk, + ws_ship_hdemo_sk, + ws_ship_addr_sk, + ws_web_page_sk, + ws_web_site_sk, + ws_ship_mode_sk, + ws_warehouse_sk, + 
ws_promo_sk, + ws_order_number, + ws_quantity, + ws_wholesale_cost, + ws_list_price, + ws_sales_price, + ws_ext_discount_amt, + ws_ext_sales_price, + ws_ext_wholesale_cost, + ws_ext_list_price, + ws_ext_ship_cost, + ws_net_paid, + ws_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 25; + +OUTPUT @filtered_results +TO "/large_multiple/web_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/web_sales_26-50.usql b/UsqlScripts/large_multiple/web_sales_26-50.usql new file mode 100644 index 0000000..a300474 --- /dev/null +++ b/UsqlScripts/large_multiple/web_sales_26-50.usql @@ -0,0 +1,71 @@ +@raw_parsed = EXTRACT child_id int, + ws_sold_date_sk string, + ws_sold_time_sk string, + ws_ship_date_sk string, + ws_item_sk string, + ws_bill_customer_sk string, + ws_bill_cdemo_sk string, + ws_bill_hdemo_sk string, + ws_bill_addr_sk string, + ws_ship_customer_sk string, + ws_ship_cdemo_sk string, + ws_ship_hdemo_sk string, + ws_ship_addr_sk string, + ws_web_page_sk string, + ws_web_site_sk string, + ws_ship_mode_sk string, + ws_warehouse_sk string, + ws_promo_sk string, + ws_order_number string, + ws_quantity string, + ws_wholesale_cost string, + ws_list_price string, + ws_sales_price string, + ws_ext_discount_amt string, + ws_ext_sales_price string, + ws_ext_wholesale_cost string, + ws_ext_list_price string, + ws_ext_tax string, + ws_coupon_amt string, + ws_ext_ship_cost string, + ws_net_paid string, + ws_net_paid_inc_tax string, + ws_net_paid_inc_ship string, + ws_net_paid_inc_ship_tax string, + ws_net_profit string, + empty string +FROM "wasb://web-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ws_sold_date_sk, + ws_sold_time_sk, + ws_ship_date_sk, + ws_item_sk, + ws_bill_customer_sk, + ws_bill_addr_sk, + ws_ship_hdemo_sk, + ws_ship_addr_sk, + ws_web_page_sk, + ws_web_site_sk, + ws_ship_mode_sk, + ws_warehouse_sk, + ws_promo_sk, + 
ws_order_number, + ws_quantity, + ws_wholesale_cost, + ws_list_price, + ws_sales_price, + ws_ext_discount_amt, + ws_ext_sales_price, + ws_ext_wholesale_cost, + ws_ext_list_price, + ws_ext_ship_cost, + ws_net_paid, + ws_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 26 AND 50; /* upper bound is 50: this script exports only the 26-50 slice; 51-75 and 76-100 are written by their own scripts */ + +OUTPUT @filtered_results +TO "/large_multiple/web_sales_2.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/web_sales_51-75.usql b/UsqlScripts/large_multiple/web_sales_51-75.usql new file mode 100644 index 0000000..d638836 --- /dev/null +++ b/UsqlScripts/large_multiple/web_sales_51-75.usql @@ -0,0 +1,71 @@ +@raw_parsed = EXTRACT child_id int, + ws_sold_date_sk string, + ws_sold_time_sk string, + ws_ship_date_sk string, + ws_item_sk string, + ws_bill_customer_sk string, + ws_bill_cdemo_sk string, + ws_bill_hdemo_sk string, + ws_bill_addr_sk string, + ws_ship_customer_sk string, + ws_ship_cdemo_sk string, + ws_ship_hdemo_sk string, + ws_ship_addr_sk string, + ws_web_page_sk string, + ws_web_site_sk string, + ws_ship_mode_sk string, + ws_warehouse_sk string, + ws_promo_sk string, + ws_order_number string, + ws_quantity string, + ws_wholesale_cost string, + ws_list_price string, + ws_sales_price string, + ws_ext_discount_amt string, + ws_ext_sales_price string, + ws_ext_wholesale_cost string, + ws_ext_list_price string, + ws_ext_tax string, + ws_coupon_amt string, + ws_ext_ship_cost string, + ws_net_paid string, + ws_net_paid_inc_tax string, + ws_net_paid_inc_ship string, + ws_net_paid_inc_ship_tax string, + ws_net_profit string, + empty string +FROM "wasb://web-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ws_sold_date_sk, + ws_sold_time_sk, + ws_ship_date_sk, + ws_item_sk, + ws_bill_customer_sk, + ws_bill_addr_sk, + ws_ship_hdemo_sk, + ws_ship_addr_sk, + ws_web_page_sk, + ws_web_site_sk, + ws_ship_mode_sk, + ws_warehouse_sk, + ws_promo_sk, + ws_order_number, +
ws_quantity, + ws_wholesale_cost, + ws_list_price, + ws_sales_price, + ws_ext_discount_amt, + ws_ext_sales_price, + ws_ext_wholesale_cost, + ws_ext_list_price, + ws_ext_ship_cost, + ws_net_paid, + ws_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 51 AND 75; + +OUTPUT @filtered_results +TO "/large_multiple/web_sales_3.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/web_sales_76-100.usql b/UsqlScripts/large_multiple/web_sales_76-100.usql new file mode 100644 index 0000000..7aa2c29 --- /dev/null +++ b/UsqlScripts/large_multiple/web_sales_76-100.usql @@ -0,0 +1,71 @@ +@raw_parsed = EXTRACT child_id int, + ws_sold_date_sk string, + ws_sold_time_sk string, + ws_ship_date_sk string, + ws_item_sk string, + ws_bill_customer_sk string, + ws_bill_cdemo_sk string, + ws_bill_hdemo_sk string, + ws_bill_addr_sk string, + ws_ship_customer_sk string, + ws_ship_cdemo_sk string, + ws_ship_hdemo_sk string, + ws_ship_addr_sk string, + ws_web_page_sk string, + ws_web_site_sk string, + ws_ship_mode_sk string, + ws_warehouse_sk string, + ws_promo_sk string, + ws_order_number string, + ws_quantity string, + ws_wholesale_cost string, + ws_list_price string, + ws_sales_price string, + ws_ext_discount_amt string, + ws_ext_sales_price string, + ws_ext_wholesale_cost string, + ws_ext_list_price string, + ws_ext_tax string, + ws_coupon_amt string, + ws_ext_ship_cost string, + ws_net_paid string, + ws_net_paid_inc_tax string, + ws_net_paid_inc_ship string, + ws_net_paid_inc_ship_tax string, + ws_net_profit string, + empty string +FROM "wasb://web-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ws_sold_date_sk, + ws_sold_time_sk, + ws_ship_date_sk, + ws_item_sk, + ws_bill_customer_sk, + ws_bill_addr_sk, + ws_ship_hdemo_sk, + ws_ship_addr_sk, + ws_web_page_sk, + ws_web_site_sk, + ws_ship_mode_sk, + ws_warehouse_sk, + ws_promo_sk, + ws_order_number, + ws_quantity, + 
ws_wholesale_cost, + ws_list_price, + ws_sales_price, + ws_ext_discount_amt, + ws_ext_sales_price, + ws_ext_wholesale_cost, + ws_ext_list_price, + ws_ext_ship_cost, + ws_net_paid, + ws_net_profit +FROM @raw_parsed +WHERE child_id BETWEEN 76 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/web_sales_4.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/large_multiple/web_site.usql b/UsqlScripts/large_multiple/web_site.usql new file mode 100644 index 0000000..b0bb180 --- /dev/null +++ b/UsqlScripts/large_multiple/web_site.usql @@ -0,0 +1,64 @@ +@raw_parsed = EXTRACT child_id int, + web_site_sk string, + web_site_id string, + web_rec_start_date string, + web_rec_end_date string, + web_name string, + web_open_date_sk string, + web_close_date_sk string, + web_class string, + web_manager string, + web_mkt_id string, + web_mkt_class string, + web_mkt_desc string, + web_market_manager string, + web_company_id string, + web_company_name string, + web_street_number string, + web_street_name string, + web_street_type string, + web_suite_number string, + web_city string, + web_county string, + web_state string, + web_zip string, + web_country string, + web_gmt_offset string, + web_tax_percentage string, + empty string +FROM "wasb://web-site@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT web_site_sk, + web_site_id, + web_rec_start_date, + web_rec_end_date, + web_name, + web_open_date_sk, + web_close_date_sk, + web_class, + web_manager, + web_mkt_id, + web_mkt_class, + web_mkt_desc, + web_market_manager, + web_company_id, + web_company_name, + web_street_number, + web_street_name, + web_street_type, + web_suite_number, + web_city, + web_county, + web_state, + web_zip, + web_country, + web_gmt_offset, + web_tax_percentage +FROM @raw_parsed +WHERE child_id BETWEEN 1 AND 100; + +OUTPUT @filtered_results +TO "/large_multiple/web_site.csv" +USING Outputters.Csv(outputHeader: 
true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/call_center.usql b/UsqlScripts/last_available_year/call_center.usql new file mode 100644 index 0000000..3ca3a6f --- /dev/null +++ b/UsqlScripts/last_available_year/call_center.usql @@ -0,0 +1,73 @@ +@raw_parsed = EXTRACT child_id int, + cc_call_center_sk string, + cc_call_center_id string, + cc_rec_start_date string, + cc_rec_end_date string, + cc_closed_date_sk string, + cc_open_date_sk string, + cc_name string, + cc_class string, + cc_employees string, + cc_sq_ft string, + cc_hours string, + cc_manager string, + cc_mkt_id string, + cc_mkt_class string, + cc_mkt_desc string, + cc_market_manager string, + cc_division string, + cc_division_name string, + cc_company string, + cc_company_name string, + cc_street_number string, + cc_street_name string, + cc_street_type string, + cc_suite_number string, + cc_city string, + cc_county string, + cc_state string, + cc_zip string, + cc_country string, + cc_gmt_offset string, + cc_tax_percentage string, + empty string +FROM "wasb://call-center@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cc_call_center_sk, + cc_call_center_id, + cc_rec_start_date, + cc_rec_end_date, + cc_closed_date_sk, + cc_open_date_sk, + cc_name, + cc_class, + cc_employees, + cc_sq_ft, + cc_hours, + cc_manager, + cc_mkt_id, + cc_mkt_class, + cc_mkt_desc, + cc_market_manager, + cc_division, + cc_division_name, + cc_company, + cc_company_name, + cc_street_number, + cc_street_name, + cc_street_type, + cc_suite_number, + cc_city, + cc_county, + cc_state, + cc_zip, + cc_country, + cc_gmt_offset, + cc_tax_percentage +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/call_center.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/catalog_page.usql b/UsqlScripts/last_available_year/catalog_page.usql new file mode 100644 index 0000000..7ddcd4e 
--- /dev/null +++ b/UsqlScripts/last_available_year/catalog_page.usql @@ -0,0 +1,29 @@ +@raw_parsed = EXTRACT child_id int, + cp_catalog_page_sk string, + cp_catalog_page_id string, + cp_start_date_sk string, + cp_end_date_sk string, + cp_department string, + cp_catalog_number string, + cp_catalog_page_number string, + cp_description string, + cp_type string, + empty string +FROM "wasb://catalog-page@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cp_catalog_page_sk, + cp_catalog_page_id, + cp_start_date_sk, + cp_end_date_sk, + cp_department, + cp_catalog_number, + cp_catalog_page_number, + cp_description, + cp_type +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/catalog_page.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/catalog_returns.usql b/UsqlScripts/last_available_year/catalog_returns.usql new file mode 100644 index 0000000..38a06bd --- /dev/null +++ b/UsqlScripts/last_available_year/catalog_returns.usql @@ -0,0 +1,87 @@ +@raw_parsed = EXTRACT child_id int, + cr_returned_date_sk string, + cr_returned_time_sk string, + cr_item_sk string, + cr_refunded_customer_sk string, + cr_refunded_cdemo_sk string, + cr_refunded_hdemo_sk string, + cr_refunded_addr_sk string, + cr_returning_customer_sk string, + cr_returning_cdemo_sk string, + cr_returning_hdemo_sk string, + cr_returning_addr_sk string, + cr_call_center_sk string, + cr_catalog_page_sk string, + cr_ship_mode_sk string, + cr_warehouse_sk string, + cr_reason_sk string, + cr_order_number string, + cr_return_quantity string, + cr_return_amount string, + cr_return_tax string, + cr_return_amt_inc_tax string, + cr_fee string, + cr_return_ship_cost string, + cr_refunded_cash string, + cr_reversed_charge string, + cr_store_credit string, + cr_net_loss string, + empty string +FROM "wasb://catalog-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + 
+@date_dim = EXTRACT d_date_sk string, + d_date_id string, + d_date string, + d_month_seq string, + d_week_seq string, + d_quarter_seq string, + d_year int, + d_dow string, + d_moy string, + d_dom string, + d_qoy string, + d_fy_year string, + d_fy_quarter_seq string, + d_fy_week_seq string, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom string, + d_last_dom string, + d_same_day_ly string, + d_same_day_lq string, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string, + empty string +FROM "wasb://date-dim@/date_dim_1_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cr_returned_date_sk, + cr_item_sk, + cr_returning_customer_sk, + cr_returning_addr_sk, + cr_call_center_sk, + cr_catalog_page_sk, + cr_order_number, + cr_return_quantity, + cr_return_amount, + cr_return_amt_inc_tax, + cr_refunded_cash, + cr_reversed_charge, + cr_store_credit, + cr_net_loss +FROM @raw_parsed +INNER JOIN (SELECT d_date_sk, d_year FROM @date_dim) AS dd + ON cr_returned_date_sk == dd.d_date_sk +WHERE dd.d_year == 2003; + +OUTPUT @filtered_results +TO "/last_available_year/catalog_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/catalog_sales.usql b/UsqlScripts/last_available_year/catalog_sales.usql new file mode 100644 index 0000000..6aa70b1 --- /dev/null +++ b/UsqlScripts/last_available_year/catalog_sales.usql @@ -0,0 +1,109 @@ +@raw_parsed = EXTRACT child_id int, + cs_sold_date_sk string, + cs_sold_time_sk string, + cs_ship_date_sk string, + cs_bill_customer_sk string, + cs_bill_cdemo_sk string, + cs_bill_hdemo_sk string, + cs_bill_addr_sk string, + cs_ship_customer_sk string, + cs_ship_cdemo_sk string, + cs_ship_hdemo_sk string, + cs_ship_addr_sk string, + cs_call_center_sk string, + cs_catalog_page_sk string, + cs_ship_mode_sk 
string, + cs_warehouse_sk string, + cs_item_sk string, + cs_promo_sk string, + cs_order_number string, + cs_quantity string, + cs_wholesale_cost string, + cs_list_price string, + cs_sales_price string, + cs_ext_discount_amt string, + cs_ext_sales_price string, + cs_ext_wholesale_cost string, + cs_ext_list_price string, + cs_ext_tax string, + cs_coupon_amt string, + cs_ext_ship_cost string, + cs_net_paid string, + cs_net_paid_inc_tax string, + cs_net_paid_inc_ship string, + cs_net_paid_inc_ship_tax string, + cs_net_profit string, + empty string +FROM "wasb://catalog-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@date_dim = EXTRACT d_date_sk string, + d_date_id string, + d_date string, + d_month_seq string, + d_week_seq string, + d_quarter_seq string, + d_year int, + d_dow string, + d_moy string, + d_dom string, + d_qoy string, + d_fy_year string, + d_fy_quarter_seq string, + d_fy_week_seq string, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom string, + d_last_dom string, + d_same_day_ly string, + d_same_day_lq string, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string, + empty string +FROM "wasb://date-dim@/date_dim_1_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cs_sold_date_sk, + cs_sold_time_sk, + cs_ship_date_sk, + cs_bill_customer_sk, + cs_bill_cdemo_sk, + cs_bill_hdemo_sk, + cs_bill_addr_sk, + cs_ship_customer_sk, + cs_ship_hdemo_sk, + cs_ship_addr_sk, + cs_call_center_sk, + cs_catalog_page_sk, + cs_ship_mode_sk, + cs_warehouse_sk, + cs_item_sk, + cs_promo_sk, + cs_order_number, + cs_quantity, + cs_wholesale_cost, + cs_list_price, + cs_sales_price, + cs_ext_discount_amt, + cs_ext_sales_price, + cs_ext_wholesale_cost, + cs_ext_list_price, + cs_coupon_amt, + cs_ext_ship_cost, + cs_net_paid, + cs_net_profit +FROM @raw_parsed +INNER JOIN (SELECT 
d_date_sk, d_year FROM @date_dim) AS dd + ON cs_sold_date_sk == dd.d_date_sk +WHERE dd.d_year == 2003; + +OUTPUT @filtered_results +TO "/last_available_year/catalog_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/customer.usql b/UsqlScripts/last_available_year/customer.usql new file mode 100644 index 0000000..6fff4d2 --- /dev/null +++ b/UsqlScripts/last_available_year/customer.usql @@ -0,0 +1,47 @@ +@raw_parsed = EXTRACT child_id int, + c_customer_sk string, + c_customer_id string, + c_current_cdemo_sk string, + c_current_hdemo_sk string, + c_current_addr_sk string, + c_first_shipto_date_sk string, + c_first_sales_date_sk string, + c_salutation string, + c_first_name string, + c_last_name string, + c_preferred_cust_flag string, + c_birth_day string, + c_birth_month string, + c_birth_year string, + c_birth_country string, + c_login string, + c_email_address string, + c_last_review_date string, + empty string +FROM "wasb://customer@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT c_customer_sk, + c_customer_id, + c_current_cdemo_sk, + c_current_hdemo_sk, + c_current_addr_sk, + c_first_shipto_date_sk, + c_first_sales_date_sk, + c_salutation, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_day, + c_birth_month, + c_birth_year, + c_birth_country, + c_login, + c_email_address, + c_last_review_date +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/customer.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/customer_address.usql b/UsqlScripts/last_available_year/customer_address.usql new file mode 100644 index 0000000..060a413 --- /dev/null +++ b/UsqlScripts/last_available_year/customer_address.usql @@ -0,0 +1,37 @@ +@raw_parsed = EXTRACT child_id int, + ca_address_sk string, + ca_address_id string, + ca_street_number string, + 
ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset string, + ca_location_type string, + empty string +FROM "wasb://customer-address@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ca_address_sk, + ca_address_id, + ca_street_number, + ca_street_name, + ca_street_type, + ca_suite_number, + ca_city, + ca_county, + ca_state, + ca_zip, + ca_country, + ca_gmt_offset, + ca_location_type +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/customer_address.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/customer_demographics.usql b/UsqlScripts/last_available_year/customer_demographics.usql new file mode 100644 index 0000000..ac18d61 --- /dev/null +++ b/UsqlScripts/last_available_year/customer_demographics.usql @@ -0,0 +1,29 @@ +@raw_parsed = EXTRACT child_id int, + cd_demo_sk string, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate string, + cd_credit_rating string, + cd_dep_count string, + cd_dep_employed_count string, + cd_dep_college_count string, + empty string +FROM "wasb://customer-demographics@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT cd_demo_sk, + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/customer_demographics.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/date_dim.usql b/UsqlScripts/last_available_year/date_dim.usql new file mode 100644 index 0000000..5226eb2 --- /dev/null +++ b/UsqlScripts/last_available_year/date_dim.usql 
@@ -0,0 +1,67 @@ +@raw_parsed = EXTRACT child_id int, + d_date_sk string, + d_date_id string, + d_date string, + d_month_seq string, + d_week_seq string, + d_quarter_seq string, + d_year string, + d_dow string, + d_moy string, + d_dom string, + d_qoy string, + d_fy_year string, + d_fy_quarter_seq string, + d_fy_week_seq string, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom string, + d_last_dom string, + d_same_day_ly string, + d_same_day_lq string, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string, + empty string +FROM "wasb://date-dim@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT d_date_sk, + d_date_id, + d_date, + d_month_seq, + d_week_seq, + d_quarter_seq, + d_year, + d_dow, + d_moy, + d_dom, + d_qoy, + d_fy_year, + d_fy_quarter_seq, + d_fy_week_seq, + d_day_name, + d_quarter_name, + d_holiday, + d_weekend, + d_following_holiday, + d_first_dom, + d_last_dom, + d_same_day_ly, + d_same_day_lq, + d_current_day, + d_current_week, + d_current_month, + d_current_quarter, + d_current_year +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/date_dim.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/dbgen_version.usql b/UsqlScripts/last_available_year/dbgen_version.usql new file mode 100644 index 0000000..6c8f44c --- /dev/null +++ b/UsqlScripts/last_available_year/dbgen_version.usql @@ -0,0 +1,19 @@ +@raw_parsed = EXTRACT child_id int, + dv_version string, + dv_create_date string, + dv_create_time string, + dv_cmdline_args string, + empty string +FROM "wasb://dbgen-version@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT dv_version, + dv_create_date, + dv_create_time, + dv_cmdline_args +FROM @raw_parsed; + +OUTPUT 
@filtered_results +TO "/last_available_year/dbgen_version.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/household_demographics.usql b/UsqlScripts/last_available_year/household_demographics.usql new file mode 100644 index 0000000..5e15b87 --- /dev/null +++ b/UsqlScripts/last_available_year/household_demographics.usql @@ -0,0 +1,21 @@ +@raw_parsed = EXTRACT child_id int, + hd_demo_sk string, + hd_income_band_sk string, + hd_buy_potential string, + hd_dep_count string, + hd_vehicle_count string, + empty string +FROM "wasb://household-demographics@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT hd_demo_sk, + hd_income_band_sk, + hd_buy_potential, + hd_dep_count, + hd_vehicle_count +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/household_demographics.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/income_band.usql b/UsqlScripts/last_available_year/income_band.usql new file mode 100644 index 0000000..52edfea --- /dev/null +++ b/UsqlScripts/last_available_year/income_band.usql @@ -0,0 +1,17 @@ +@raw_parsed = EXTRACT child_id int, + b_income_band_sk string, + b_lower_bound string, + b_upper_bound string, + empty string +FROM "wasb://income-band@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT b_income_band_sk, + b_lower_bound, + b_upper_bound +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/income_band.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/inventory.usql b/UsqlScripts/last_available_year/inventory.usql new file mode 100644 index 0000000..e5c50a5 --- /dev/null +++ b/UsqlScripts/last_available_year/inventory.usql @@ -0,0 +1,19 @@ +@raw_parsed = EXTRACT child_id int, + nv_date_sk string, + nv_item_sk 
string, + nv_warehouse_sk string, + nv_quantity_on_hand string, + empty string +FROM "wasb://inventory@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT nv_date_sk, + nv_item_sk, + nv_warehouse_sk, + nv_quantity_on_hand +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/inventory.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/item.usql b/UsqlScripts/last_available_year/item.usql new file mode 100644 index 0000000..67c2021 --- /dev/null +++ b/UsqlScripts/last_available_year/item.usql @@ -0,0 +1,55 @@ +@raw_parsed = EXTRACT child_id int, + _item_sk string, + _item_id string, + _rec_start_date string, + _rec_end_date string, + _item_desc string, + _current_price string, + _wholesale_cost string, + _brand_id string, + _brand string, + _class_id string, + _class string, + _category_id string, + _category string, + _manufact_id string, + _manufact string, + _size string, + _formulation string, + _color string, + _units string, + _container string, + _manager_id string, + _product_name string, + empty string +FROM "wasb://item@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _item_sk, + _item_id, + _rec_start_date, + _rec_end_date, + _item_desc, + _current_price, + _wholesale_cost, + _brand_id, + _brand, + _class_id, + _class, + _category_id, + _category, + _manufact_id, + _manufact, + _size, + _formulation, + _color, + _units, + _container, + _manager_id, + _product_name +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/item.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/promotion.usql b/UsqlScripts/last_available_year/promotion.usql new file mode 100644 index 0000000..acc46db --- /dev/null +++ b/UsqlScripts/last_available_year/promotion.usql @@ -0,0 +1,49 @@ +@raw_parsed = EXTRACT 
child_id int, + _promo_sk string, + _promo_id string, + _start_date_sk string, + _end_date_sk string, + _item_sk string, + _cost string, + _response_target string, + _promo_name string, + _channel_dmail string, + _channel_email string, + _channel_catalog string, + _channel_tv string, + _channel_radio string, + _channel_press string, + _channel_event string, + _channel_demo string, + _channel_details string, + _purpose string, + _discount_active string, + empty string +FROM "wasb://promotion@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _promo_sk, + _promo_id, + _start_date_sk, + _end_date_sk, + _item_sk, + _cost, + _response_target, + _promo_name, + _channel_dmail, + _channel_email, + _channel_catalog, + _channel_tv, + _channel_radio, + _channel_press, + _channel_event, + _channel_demo, + _channel_details, + _purpose, + _discount_active +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/promotion.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/reason.usql b/UsqlScripts/last_available_year/reason.usql new file mode 100644 index 0000000..21d5e53 --- /dev/null +++ b/UsqlScripts/last_available_year/reason.usql @@ -0,0 +1,17 @@ +@raw_parsed = EXTRACT child_id int, + _reason_sk string, + _reason_id string, + _reason_desc string, + empty string +FROM "wasb://reason@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT _reason_sk, + _reason_id, + _reason_desc +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/reason.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/ship_mode.usql b/UsqlScripts/last_available_year/ship_mode.usql new file mode 100644 index 0000000..ab48a92 --- /dev/null +++ b/UsqlScripts/last_available_year/ship_mode.usql @@ -0,0 +1,23 @@ +@raw_parsed = EXTRACT child_id int, + 
sm_ship_mode_sk string, + sm_ship_mode_id string, + sm_type string, + sm_code string, + sm_carrier string, + sm_contract string, + empty string +FROM "wasb://ship-mode@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sm_ship_mode_sk, + sm_ship_mode_id, + sm_type, + sm_code, + sm_carrier, + sm_contract +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/ship_mode.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/store.usql b/UsqlScripts/last_available_year/store.usql new file mode 100644 index 0000000..9ad64df --- /dev/null +++ b/UsqlScripts/last_available_year/store.usql @@ -0,0 +1,69 @@ +@raw_parsed = EXTRACT child_id int, + s_store_sk string, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk string, + s_store_name string, + s_number_employees string, + s_floor_space string, + s_hours string, + s_manager string, + s_market_id string, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id string, + s_division_name string, + s_company_id string, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset string, + s_tax_precentage string, + empty string +FROM "wasb://store@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT s_store_sk, + s_store_id, + s_rec_start_date, + s_rec_end_date, + s_closed_date_sk, + s_store_name, + s_number_employees, + s_floor_space, + s_hours, + s_manager, + s_market_id, + s_geography_class, + s_market_desc, + s_market_manager, + s_division_id, + s_division_name, + s_company_id, + s_company_name, + s_street_number, + s_street_name, + s_street_type, + s_suite_number, + s_city, + s_county, + s_state, + s_zip, + 
s_country, + s_gmt_offset, + s_tax_precentage +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/store.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/store_returns.usql b/UsqlScripts/last_available_year/store_returns.usql new file mode 100644 index 0000000..11ce6b8 --- /dev/null +++ b/UsqlScripts/last_available_year/store_returns.usql @@ -0,0 +1,77 @@ +@raw_parsed = EXTRACT child_id int, + sr_returned_date_sk string, + sr_return_time_sk string, + sr_item_sk string, + sr_customer_sk string, + sr_cdemo_sk string, + sr_hdemo_sk string, + sr_addr_sk string, + sr_store_sk string, + sr_reason_sk string, + sr_ticket_number string, + sr_return_quantity string, + sr_return_amt string, + sr_return_tax string, + sr_return_amt_inc_tax string, + sr_fee string, + sr_return_ship_cost string, + sr_refunded_cash string, + sr_reversed_charge string, + sr_store_credit string, + sr_net_loss string, + empty string +FROM "wasb://store-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@date_dim = EXTRACT d_date_sk string, + d_date_id string, + d_date string, + d_month_seq string, + d_week_seq string, + d_quarter_seq string, + d_year int, + d_dow string, + d_moy string, + d_dom string, + d_qoy string, + d_fy_year string, + d_fy_quarter_seq string, + d_fy_week_seq string, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom string, + d_last_dom string, + d_same_day_ly string, + d_same_day_lq string, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string, + empty string +FROM "wasb://date-dim@/date_dim_1_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT sr_returned_date_sk, + sr_item_sk, + sr_customer_sk, + sr_cdemo_sk, + sr_store_sk, + sr_reason_sk, + sr_ticket_number, + sr_return_quantity, + 
sr_return_amt, + sr_return_amt_inc_tax, + sr_net_loss +FROM @raw_parsed +INNER JOIN (SELECT d_date_sk, d_year FROM @date_dim) AS dd + ON sr_returned_date_sk == dd.d_date_sk +WHERE dd.d_year == 2003; + +OUTPUT @filtered_results +TO "/last_available_year/store_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/store_sales.usql b/UsqlScripts/last_available_year/store_sales.usql new file mode 100644 index 0000000..b97e838 --- /dev/null +++ b/UsqlScripts/last_available_year/store_sales.usql @@ -0,0 +1,91 @@ +@raw_parsed = EXTRACT child_id int, + ss_sold_date_sk string, + ss_sold_time_sk string, + ss_item_sk string, + ss_customer_sk string, + ss_cdemo_sk string, + ss_hdemo_sk string, + ss_addr_sk string, + ss_store_sk string, + ss_promo_sk string, + ss_ticket_number string, + ss_quantity string, + ss_wholesale_cost string, + ss_list_price string, + ss_sales_price string, + ss_ext_discount_amt string, + ss_ext_sales_price string, + ss_ext_wholesale_cost string, + ss_ext_list_price string, + ss_ext_tax string, + ss_coupon_amt string, + ss_net_paid string, + ss_net_paid_inc_tax string, + ss_net_profit string, + empty string +FROM "wasb://store-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@date_dim = EXTRACT d_date_sk string, + d_date_id string, + d_date string, + d_month_seq string, + d_week_seq string, + d_quarter_seq string, + d_year int, + d_dow string, + d_moy string, + d_dom string, + d_qoy string, + d_fy_year string, + d_fy_quarter_seq string, + d_fy_week_seq string, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom string, + d_last_dom string, + d_same_day_ly string, + d_same_day_lq string, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string, + empty string +FROM "wasb://date-dim@/date_dim_1_100.dat" +USING 
Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ss_sold_date_sk, + ss_sold_time_sk, + ss_item_sk, + ss_customer_sk, + ss_cdemo_sk, + ss_hdemo_sk, + ss_addr_sk, + ss_store_sk, + ss_promo_sk, + ss_ticket_number, + ss_quantity, + ss_wholesale_cost, + ss_list_price, + ss_sales_price, + ss_ext_discount_amt, + ss_ext_sales_price, + ss_ext_wholesale_cost, + ss_ext_list_price, + ss_ext_tax, + ss_coupon_amt, + ss_net_paid, + ss_net_profit +FROM @raw_parsed +INNER JOIN (SELECT d_date_sk, d_year FROM @date_dim) AS dd + ON ss_sold_date_sk == dd.d_date_sk +WHERE dd.d_year == 2003; + +OUTPUT @filtered_results +TO "/last_available_year/store_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/time_dim.usql b/UsqlScripts/last_available_year/time_dim.usql new file mode 100644 index 0000000..2dc784d --- /dev/null +++ b/UsqlScripts/last_available_year/time_dim.usql @@ -0,0 +1,31 @@ +@raw_parsed = EXTRACT child_id int, + t_time_sk string, + t_time_id string, + t_time string, + t_hour string, + t_minute string, + t_second string, + t_am_pm string, + t_shift string, + t_sub_shift string, + t_meal_time string, + empty string +FROM "wasb://time-dim@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT t_time_sk, + t_time_id, + t_time, + t_hour, + t_minute, + t_second, + t_am_pm, + t_shift, + t_sub_shift, + t_meal_time +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/time_dim.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/warehouse.usql b/UsqlScripts/last_available_year/warehouse.usql new file mode 100644 index 0000000..5c527f4 --- /dev/null +++ b/UsqlScripts/last_available_year/warehouse.usql @@ -0,0 +1,39 @@ +@raw_parsed = EXTRACT child_id int, + w_warehouse_sk string, + w_warehouse_id string, + w_warehouse_name string, + w_warehouse_sq_ft string, + 
w_street_number string, + w_street_name string, + w_street_type string, + w_suite_number string, + w_city string, + w_county string, + w_state string, + w_zip string, + w_country string, + w_gmt_offset string, + empty string +FROM "wasb://warehouse@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT w_warehouse_sk, + w_warehouse_id, + w_warehouse_name, + w_warehouse_sq_ft, + w_street_number, + w_street_name, + w_street_type, + w_suite_number, + w_city, + w_county, + w_state, + w_zip, + w_country, + w_gmt_offset +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/warehouse.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/web_page.usql b/UsqlScripts/last_available_year/web_page.usql new file mode 100644 index 0000000..0cac6aa --- /dev/null +++ b/UsqlScripts/last_available_year/web_page.usql @@ -0,0 +1,39 @@ +@raw_parsed = EXTRACT child_id int, + wp_web_page_sk string, + wp_web_page_id string, + wp_rec_start_date string, + wp_rec_end_date string, + wp_creation_date_sk string, + wp_access_date_sk string, + wp_autogen_flag string, + wp_customer_sk string, + wp_url string, + wp_type string, + wp_char_count string, + wp_link_count string, + wp_image_count string, + wp_max_ad_count string, + empty string +FROM "wasb://web-page@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wp_web_page_sk, + wp_web_page_id, + wp_rec_start_date, + wp_rec_end_date, + wp_creation_date_sk, + wp_access_date_sk, + wp_autogen_flag, + wp_customer_sk, + wp_url, + wp_type, + wp_char_count, + wp_link_count, + wp_image_count, + wp_max_ad_count +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/web_page.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/web_returns.usql b/UsqlScripts/last_available_year/web_returns.usql new file 
mode 100644 index 0000000..75db689 --- /dev/null +++ b/UsqlScripts/last_available_year/web_returns.usql @@ -0,0 +1,85 @@ +@raw_parsed = EXTRACT child_id int, + wr_returned_date_sk string, + wr_returned_time_sk string, + wr_item_sk string, + wr_refunded_customer_sk string, + wr_refunded_cdemo_sk string, + wr_refunded_hdemo_sk string, + wr_refunded_addr_sk string, + wr_returning_customer_sk string, + wr_returning_cdemo_sk string, + wr_returning_hdemo_sk string, + wr_returning_addr_sk string, + wr_web_page_sk string, + wr_reason_sk string, + wr_order_number string, + wr_return_quantity string, + wr_return_amt string, + wr_return_tax string, + wr_return_amt_inc_tax string, + wr_fee string, + wr_return_ship_cost string, + wr_refunded_cash string, + wr_reversed_charge string, + wr_account_credit string, + wr_net_loss string, + empty string +FROM "wasb://web-returns@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@date_dim = EXTRACT d_date_sk string, + d_date_id string, + d_date string, + d_month_seq string, + d_week_seq string, + d_quarter_seq string, + d_year int, + d_dow string, + d_moy string, + d_dom string, + d_qoy string, + d_fy_year string, + d_fy_quarter_seq string, + d_fy_week_seq string, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom string, + d_last_dom string, + d_same_day_ly string, + d_same_day_lq string, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string, + empty string +FROM "wasb://date-dim@/date_dim_1_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT wr_returned_date_sk, + wr_item_sk, + wr_refunded_cdemo_sk, + wr_refunded_addr_sk, + wr_returning_customer_sk, + wr_returning_cdemo_sk, + wr_returning_addr_sk, + wr_web_page_sk, + wr_reason_sk, + wr_order_number, + wr_return_quantity, + wr_return_amt, + wr_fee, + wr_refunded_cash, + wr_net_loss +FROM 
@raw_parsed +INNER JOIN (SELECT d_date_sk, d_year FROM @date_dim) AS dd + ON wr_returned_date_sk == dd.d_date_sk +WHERE dd.d_year == 2003; + +OUTPUT @filtered_results +TO "/last_available_year/web_returns.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/web_sales.usql b/UsqlScripts/last_available_year/web_sales.usql new file mode 100644 index 0000000..7efd907 --- /dev/null +++ b/UsqlScripts/last_available_year/web_sales.usql @@ -0,0 +1,105 @@ +@raw_parsed = EXTRACT child_id int, + ws_sold_date_sk string, + ws_sold_time_sk string, + ws_ship_date_sk string, + ws_item_sk string, + ws_bill_customer_sk string, + ws_bill_cdemo_sk string, + ws_bill_hdemo_sk string, + ws_bill_addr_sk string, + ws_ship_customer_sk string, + ws_ship_cdemo_sk string, + ws_ship_hdemo_sk string, + ws_ship_addr_sk string, + ws_web_page_sk string, + ws_web_site_sk string, + ws_ship_mode_sk string, + ws_warehouse_sk string, + ws_promo_sk string, + ws_order_number string, + ws_quantity string, + ws_wholesale_cost string, + ws_list_price string, + ws_sales_price string, + ws_ext_discount_amt string, + ws_ext_sales_price string, + ws_ext_wholesale_cost string, + ws_ext_list_price string, + ws_ext_tax string, + ws_coupon_amt string, + ws_ext_ship_cost string, + ws_net_paid string, + ws_net_paid_inc_tax string, + ws_net_paid_inc_ship string, + ws_net_paid_inc_ship_tax string, + ws_net_profit string, + empty string +FROM "wasb://web-sales@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@date_dim = EXTRACT d_date_sk string, + d_date_id string, + d_date string, + d_month_seq string, + d_week_seq string, + d_quarter_seq string, + d_year int, + d_dow string, + d_moy string, + d_dom string, + d_qoy string, + d_fy_year string, + d_fy_quarter_seq string, + d_fy_week_seq string, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom string, + 
d_last_dom string, + d_same_day_ly string, + d_same_day_lq string, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string, + empty string +FROM "wasb://date-dim@/date_dim_1_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT ws_sold_date_sk, + ws_sold_time_sk, + ws_ship_date_sk, + ws_item_sk, + ws_bill_customer_sk, + ws_bill_addr_sk, + ws_ship_hdemo_sk, + ws_ship_addr_sk, + ws_web_page_sk, + ws_web_site_sk, + ws_ship_mode_sk, + ws_warehouse_sk, + ws_promo_sk, + ws_order_number, + ws_quantity, + ws_wholesale_cost, + ws_list_price, + ws_sales_price, + ws_ext_discount_amt, + ws_ext_sales_price, + ws_ext_wholesale_cost, + ws_ext_list_price, + ws_ext_ship_cost, + ws_net_paid, + ws_net_profit +FROM @raw_parsed +INNER JOIN (SELECT d_date_sk, d_year FROM @date_dim) AS dd + ON ws_sold_date_sk == dd.d_date_sk +WHERE dd.d_year == 2003; + +OUTPUT @filtered_results +TO "/last_available_year/web_sales.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/last_available_year/web_site.usql b/UsqlScripts/last_available_year/web_site.usql new file mode 100644 index 0000000..42a3984 --- /dev/null +++ b/UsqlScripts/last_available_year/web_site.usql @@ -0,0 +1,63 @@ +@raw_parsed = EXTRACT child_id int, + web_site_sk string, + web_site_id string, + web_rec_start_date string, + web_rec_end_date string, + web_name string, + web_open_date_sk string, + web_close_date_sk string, + web_class string, + web_manager string, + web_mkt_id string, + web_mkt_class string, + web_mkt_desc string, + web_market_manager string, + web_company_id string, + web_company_name string, + web_street_number string, + web_street_name string, + web_street_type string, + web_suite_number string, + web_city string, + web_county string, + web_state string, + web_zip string, + web_country string, + web_gmt_offset string, + web_tax_percentage string, + empty string +FROM 
"wasb://web-site@/{*}_{child_id}_100.dat" +USING Extractors.Text(delimiter: '|'); + +@filtered_results = SELECT web_site_sk, + web_site_id, + web_rec_start_date, + web_rec_end_date, + web_name, + web_open_date_sk, + web_close_date_sk, + web_class, + web_manager, + web_mkt_id, + web_mkt_class, + web_mkt_desc, + web_market_manager, + web_company_id, + web_company_name, + web_street_number, + web_street_name, + web_street_type, + web_suite_number, + web_city, + web_county, + web_state, + web_zip, + web_country, + web_gmt_offset, + web_tax_percentage +FROM @raw_parsed; + +OUTPUT @filtered_results +TO "/last_available_year/web_site.csv" +USING Outputters.Csv(outputHeader: true); + \ No newline at end of file diff --git a/UsqlScripts/readme.rtf b/UsqlScripts/readme.rtf new file mode 100644 index 0000000..91d9aa3 Binary files /dev/null and b/UsqlScripts/readme.rtf differ diff --git a/UsqlScripts/readme.txt b/UsqlScripts/readme.txt new file mode 100644 index 0000000..6d64d93 --- /dev/null +++ b/UsqlScripts/readme.txt @@ -0,0 +1,22 @@ +U-SQL Scripts for Processing a TPC-DS Data Set +The U-SQL scripts for processing a TPC-DS data set demonstrate how to use Azure Data Lake Analytics to prepare raw data for import into an Azure Analysis Services data model. For a detailed discussion, see the blog article “Using Azure Analysis Services on Top of Azure Data Lake Storage” on the Analysis Services Team Blog. +To use these scripts, the TPC-DS data set must be generated by using the dsdgen tool, which can be downloaded as source code from the TPC-DS web site. Run the dsdgen tool with /PARALLEL 100 and /CHILD ids ranging from 1 – 100 to generate the source files with the expected file naming conventions and place the source files in an Azure Blob Storage account, as discussed in “Building an Azure Analysis Services Model on Top of Azure Blob Storage—Part 2” on the Analysis Services Team Blog. 
Finally, edit the U-SQL scripts and replace the storage account placeholder (@) with your actual storage account. +The subfolders containing the U-SQL scripts highlight different scenarios: +* all_single   These scripts create a single csv file per table containing all the source data. +* large_multiple   These scripts create 4 csv files for each of the large tables (catalog_returns, catalog_sales, inventory, store_returns, store_sales, web_returns, and web_sales) and a single csv file for each of the remaining tables. +* last_available_year   These scripts create a single csv file per table containing only the source data for the last year in the data set, which is the year 2003. +* modelling    These scripts create a data set for modelling purposes with a single csv file per table containing up to 100 rows of data. + + + + + + + + + + + + + +