From 9d9c343b71ad02dc82302ba8e0a4ead5cc9615b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B2=88=E6=A5=A0?= Date: Fri, 5 Jul 2024 17:54:16 +0800 Subject: [PATCH] release: 0.1.0 add ods for minutes trade data --- models/dw/dw.dim_time.sql | 6 +- models/dw/dw.fact_stock_daily.sql | 4 + models/dw/dw.fact_stock_minute.sql | 46 +-- models/dw/schema.yml | 9 + models/finance_source.yml | 482 +++++++++++++------------- models/ods/ods.tushare_adj_factor.sql | 19 + models/ods/ods.tushare_minutes.sql | 29 ++ models/ods/schema.yml | 54 +++ 8 files changed, 385 insertions(+), 264 deletions(-) create mode 100644 models/ods/ods.tushare_adj_factor.sql create mode 100644 models/ods/ods.tushare_minutes.sql create mode 100644 models/ods/schema.yml diff --git a/models/dw/dw.dim_time.sql b/models/dw/dw.dim_time.sql index 91c2b1a..198908d 100644 --- a/models/dw/dw.dim_time.sql +++ b/models/dw/dw.dim_time.sql @@ -2,14 +2,16 @@ config( materialized='table', engine='MergeTree', - order_by='time_id' + order_by='(date_id_str, full_time_str, time_id)' ) }} -Select cast(formatDateTime(`full_time`, '%Y%m%d%H%i') As Int64) As time_id +Select toYYYYMMDDhhmmss(`full_time`) As time_id , `full_time` , `date_id` , hour(`full_time`) As `hour` , minute(`full_time`) As `minute` + , formatDateTimeInJodaSyntax(`full_time`, 'yyyy-MM-dd HH:mm:ss') As full_time_str + , Cast(`date_id` As String) As date_id_str From( SELECT arrayJoin( diff --git a/models/dw/dw.fact_stock_daily.sql b/models/dw/dw.fact_stock_daily.sql index e5858a3..3eba141 100644 --- a/models/dw/dw.fact_stock_daily.sql +++ b/models/dw/dw.fact_stock_daily.sql @@ -5,6 +5,10 @@ order_by='ts_code, date_id', unique_key=['ts_code', 'date_id'], incremental_strategy='delete+insert', + query_settings={ + "join_algorithm": "'full_sorting_merge'", + "max_bytes_before_external_sort": "'1000M'" + } ) }} Select diff --git a/models/dw/dw.fact_stock_minute.sql b/models/dw/dw.fact_stock_minute.sql index b8b4df0..62f0ff4 100644 --- 
a/models/dw/dw.fact_stock_minute.sql +++ b/models/dw/dw.fact_stock_minute.sql @@ -1,38 +1,42 @@ {{ config( - materialized='materialized_view', + materialized='incremental', engine='MergeTree', order_by='ts_code, time_id', unique_key=['ts_code', 'time_id'], + incremental_strategy='delete+insert', - partition_by=['cast(floor(time_id / 1000000) As Int32)'], + partition_by=['cast(floor(time_id / 100000000) As Int32)'], query_settings={ "join_algorithm": "'full_sorting_merge'", - "max_rows_in_set_to_optimize_join": "0", - "max_bytes_before_external_sort": "'1000M'" + "max_bytes_before_external_sort": "'1000M'", + "max_bytes_before_external_group_by":"'1000M'", } ) }} Select - StgMinutes.ts_code As ts_code, - DimTime.time_id As time_id, - StgMinutes.close As close, - StgMinutes.open As open, - StgMinutes.high As high, - StgMinutes.low As low, - StgMinutes.vol As vol, - StgMinutes.amount As amount, - StgAdjFactor.adj_factor As adj_factor, + OdsMinutes.ts_code As ts_code, + OdsMinutes.time_id As time_id, + OdsMinutes.close As close, + OdsMinutes.open As open, + OdsMinutes.high As high, + OdsMinutes.low As low, + OdsMinutes.vol As vol, + OdsMinutes.amount As amount, + OdsAdjFactor.adj_factor As adj_factor, now() As dt -From {{ source('finance', 'stg.tushare_minutes') }} As StgMinutes -Inner Join {{ ref('dw.dim_time') }} As DimTime - On StgMinutes.trade_time = formatDateTimeInJodaSyntax(full_time, 'yyyy-MM-dd HH:mm:ss') -Any Left Join {{ source('finance', 'stg.tushare_adj_factor') }} As StgAdjFactor - On StgMinutes.ts_code = StgAdjFactor.ts_code - And StgAdjFactor.trade_date = Cast(DimTime.date_id As String) +From {{ ref('ods.tushare_minutes') }} As OdsMinutes +Any Left Join {{ ref('dw.dim_time') }} As DimTime + On OdsMinutes.time_id = DimTime.time_id +Any Left Join {{ ref('ods.tushare_adj_factor') }} As OdsAdjFactor + On OdsMinutes.ts_code = OdsAdjFactor.ts_code + And OdsAdjFactor.date_id = DimTime.date_id where 1 = 1 +{% if is_incremental() %} + And OdsMinutes.time_id >= (Select max(`time_id`) From {{ this }}) +{% endif %} {% if 'dev' 
in target.name %} -And StgMinutes.trade_time >= formatDateTimeInJodaSyntax(today(), 'yyyy-01-01 00:00:00') + And OdsMinutes.time_id >= toYYYYMMDDhhmmss(dateAdd(Month,-1,today())) {% elif target.name == 'test' %} -And StgMinutes.trade_time >= formatDateTimeInJodaSyntax(dateAdd(Year,-1,today()), 'yyyy-01-01 00:00:00') + And OdsMinutes.time_id >= toYYYYMMDDhhmmss(dateAdd(Year,-1,today())) {% endif %} \ No newline at end of file diff --git a/models/dw/schema.yml b/models/dw/schema.yml index 60df101..535fb0e 100644 --- a/models/dw/schema.yml +++ b/models/dw/schema.yml @@ -94,6 +94,15 @@ models: description: "分钟" tests: - not_null + - name: full_time_str + description: "时间字符串(yyyy-MM-dd HH:mm:ss)" + tests: + - not_null + - unique + - name: date_id_str + description: "日期字符串(yyyy-MM-dd)" + tests: + - not_null - name: dw.dim_hs_calendar description: "沪深交易日历" tests: diff --git a/models/finance_source.yml b/models/finance_source.yml index 1d1b3a8..400eda1 100644 --- a/models/finance_source.yml +++ b/models/finance_source.yml @@ -4,250 +4,250 @@ sources: schema: '{{ env_var("FINANCE_CLICKHOUSE_DATABASE") }}' description: "The source for financial data" tables: - - name: stg.tushare_trade_calendar - description: > - The source table for the trade calendar data from Tushare + - name: stg.tushare_trade_calendar + description: > + The source table for the trade calendar data from Tushare + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - exchange + - cal_date + columns: + - name: exchange + description: "The exchange code" tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - exchange - - cal_date - columns: - - name: exchange - description: "The exchange code" - tests: - - not_null - - name: cal_date - description: "The date" - tests: - - not_null - - name: is_open - description: "Whether the exchange is open" - tests: - - not_null - - name: pretrade_date - description: "The previous trading date" - - name: 
stg.tushare_stock_basic - columns: - - name: ts_code - description: TS代码 - tests: - - not_null - - name: symbol - description: 股票代码 - - name: name - description: 股票名称 - - name: area - description: 所在地域 - - name: industry - description: 所属行业 - - name: fullname - description: 股票全称 - - name: enname - description: 英文全称 - - name: cnspell - description: 拼音缩写 - - name: market - description: 市场类型 (主板/中小板/创业板) - - name: exchange - description: 交易所代码 - tests: - - not_null - - name: curr_type - description: 交易货币 - - name: list_status - description: 上市状态: L上市 D退市 P暂停上市 - - name: list_date - description: 上市日期 - - name: delist_date - description: 退市日期 - - name: is_hs - description: 是否沪深港通标的,N否 H沪股通 S深股通 - - name: act_name - description: 实控人名称 - - name: act_ent_type - description: 实控人企业性质 - - name: dt - description: 最后修改时间 - unique_composite: - - name: unique_exchange_ts_code - columns: - - exchange - - ts_code - - name: stg.tushare_daily - description: "沪深股票交易日线数据" + - not_null + - name: cal_date + description: "The date" tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - ts_code - - trade_date - columns: - - name: ts_code - description: TS代码 - tests: - - not_null - - name: trade_date - description: 交易日期 - tests: - - not_null - - name: open - description: 开盘价 - tests: - - not_null - - name: high - description: 最高价 - tests: - - not_null - - name: low - description: 最低价 - tests: - - not_null - - name: close - description: 收盘价 - tests: - - not_null - - name: pre_close - description: 昨收价 - tests: - - not_null - - name: change - description: 涨跌额 - tests: - - not_null - - name: pct_chg - description: 涨跌幅 - tests: - - not_null - - name: vol - description: 成交量 - tests: - - not_null - - name: amount - description: 成交额 - - name: stg.tushare_daily_basic - description: "沪深股票每日指标数据" + - not_null + - name: is_open + description: "Whether the exchange is open" tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - ts_code - - trade_date 
+ - not_null + - name: pretrade_date + description: "The previous trading date" + - name: stg.tushare_stock_basic + columns: + - name: ts_code + description: TS代码 + tests: + - not_null + - name: symbol + description: 股票代码 + - name: name + description: 股票名称 + - name: area + description: 所在地域 + - name: industry + description: 所属行业 + - name: fullname + description: 股票全称 + - name: enname + description: 英文全称 + - name: cnspell + description: 拼音缩写 + - name: market + description: 市场类型 (主板/中小板/创业板) + - name: exchange + description: 交易所代码 + tests: + - not_null + - name: curr_type + description: 交易货币 + - name: list_status + description: 上市状态: L上市 D退市 P暂停上市 + - name: list_date + description: 上市日期 + - name: delist_date + description: 退市日期 + - name: is_hs + description: 是否沪深港通标的,N否 H沪股通 S深股通 + - name: act_name + description: 实控人名称 + - name: act_ent_type + description: 实控人企业性质 + - name: dt + description: 最后修改时间 + unique_composite: + - name: unique_exchange_ts_code + columns: + - exchange + - ts_code + - name: stg.tushare_daily + description: "沪深股票交易日线数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_date + columns: + - name: ts_code + description: TS代码 + tests: + - not_null + - name: trade_date + description: 交易日期 + tests: + - not_null + - name: open + description: 开盘价 + tests: + - not_null + - name: high + description: 最高价 + tests: + - not_null + - name: low + description: 最低价 + tests: + - not_null + - name: close + description: 收盘价 + tests: + - not_null + - name: pre_close + description: 昨收价 + tests: + - not_null + - name: change + description: 涨跌额 + tests: + - not_null + - name: pct_chg + description: 涨跌幅 + tests: + - not_null + - name: vol + description: 成交量 + tests: + - not_null + - name: amount + description: 成交额 + - name: stg.tushare_daily_basic + description: "沪深股票每日指标数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_date - columns: - - name: ts_code - 
description: "股票代码" - data_type: String - - name: trade_date - description: "交易日期" - data_type: String - - name: close - description: "当日收盘价" - data_type: Nullable(Float32) - - name: turnover_rate - description: "换手率(%)" - data_type: Nullable(Float32) - - name: turnover_rate_f - description: "换手率(自由流通股)" - data_type: Nullable(Float32) - - name: volume_ratio - description: "量比" - data_type: Nullable(Float32) - - name: pe - description: "市盈率(总市值/净利润, 亏损的PE为空)" - data_type: Nullable(Float32) - - name: pe_ttm - description: "市盈率(TTM,亏损的PE为空)" - data_type: Nullable(Float32) - - name: pb - description: "市净率(总市值/净资产)" - data_type: Nullable(Float32) - - name: ps - description: "市销率" - data_type: Nullable(Float32) - - name: ps_ttm - description: "市销率(TTM)" - data_type: Nullable(Float32) - - name: dv_ratio - description: "股息率 (%)" - data_type: Nullable(Float32) - - name: dv_ttm - description: "股息率(TTM)(%)" - data_type: Nullable(Float32) - - name: total_share - description: "总股本 (万股)" - data_type: Nullable(Float32) - - name: float_share - description: "流通股本 (万股)" - data_type: Nullable(Float32) - - name: free_share - description: "自由流通股本 (万)" - data_type: Nullable(Float32) - - name: total_mv - description: "总市值 (万元)" - data_type: Nullable(Float32) - - name: circ_mv - description: "流通市值(万元)" - data_type: Nullable(Float32) - - name: stg.tushare_adj_factor - description: "沪深股票复权因子" + columns: + - name: ts_code + description: "股票代码" + data_type: String + - name: trade_date + description: "交易日期" + data_type: String + - name: close + description: "当日收盘价" + data_type: Nullable(Float32) + - name: turnover_rate + description: "换手率(%)" + data_type: Nullable(Float32) + - name: turnover_rate_f + description: "换手率(自由流通股)" + data_type: Nullable(Float32) + - name: volume_ratio + description: "量比" + data_type: Nullable(Float32) + - name: pe + description: "市盈率(总市值/净利润, 亏损的PE为空)" + data_type: Nullable(Float32) + - name: pe_ttm + description: "市盈率(TTM,亏损的PE为空)" + data_type: Nullable(Float32) + 
- name: pb + description: "市净率(总市值/净资产)" + data_type: Nullable(Float32) + - name: ps + description: "市销率" + data_type: Nullable(Float32) + - name: ps_ttm + description: "市销率(TTM)" + data_type: Nullable(Float32) + - name: dv_ratio + description: "股息率 (%)" + data_type: Nullable(Float32) + - name: dv_ttm + description: "股息率(TTM)(%)" + data_type: Nullable(Float32) + - name: total_share + description: "总股本 (万股)" + data_type: Nullable(Float32) + - name: float_share + description: "流通股本 (万股)" + data_type: Nullable(Float32) + - name: free_share + description: "自由流通股本 (万)" + data_type: Nullable(Float32) + - name: total_mv + description: "总市值 (万元)" + data_type: Nullable(Float32) + - name: circ_mv + description: "流通市值(万元)" + data_type: Nullable(Float32) + - name: stg.tushare_adj_factor + description: "沪深股票复权因子" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_date + columns: + - name: ts_code + description: "股票代码" + data_type: String + - name: trade_date + description: "交易日期" + data_type: String + - name: adj_factor + description: "复权因子" + data_type: Nullable(Decimal(16, 6)) + - name: dt + description: "更新时间" + data_type: DateTime + - name: stg.tushare_minutes + description: "沪深分钟级交易数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_time + config: + where: "left(trade_time,7)>=concat(cast(year(today())-1 As String), '-01')" + columns: + - name: ts_code + description: "The stock code." 
+ data_type: String tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - ts_code - - trade_date - columns: - - name: ts_code - description: "股票代码" - data_type: String - - name: trade_date - description: "交易日期" - data_type: String - - name: adj_factor - description: "复权因子" - data_type: Nullable(Decimal(16, 6)) - - name: dt - description: "更新日期" - data_type: DateTime - - name: stg.tushare_minutes - description: "沪深分钟级交易数据" + - not_null + - name: trade_time + description: "The trading time." + data_type: String tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - ts_code - - trade_time - config: - where: "left(trade_time,7)>=concat(cast(year(today())-1 As String), '-01')" - columns: - - name: ts_code - description: "The stock code." - data_type: String - tests: - - not_null - - name: trade_time - description: "The trading time." - data_type: String - tests: - - not_null - - name: close - description: "The closing price." - data_type: Nullable(Float32) - - name: open - description: "The opening price." - data_type: Nullable(Float32) - - name: high - description: "The highest price." - data_type: Nullable(Float32) - - name: low - description: "The lowest price." - data_type: Nullable(Float32) - - name: vol - description: "The volume of trades." - data_type: Nullable(Float32) - - name: amount - description: "The amount of trades." - data_type: Nullable(Float32) - meta: - engine: ReplacingMergeTree - partition_by: "left(trade_time, 7)" - order_by: "(ts_code, trade_time)" - settings: - index_granularity: 8192 + - not_null + - name: close + description: "The closing price." + data_type: Nullable(Float32) + - name: open + description: "The opening price." + data_type: Nullable(Float32) + - name: high + description: "The highest price." + data_type: Nullable(Float32) + - name: low + description: "The lowest price." + data_type: Nullable(Float32) + - name: vol + description: "The volume of trades." 
+ data_type: Nullable(Float32) + - name: amount + description: "The amount of trades." + data_type: Nullable(Float32) + meta: + engine: ReplacingMergeTree + partition_by: "left(trade_time, 7)" + order_by: "(ts_code, trade_time)" + settings: + index_granularity: 8192 diff --git a/models/ods/ods.tushare_adj_factor.sql b/models/ods/ods.tushare_adj_factor.sql new file mode 100644 index 0000000..051add1 --- /dev/null +++ b/models/ods/ods.tushare_adj_factor.sql @@ -0,0 +1,19 @@ +{{ + config( + materialized='incremental', + engine="MergeTree", + order_by="ts_code, date_id", + unique_key=['ts_code', 'date_id'], + incremental_strategy='delete+insert', + ) +}} + +Select + ts_code As ts_code, + toYYYYMMDD(toDate(trade_date)) As date_id, + adj_factor As adj_factor, + dt As dt +From {{ source('finance', 'stg.tushare_adj_factor') }} As StgAdjFactor +{% if is_incremental() %} +Where `trade_date` >= (Select formatDateTimeInJodaSyntax(YYYYMMDDToDate(max(`date_id`)), 'yyyy-MM-dd') From {{ this }}) +{% endif %} \ No newline at end of file diff --git a/models/ods/ods.tushare_minutes.sql b/models/ods/ods.tushare_minutes.sql new file mode 100644 index 0000000..3d721a6 --- /dev/null +++ b/models/ods/ods.tushare_minutes.sql @@ -0,0 +1,29 @@ +{{ + config( + materialized='incremental', + engine="MergeTree", + order_by='ts_code, time_id', + unique_key=['ts_code', 'time_id'], + incremental_strategy='delete+insert', + ) +}} + +Select + StgMinutes.ts_code As ts_code, + toYYYYMMDDhhmmss(toDateTime(`StgMinutes`.`trade_time`)) As time_id, + StgMinutes.close As close, + StgMinutes.open As open, + StgMinutes.high As high, + StgMinutes.low As low, + StgMinutes.vol As vol, + StgMinutes.amount As amount +From {{ source('finance', 'stg.tushare_minutes') }} As StgMinutes +where 1 = 1 +{% if is_incremental() %} + And StgMinutes.trade_time >= (Select formatDateTimeInJodaSyntax(YYYYMMDDhhmmssToDateTime(max(time_id)), 'yyyy-MM-dd HH:mm:ss') From {{ this }}) {# HH = 24-hour hour-of-day; Joda hh is the 1-12 clock-hour and would turn a 15:00 watermark into 03:00 #} +{% endif %} +{% if 'dev' in target.name %} + And 
StgMinutes.trade_time >= formatDateTimeInJodaSyntax(dateAdd(Month,-1,today()), 'yyyy-01-01 00:00:00') +{% elif target.name == 'test' %} + And StgMinutes.trade_time >= formatDateTimeInJodaSyntax(dateAdd(Year,-1,today()), 'yyyy-01-01 00:00:00') +{% endif %} \ No newline at end of file diff --git a/models/ods/schema.yml b/models/ods/schema.yml new file mode 100644 index 0000000..e300d86 --- /dev/null +++ b/models/ods/schema.yml @@ -0,0 +1,54 @@ + +version: 2 + +models: +- name: ods.tushare_minutes + description: "沪深股票交易分钟线数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - time_id + columns: + - name: ts_code + description: TS代码 + tests: + - not_null + - name: time_id + description: 交易时间ID (yyyyMMddHHmmss) + tests: + - not_null + - name: close + description: 收盘价 + - name: open + description: 开盘价 + - name: high + description: 最高价 + - name: low + description: 最低价 + - name: vol + description: 成交量 + - name: amount + description: 成交额 + - name: dt + description: 更新时间 +- name: ods.tushare_adj_factor + description: "沪深股票复权因子" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - date_id + columns: + - name: ts_code + description: TS代码 + tests: + - not_null + - name: date_id + description: 交易日期 + tests: + - not_null + - name: adj_factor + description: 复权因子 + - name: dt + description: 更新时间