Compare commits

..

2 Commits

Author SHA1 Message Date
ec07e9ca89 release: 0.2.0
Some checks failed
MJN/finance-dbt/pipeline/head There was a failure building this commit
adjust fields
2024-07-23 10:33:41 +08:00
f1efaf2c2e feat: use ReplacingMergeTree for ods/dw fact tables
! Have to optimize parts after running dbt incrementally
2024-07-10 16:14:09 +08:00
10 changed files with 98 additions and 89 deletions

View File

@ -10,7 +10,8 @@
) )
}} }}
SELECT SELECT
toUInt32(year(date) * 10000 + month(date) * 100 + day(date)) AS date_id, toYYYYMMDD(date) AS date_id,
formatDateTimeInJodaSyntax(date, 'yyyyMMdd') AS date_id_str,
date AS full_date, date AS full_date,
toYear(date) AS year, toYear(date) AS year,
toUInt8(quarter(date)) AS quarter, toUInt8(quarter(date)) AS quarter,

View File

@ -2,7 +2,7 @@
config( config(
materialized='table', materialized='table',
engine='MergeTree', engine='MergeTree',
order_by='(date_id_str, full_time_str, time_id)' order_by='time_id',
) )
}} }}
Select toYYYYMMDDhhmmss(`full_time`) As time_id Select toYYYYMMDDhhmmss(`full_time`) As time_id

View File

@ -2,12 +2,11 @@
config( config(
materialized='incremental', materialized='incremental',
engine='MergeTree', engine='MergeTree',
order_by='ts_code, date_id', order_by='(date_id, ts_code)',
unique_key=['ts_code', 'date_id'], unique_key=['date_id', 'ts_code'],
incremental_strategy='delete+insert', incremental_strategy='delete+insert',
query_settings={ query_settings={
"join_algorithm": "'full_sorting_merge'", "join_algorithm": "'full_sorting_merge'",
"max_bytes_before_external_sort": "'1000M'"
} }
) )
}} }}
@ -39,7 +38,7 @@ Select
StgDailyBasic.free_share As free_share, StgDailyBasic.free_share As free_share,
StgDailyBasic.total_mv As total_mv, StgDailyBasic.total_mv As total_mv,
StgDailyBasic.circ_mv As circ_mv StgDailyBasic.circ_mv As circ_mv
From {{ source('finance', 'stg.tushare_daily') }} As StgDaily Final From {{ source('finance', 'stg.tushare_daily') }} As StgDaily
Inner Join {{ ref('dw.dim_date') }} As DimDate Inner Join {{ ref('dw.dim_date') }} As DimDate
On StgDaily.trade_date = Cast(DimDate.date_id As String) On StgDaily.trade_date = Cast(DimDate.date_id As String)
Any Left Join {{ source('finance', 'stg.tushare_daily_basic') }} As StgDailyBasic Any Left Join {{ source('finance', 'stg.tushare_daily_basic') }} As StgDailyBasic

View File

@ -2,14 +2,12 @@
config( config(
materialized='incremental', materialized='incremental',
engine='MergeTree', engine='MergeTree',
order_by='ts_code, time_id', order_by='(time_id, ts_code)',
unique_key=['ts_code', 'time_id'], unique_key=['time_id', 'ts_code'],
incremental_strategy='delete+insert', incremental_strategy='append',
partition_by=['cast(floor(time_id / 1000000) As Int32)'], partition_by=['toYYYYMM(toDateTime(time_id))'],
query_settings={ query_settings={
"join_algorithm": "'full_sorting_merge'", "join_algorithm": "'full_sorting_merge'",
"max_bytes_before_external_sort": "'1000M'",
"max_bytes_before_external_group_by":"'1000M'",
} }
) )
}} }}
@ -33,7 +31,7 @@ Any Left Join {{ ref('ods.tushare_adj_factor') }} As OdsAdjFactor
And OdsAdjFactor.date_id = DimTime.date_id And OdsAdjFactor.date_id = DimTime.date_id
where 1 = 1 where 1 = 1
{% if is_incremental() %} {% if is_incremental() %}
And OdsMinutes.time_id >= (Select max(`time_id`) From {{ this }}) And OdsMinutes.time_id >= (Select toYYYYMMDDhhmmss(toStartOfDay(YYYYMMDDhhmmssToDateTime(max(`time_id`)))) From {{ this }})
{% endif %} {% endif %}
{% if 'dev' in target.name %} {% if 'dev' in target.name %}
And OdsMinutes.time_id >= toYYYYMMDDhhmmss(dateAdd(Month,-1,today())) And OdsMinutes.time_id >= toYYYYMMDDhhmmss(dateAdd(Month,-1,today()))

View File

@ -7,22 +7,22 @@ models:
columns: columns:
- name: exchange_id - name: exchange_id
description: "The primary key for this table" description: "The primary key for this table"
tests: data_tests:
- unique - unique
- not_null - not_null
- name: ts_exchange_code - name: ts_exchange_code
description: "交易所代码" description: "交易所代码"
tests: data_tests:
- not_null - not_null
- unique - unique
- name: exchange_name - name: exchange_name
description: "交易所名称" description: "交易所名称"
tests: data_tests:
- not_null - not_null
- unique - unique
- name: dw.dim_date - name: dw.dim_date
description: "日期" description: "日期"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- year - year
@ -31,41 +31,46 @@ models:
columns: columns:
- name: date_id - name: date_id
description: "日期键" description: "日期键"
tests: data_tests:
- unique - unique
- not_null - not_null
- name: date_id_str
description: "日期字符串yyyyMMdd"
data_tests:
- not_null
- unique
- name: full_date - name: full_date
description: "日期" description: "日期"
tests: data_tests:
- not_null - not_null
- unique - unique
- name: year - name: year
description: "年" description: "年"
tests: data_tests:
- not_null - not_null
- name: quarter - name: quarter
description: "季度" description: "季度"
tests: data_tests:
- not_null - not_null
- name: month - name: month
description: "月" description: "月"
tests: data_tests:
- not_null - not_null
- name: day - name: day
description: "日" description: "日"
tests: data_tests:
- not_null - not_null
- name: week_num_of_year - name: week_num_of_year
description: "年中第几周" description: "年中第几周"
tests: data_tests:
- not_null - not_null
- name: day_num_of_week - name: day_num_of_week
description: "星期" description: "星期"
tests: data_tests:
- not_null - not_null
- name: dw.dim_time - name: dw.dim_time
description: "时间" description: "时间"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- date_id - date_id
@ -74,38 +79,38 @@ models:
columns: columns:
- name: time_id - name: time_id
description: "时间键" description: "时间键"
tests: data_tests:
- unique - unique
- not_null - not_null
- name: full_time - name: full_time
description: "时间" description: "时间"
tests: data_tests:
- not_null - not_null
- unique - unique
- name: date_id - name: date_id
description: "日期键" description: "日期键"
tests: data_tests:
- not_null - not_null
- name: hour - name: hour
description: "小时" description: "小时"
tests: data_tests:
- not_null - not_null
- name: minute - name: minute
description: "分钟" description: "分钟"
tests: data_tests:
- not_null - not_null
- name: full_time_str - name: full_time_str
description: "时间字符串yyyy-MM-dd HH:mm:ss" description: "时间字符串yyyy-MM-dd HH:mm:ss"
tests: data_tests:
- not_null - not_null
- unique - unique
- name: date_id_str - name: date_id_str
description: "日期字符串yyyy-MM-dd" description: "日期字符串yyyyMMdd"
tests: data_tests:
- not_null - not_null
- name: dw.dim_hs_calendar - name: dw.dim_hs_calendar
description: "沪深交易日历" description: "沪深交易日历"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- exchange_id - exchange_id
@ -113,19 +118,19 @@ models:
columns: columns:
- name: exchange_id - name: exchange_id
description: "The primary key for this table" description: "The primary key for this table"
tests: data_tests:
- not_null - not_null
- name: date_id - name: date_id
description: "交易日期键" description: "交易日期键"
tests: data_tests:
- not_null - not_null
- name: is_open - name: is_open
description: "是否交易" description: "是否交易"
tests: data_tests:
- not_null - not_null
- name: dw.dim_stock - name: dw.dim_stock
description: "股票" description: "股票"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- exchange_id - exchange_id
@ -133,34 +138,34 @@ models:
columns: columns:
- name: ts_code - name: ts_code
description: "tushare 股票编码" description: "tushare 股票编码"
tests: data_tests:
- unique - unique
- not_null - not_null
- name: exchange_id - name: exchange_id
description: "交易所键" description: "交易所键"
tests: data_tests:
- not_null - not_null
- name: code - name: code
description: "股票代码" description: "股票代码"
tests: data_tests:
- not_null - not_null
- name: name - name: name
description: "股票名称" description: "股票名称"
tests: data_tests:
- not_null - not_null
- unique - unique
- name: full_name - name: full_name
description: "股票全称" description: "股票全称"
tests: data_tests:
- not_null - not_null
- unique - unique
- name: name_en - name: name_en
description: "股票英文名称" description: "股票英文名称"
tests: data_tests:
- not_null - not_null
- name: cn_spell - name: cn_spell
description: "拼音缩写" description: "拼音缩写"
tests: data_tests:
- not_null - not_null
- name: area - name: area
description: "地区" description: "地区"
@ -176,11 +181,11 @@ models:
description: "退市日期" description: "退市日期"
- name: is_active - name: is_active
description: "当前是否在市" description: "当前是否在市"
tests: data_tests:
- not_null - not_null
- name: is_hongkong_connect - name: is_hongkong_connect
description: "是否沪深通标的" description: "是否沪深通标的"
tests: data_tests:
- not_null - not_null
- name: actual_controller - name: actual_controller
description: "实际控制人" description: "实际控制人"
@ -190,7 +195,7 @@ models:
description: "更新时间" description: "更新时间"
- name: dw.fact_stock_daily - name: dw.fact_stock_daily
description: "股票日数据" description: "股票日数据"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- ts_code - ts_code
@ -198,11 +203,11 @@ models:
columns: columns:
- name: ts_code - name: ts_code
description: "股票键" description: "股票键"
tests: data_tests:
- not_null - not_null
- name: date_id - name: date_id
description: "日期键" description: "日期键"
tests: data_tests:
- not_null - not_null
- name: open - name: open
description: "开盘价" description: "开盘价"
@ -256,7 +261,7 @@ models:
description: "流通市值(万元)" description: "流通市值(万元)"
- name: dw.fact_stock_minute - name: dw.fact_stock_minute
description: "股票分钟数据" description: "股票分钟数据"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- ts_code - ts_code
@ -265,12 +270,12 @@ models:
- name: ts_code - name: ts_code
description: "股票键" description: "股票键"
data_type: "String" data_type: "String"
tests: data_tests:
- not_null - not_null
- name: time_id - name: time_id
description: "时间键" description: "时间键"
data_type: "Int64" data_type: "Int64"
tests: data_tests:
- not_null - not_null
- name: open - name: open
description: "开盘价" description: "开盘价"

View File

@ -7,7 +7,7 @@ sources:
- name: stg.tushare_trade_calendar - name: stg.tushare_trade_calendar
description: > description: >
The source table for the trade calendar data from Tushare The source table for the trade calendar data from Tushare
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- exchange - exchange
@ -15,15 +15,15 @@ sources:
columns: columns:
- name: exchange - name: exchange
description: "The exchange code" description: "The exchange code"
tests: data_tests:
- not_null - not_null
- name: cal_date - name: cal_date
description: "The date" description: "The date"
tests: data_tests:
- not_null - not_null
- name: is_open - name: is_open
description: "Whether the exchange is open" description: "Whether the exchange is open"
tests: data_tests:
- not_null - not_null
- name: pretrade_date - name: pretrade_date
description: "The previous trading date" description: "The previous trading date"
@ -31,7 +31,7 @@ sources:
columns: columns:
- name: ts_code - name: ts_code
description: TS代码 description: TS代码
tests: data_tests:
- not_null - not_null
- name: symbol - name: symbol
description: 股票代码 description: 股票代码
@ -51,7 +51,7 @@ sources:
description: 市场类型 (主板/中小板/创业板) description: 市场类型 (主板/中小板/创业板)
- name: exchange - name: exchange
description: 交易所代码 description: 交易所代码
tests: data_tests:
- not_null - not_null
- name: curr_type - name: curr_type
description: 交易货币 description: 交易货币
@ -76,7 +76,7 @@ sources:
- ts_code - ts_code
- name: stg.tushare_daily - name: stg.tushare_daily
description: "沪深股票交易日线数据" description: "沪深股票交易日线数据"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- ts_code - ts_code
@ -84,49 +84,49 @@ sources:
columns: columns:
- name: ts_code - name: ts_code
description: TS代码 description: TS代码
tests: data_tests:
- not_null - not_null
- name: trade_date - name: trade_date
description: 交易日期 description: 交易日期
tests: data_tests:
- not_null - not_null
- name: open - name: open
description: 开盘价 description: 开盘价
tests: data_tests:
- not_null - not_null
- name: high - name: high
description: 最高价 description: 最高价
tests: data_tests:
- not_null - not_null
- name: low - name: low
description: 最低价 description: 最低价
tests: data_tests:
- not_null - not_null
- name: close - name: close
description: 收盘价 description: 收盘价
tests: data_tests:
- not_null - not_null
- name: pre_close - name: pre_close
description: 昨收价 description: 昨收价
tests: data_tests:
- not_null - not_null
- name: change - name: change
description: 涨跌额 description: 涨跌额
tests: data_tests:
- not_null - not_null
- name: pct_chg - name: pct_chg
description: 涨跌幅 description: 涨跌幅
tests: data_tests:
- not_null - not_null
- name: vol - name: vol
description: 成交量 description: 成交量
tests: data_tests:
- not_null - not_null
- name: amount - name: amount
description: 成交额 description: 成交额
- name: stg.tushare_daily_basic - name: stg.tushare_daily_basic
description: "沪深股票每日指标数据" description: "沪深股票每日指标数据"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- ts_code - ts_code
@ -189,7 +189,7 @@ sources:
data_type: Nullable(Float32) data_type: Nullable(Float32)
- name: stg.tushare_adj_factor - name: stg.tushare_adj_factor
description: "沪深股票复权因子" description: "沪深股票复权因子"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- ts_code - ts_code
@ -209,7 +209,7 @@ sources:
data_type: DateTime data_type: DateTime
- name: stg.tushare_minutes - name: stg.tushare_minutes
description: "沪深分钟级交易数据" description: "沪深分钟级交易数据"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- ts_code - ts_code
@ -220,12 +220,12 @@ sources:
- name: ts_code - name: ts_code
description: "The stock code." description: "The stock code."
data_type: String data_type: String
tests: data_tests:
- not_null - not_null
- name: trade_time - name: trade_time
description: "The trading time." description: "The trading time."
data_type: String data_type: String
tests: data_tests:
- not_null - not_null
- name: close - name: close
description: "The closing price." description: "The closing price."

View File

@ -2,8 +2,8 @@
config( config(
materialized='incremental', materialized='incremental',
engine="MergeTree", engine="MergeTree",
order_by="ts_code, date_id", order_by="(date_id, ts_code)",
unique_key=['ts_code', 'date_id'], unique_key=['date_id', 'ts_code'],
incremental_strategy='delete+insert', incremental_strategy='delete+insert',
) )
}} }}

View File

@ -2,9 +2,15 @@
config( config(
materialized='incremental', materialized='incremental',
engine="MergeTree", engine="MergeTree",
order_by='ts_code, time_id', order_by='(time_id, ts_code)',
unique_key=['ts_code', 'time_id'], unique_key=['time_id', 'ts_code'],
partition_by=['toYYYYMM(toDateTime(time_id))'],
incremental_strategy='delete+insert', incremental_strategy='delete+insert',
query_settings={
"join_algorithm": "'full_sorting_merge'",
"max_bytes_before_external_sort": "'1000M'",
"max_bytes_before_external_group_by":"'1000M'",
}
) )
}} }}
@ -20,7 +26,7 @@ Select
From {{ source('finance', 'stg.tushare_minutes') }} As StgMinutes From {{ source('finance', 'stg.tushare_minutes') }} As StgMinutes
where 1 = 1 where 1 = 1
{% if is_incremental() %} {% if is_incremental() %}
And StgMinutes.trade_time >= (Select formatDateTimeInJodaSyntax(YYYYMMDDhhmmssToDateTime(max(time_id)), 'yyyy-MM-dd hh:mm:ss') From {{ this }}) And StgMinutes.trade_time >= (Select formatDateTimeInJodaSyntax(toStartOfDay(YYYYMMDDhhmmssToDateTime(max(time_id))), 'yyyy-MM-dd HH:mm:ss') From {{ this }})
{% endif %} {% endif %}
{% if 'dev' in target.name %} {% if 'dev' in target.name %}
And StgMinutes.trade_time >= formatDateTimeInJodaSyntax(dateAdd(Month,-1,today()), 'yyyy-01-01 00:00:00') And StgMinutes.trade_time >= formatDateTimeInJodaSyntax(dateAdd(Month,-1,today()), 'yyyy-01-01 00:00:00')

View File

@ -4,7 +4,7 @@ version: 2
models: models:
- name: ods.tushare_minutes - name: ods.tushare_minutes
description: "沪深股票交易分钟线数据" description: "沪深股票交易分钟线数据"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- ts_code - ts_code
@ -12,11 +12,11 @@ models:
columns: columns:
- name: ts_code - name: ts_code
description: TS代码 description: TS代码
tests: data_tests:
- not_null - not_null
- name: time_id - name: time_id
description: 交易日期 description: 交易日期
tests: data_tests:
- not_null - not_null
- name: close - name: close
description: 收盘价 description: 收盘价
@ -34,7 +34,7 @@ models:
description: 更新时间 description: 更新时间
- name: ods.tushare_adj_factor - name: ods.tushare_adj_factor
description: "沪深股票复权因子" description: "沪深股票复权因子"
tests: data_tests:
- dbt_utils.unique_combination_of_columns: - dbt_utils.unique_combination_of_columns:
combination_of_columns: combination_of_columns:
- ts_code - ts_code
@ -42,11 +42,11 @@ models:
columns: columns:
- name: ts_code - name: ts_code
description: TS代码 description: TS代码
tests: data_tests:
- not_null - not_null
- name: date_id - name: date_id
description: 交易日期 description: 交易日期
tests: data_tests:
- not_null - not_null
- name: adj_factor - name: adj_factor
description: 复权因子 description: 复权因子

View File

@ -15,7 +15,7 @@ finance_dbt:
retries: 1 retries: 1
compression: gzip compression: gzip
connect_timeout: 10 connect_timeout: 10
send_receive_timeout: 300 send_receive_timeout: 24000
cluster_mode: False cluster_mode: False
use_lw_deletes: True use_lw_deletes: True
check_exchange: True check_exchange: True