Compare commits

...

2 Commits

Author SHA1 Message Date
ec07e9ca89 release: 0.2.0
Some checks failed
MJN/finance-dbt/pipeline/head There was a failure building this commit
adjust fields
2024-07-23 10:33:41 +08:00
f1efaf2c2e feat: use ReplacingMergeTree for ods/dw fact tables
! Have to optimize parts after dbt run incrementally
2024-07-10 16:14:09 +08:00
10 changed files with 98 additions and 89 deletions

View File

@ -10,7 +10,8 @@
)
}}
SELECT
toUInt32(year(date) * 10000 + month(date) * 100 + day(date)) AS date_id,
toYYYYMMDD(date) AS date_id,
formatDateTimeInJodaSyntax(date, 'yyyyMMdd') AS date_id_str,
date AS full_date,
toYear(date) AS year,
toUInt8(quarter(date)) AS quarter,

View File

@ -2,7 +2,7 @@
config(
materialized='table',
engine='MergeTree',
order_by='(date_id_str, full_time_str, time_id)'
order_by='time_id',
)
}}
Select toYYYYMMDDhhmmss(`full_time`) As time_id

View File

@ -2,12 +2,11 @@
config(
materialized='incremental',
engine='MergeTree',
order_by='ts_code, date_id',
unique_key=['ts_code', 'date_id'],
order_by='(date_id, ts_code)',
unique_key=['date_id', 'ts_code'],
incremental_strategy='delete+insert',
query_settings={
"join_algorithm": "'full_sorting_merge'",
"max_bytes_before_external_sort": "'1000M'"
"join_algorithm": "'full_sorting_merge'",
}
)
}}
@ -39,7 +38,7 @@ Select
StgDailyBasic.free_share As free_share,
StgDailyBasic.total_mv As total_mv,
StgDailyBasic.circ_mv As circ_mv
From {{ source('finance', 'stg.tushare_daily') }} As StgDaily Final
From {{ source('finance', 'stg.tushare_daily') }} As StgDaily
Inner Join {{ ref('dw.dim_date') }} As DimDate
On StgDaily.trade_date = Cast(DimDate.date_id As String)
Any Left Join {{ source('finance', 'stg.tushare_daily_basic') }} As StgDailyBasic

View File

@ -2,14 +2,12 @@
config(
materialized='incremental',
engine='MergeTree',
order_by='ts_code, time_id',
unique_key=['ts_code', 'time_id'],
incremental_strategy='delete+insert',
partition_by=['cast(floor(time_id / 1000000) As Int32)'],
order_by='(time_id, ts_code)',
unique_key=['time_id', 'ts_code'],
incremental_strategy='append',
partition_by=['toYYYYMM(toDateTime(time_id))'],
query_settings={
"join_algorithm": "'full_sorting_merge'",
"max_bytes_before_external_sort": "'1000M'",
"max_bytes_before_external_group_by":"'1000M'",
}
)
}}
@ -33,7 +31,7 @@ Any Left Join {{ ref('ods.tushare_adj_factor') }} As OdsAdjFactor
And OdsAdjFactor.date_id = DimTime.date_id
where 1 = 1
{% if is_incremental() %}
And OdsMinutes.time_id >= (Select max(`time_id`) From {{ this }})
And OdsMinutes.time_id >= (Select toYYYYMMDDhhmmss(toStartOfDay(YYYYMMDDhhmmssToDateTime(max(`time_id`)))) From {{ this }})
{% endif %}
{% if 'dev' in target.name %}
And OdsMinutes.time_id >= toYYYYMMDDhhmmss(dateAdd(Month,-1,today()))

View File

@ -7,22 +7,22 @@ models:
columns:
- name: exchange_id
description: "The primary key for this table"
tests:
data_tests:
- unique
- not_null
- name: ts_exchange_code
description: "交易所代码"
tests:
data_tests:
- not_null
- unique
- name: exchange_name
description: "交易所名称"
tests:
data_tests:
- not_null
- unique
- name: dw.dim_date
description: "日期"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- year
@ -31,41 +31,46 @@ models:
columns:
- name: date_id
description: "日期键"
tests:
data_tests:
- unique
- not_null
- name: date_id_str
description: "日期字符串yyyyMMdd"
data_tests:
- not_null
- unique
- name: full_date
description: "日期"
tests:
data_tests:
- not_null
- unique
- name: year
description: "年"
tests:
data_tests:
- not_null
- name: quarter
description: "季度"
tests:
data_tests:
- not_null
- name: month
description: "月"
tests:
data_tests:
- not_null
- name: day
description: "日"
tests:
data_tests:
- not_null
- name: week_num_of_year
description: "年中第几周"
tests:
data_tests:
- not_null
- name: day_num_of_week
description: "星期"
tests:
data_tests:
- not_null
- name: dw.dim_time
description: "时间"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date_id
@ -74,38 +79,38 @@ models:
columns:
- name: time_id
description: "时间键"
tests:
data_tests:
- unique
- not_null
- name: full_time
description: "时间"
tests:
data_tests:
- not_null
- unique
- name: date_id
description: "日期键"
tests:
data_tests:
- not_null
- name: hour
description: "小时"
tests:
data_tests:
- not_null
- name: minute
description: "分钟"
tests:
data_tests:
- not_null
- name: full_time_str
description: "时间字符串yyyy-MM-dd HH:mm:ss"
tests:
data_tests:
- not_null
- unique
- name: date_id_str
description: "日期字符串yyyy-MM-dd"
tests:
description: "日期字符串yyyyMMdd"
data_tests:
- not_null
- name: dw.dim_hs_calendar
description: "沪深交易日历"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- exchange_id
@ -113,19 +118,19 @@ models:
columns:
- name: exchange_id
description: "The primary key for this table"
tests:
data_tests:
- not_null
- name: date_id
description: "交易日期键"
tests:
data_tests:
- not_null
- name: is_open
description: "是否交易"
tests:
data_tests:
- not_null
- name: dw.dim_stock
description: "股票"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- exchange_id
@ -133,34 +138,34 @@ models:
columns:
- name: ts_code
description: "tushare 股票编码"
tests:
data_tests:
- unique
- not_null
- name: exchange_id
description: "交易所键"
tests:
data_tests:
- not_null
- name: code
description: "股票代码"
tests:
data_tests:
- not_null
- name: name
description: "股票名称"
tests:
data_tests:
- not_null
- unique
- name: full_name
description: "股票全称"
tests:
data_tests:
- not_null
- unique
- name: name_en
description: "股票英文名称"
tests:
data_tests:
- not_null
- name: cn_spell
description: "拼音缩写"
tests:
data_tests:
- not_null
- name: area
description: "地区"
@ -176,11 +181,11 @@ models:
description: "退市日期"
- name: is_active
description: "当前是否在市"
tests:
data_tests:
- not_null
- name: is_hongkong_connect
description: "是否沪深通标的"
tests:
data_tests:
- not_null
- name: actual_controller
description: "实际控制人"
@ -190,7 +195,7 @@ models:
description: "更新时间"
- name: dw.fact_stock_daily
description: "股票日数据"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ts_code
@ -198,11 +203,11 @@ models:
columns:
- name: ts_code
description: "股票键"
tests:
data_tests:
- not_null
- name: date_id
description: "日期键"
tests:
data_tests:
- not_null
- name: open
description: "开盘价"
@ -256,7 +261,7 @@ models:
description: "流通市值(万元)"
- name: dw.fact_stock_minute
description: "股票分钟数据"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ts_code
@ -265,12 +270,12 @@ models:
- name: ts_code
description: "股票键"
data_type: "String"
tests:
data_tests:
- not_null
- name: time_id
description: "时间键"
data_type: "Int64"
tests:
data_tests:
- not_null
- name: open
description: "开盘价"

View File

@ -7,7 +7,7 @@ sources:
- name: stg.tushare_trade_calendar
description: >
The source table for the trade calendar data from Tushare
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- exchange
@ -15,15 +15,15 @@ sources:
columns:
- name: exchange
description: "The exchange code"
tests:
data_tests:
- not_null
- name: cal_date
description: "The date"
tests:
data_tests:
- not_null
- name: is_open
description: "Whether the exchange is open"
tests:
data_tests:
- not_null
- name: pretrade_date
description: "The previous trading date"
@ -31,7 +31,7 @@ sources:
columns:
- name: ts_code
description: TS代码
tests:
data_tests:
- not_null
- name: symbol
description: 股票代码
@ -51,7 +51,7 @@ sources:
description: 市场类型 (主板/中小板/创业板)
- name: exchange
description: 交易所代码
tests:
data_tests:
- not_null
- name: curr_type
description: 交易货币
@ -76,7 +76,7 @@ sources:
- ts_code
- name: stg.tushare_daily
description: "沪深股票交易日线数据"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ts_code
@ -84,49 +84,49 @@ sources:
columns:
- name: ts_code
description: TS代码
tests:
data_tests:
- not_null
- name: trade_date
description: 交易日期
tests:
data_tests:
- not_null
- name: open
description: 开盘价
tests:
data_tests:
- not_null
- name: high
description: 最高价
tests:
data_tests:
- not_null
- name: low
description: 最低价
tests:
data_tests:
- not_null
- name: close
description: 收盘价
tests:
data_tests:
- not_null
- name: pre_close
description: 昨收价
tests:
data_tests:
- not_null
- name: change
description: 涨跌额
tests:
data_tests:
- not_null
- name: pct_chg
description: 涨跌幅
tests:
data_tests:
- not_null
- name: vol
description: 成交量
tests:
data_tests:
- not_null
- name: amount
description: 成交额
- name: stg.tushare_daily_basic
description: "沪深股票每日指标数据"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ts_code
@ -189,7 +189,7 @@ sources:
data_type: Nullable(Float32)
- name: stg.tushare_adj_factor
description: "沪深股票复权因子"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ts_code
@ -209,7 +209,7 @@ sources:
data_type: DateTime
- name: stg.tushare_minutes
description: "沪深分钟级交易数据"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ts_code
@ -220,12 +220,12 @@ sources:
- name: ts_code
description: "The stock code."
data_type: String
tests:
data_tests:
- not_null
- name: trade_time
description: "The trading time."
data_type: String
tests:
data_tests:
- not_null
- name: close
description: "The closing price."

View File

@ -2,8 +2,8 @@
config(
materialized='incremental',
engine="MergeTree",
order_by="ts_code, date_id",
unique_key=['ts_code', 'date_id'],
order_by="(date_id, ts_code)",
unique_key=['date_id', 'ts_code'],
incremental_strategy='delete+insert',
)
}}

View File

@ -2,9 +2,15 @@
config(
materialized='incremental',
engine="MergeTree",
order_by='ts_code, time_id',
unique_key=['ts_code', 'time_id'],
order_by='(time_id, ts_code)',
unique_key=['time_id', 'ts_code'],
partition_by=['toYYYYMM(toDateTime(time_id))'],
incremental_strategy='delete+insert',
query_settings={
"join_algorithm": "'full_sorting_merge'",
"max_bytes_before_external_sort": "'1000M'",
"max_bytes_before_external_group_by":"'1000M'",
}
)
}}
@ -20,7 +26,7 @@ Select
From {{ source('finance', 'stg.tushare_minutes') }} As StgMinutes
where 1 = 1
{% if is_incremental() %}
And StgMinutes.trade_time >= (Select formatDateTimeInJodaSyntax(YYYYMMDDhhmmssToDateTime(max(time_id)), 'yyyy-MM-dd hh:mm:ss') From {{ this }})
And StgMinutes.trade_time >= (Select formatDateTimeInJodaSyntax(toStartOfDay(YYYYMMDDhhmmssToDateTime(max(time_id))), 'yyyy-MM-dd HH:mm:ss') From {{ this }})
{% endif %}
{% if 'dev' in target.name %}
And StgMinutes.trade_time >= formatDateTimeInJodaSyntax(dateAdd(Month,-1,today()), 'yyyy-01-01 00:00:00')

View File

@ -4,7 +4,7 @@ version: 2
models:
- name: ods.tushare_minutes
description: "沪深股票交易分钟线数据"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ts_code
@ -12,11 +12,11 @@ models:
columns:
- name: ts_code
description: TS代码
tests:
data_tests:
- not_null
- name: time_id
description: 交易日期
tests:
data_tests:
- not_null
- name: close
description: 收盘价
@ -34,7 +34,7 @@ models:
description: 更新时间
- name: ods.tushare_adj_factor
description: "沪深股票复权因子"
tests:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ts_code
@ -42,11 +42,11 @@ models:
columns:
- name: ts_code
description: TS代码
tests:
data_tests:
- not_null
- name: date_id
description: 交易日期
tests:
data_tests:
- not_null
- name: adj_factor
description: 复权因子

View File

@ -15,7 +15,7 @@ finance_dbt:
retries: 1
compression: gzip
connect_timeout: 10
send_receive_timeout: 300
send_receive_timeout: 24000
cluster_mode: False
use_lw_deletes: True
check_exchange: True