commit 36638a7ddf4b1200becdccf878b3ecc3388298d2 Author: 沈楠 Date: Mon Apr 8 11:43:51 2024 +0800 init: create finance-dbt diff --git a/.env b/.env new file mode 100644 index 0000000..3c867ac --- /dev/null +++ b/.env @@ -0,0 +1,6 @@ +FINANCE_DBT_TARGET=dev +FINANCE_CLICKHOUSE_HOST=localhost +FINANCE_CLICKHOUSE_HTTP_PORT=8123 +FINANCE_CLICKHOUSE_DATABASE=finance_dev +FINANCE_CLICKHOUSE_USER=finance_dev +FINANCE_CLICKHOUSE_PASSWORD= \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d8ac960 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.* +!.gitignore +!.env +/target/ +/dbt_packages/ +/logs/ \ No newline at end of file diff --git a/Deploy.sh b/Deploy.sh new file mode 100644 index 0000000..bbc7190 --- /dev/null +++ b/Deploy.sh @@ -0,0 +1,18 @@ +if [ -z "$deployment" ]; then + echo "deployment is not set" + exit 1 +fi +if [ -z "$namespace" ]; then + echo "namespace is not set" + exit 1 +fi +if [ -z "$image_tag" ]; then + echo "image_tag is not set" + exit 1 +fi +if [ "$deployment" = "dev" ] || [ "$deployment" = "test" ]; then + export ingress_host=finance-dbt-doc.${deployment}.mujiannan.com +else + export ingress_host=finance-dbt-doc.mujiannan.com +fi +envsubst < Deploy.yml | kubectl apply -f - \ No newline at end of file diff --git a/Deploy.yml b/Deploy.yml new file mode 100644 index 0000000..7b14ef0 --- /dev/null +++ b/Deploy.yml @@ -0,0 +1,65 @@ +apiVersion: v1 +kind: Service +metadata: + name: finance-dbt-doc + namespace: ${namespace} +spec: + selector: + app: finance-dbt-doc + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: finance-dbt-doc + namespace: ${namespace} +spec: + replicas: 1 + selector: + matchLabels: + app: finance-dbt-doc + template: + metadata: + labels: + app: finance-dbt-doc + spec: + containers: + - name: finance-dbt-doc + image: docker-registry.mujiannan.com:5001/mujiannan/finance-dbt:${image_tag} + ports: + - containerPort: 8080 + 
command: ["bash", "/app/doc-server.sh", ""] + resources: + requests: + cpu: 10m + memory: 10Mi + limits: + cpu: 1000m + memory: 200Mi + envFrom: + - secretRef: + name: finance-dbt-doc-envs +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: finance-dbt-doc + namespace: ${namespace} + annotations: + nginx.ingress.kubernetes.io/rewrite-target: /$1 +spec: + ingressClassName: nginx + rules: + - host: ${ingress_host} + http: + paths: + - pathType: Prefix + path: /(.*) + backend: + service: + name: finance-dbt-doc + port: + number: 8080 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1eee139 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.10.14-slim +USER 1001 +WORKDIR /app +### Pip +USER root +COPY ./requirements.txt /app/ +RUN pip3 config set global.index-url https://nexus.mujiannan.com:5001/repository/pypiserver/simple && \ + pip install -r requirements.txt +USER 1001 +## Copy files +COPY --chown=1001 . /app +## Install packges/dependencies +### Dbt +RUN dbt deps \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..69ffc8e --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,100 @@ +/* groovylint-disable NestedBlockDepth */ +// Description: Jenkinsfile for finance_dbt + +pipeline { + agent { + kubernetes { + inheritFrom 'kaniko' + } + } + environment { + DESTINATION_MACHINE = 'shennan@physical-ubuntu-server' + ORG = 'mujiannan' + PROJECT = 'finance-dbt' + DOCKER_REGISTRY = 'docker-registry.mujiannan.com:5001' + SMS_URL = 'https://nas.mujiannan.com:5001/webapi/entry.cgi?api=SYNO.Chat.External&method=incoming&version=2&token=%22XO8yFQyi66SStEPY7AALimGbNjMsZE85i3m0UlF0siIQtyn1deqSomp0CUheOwlE%22' // NOSONAR + } + stages { + stage('Pre Check') { + steps { + script { + // Retrieve the latest commit message + def String commitMessage = sh(returnStdout: 'true', script: 'git log -1 --pretty=%B').trim() + echo "Commit message: ${commitMessage}" + // Set a flag based on the 
commit message + if (commitMessage.contains('[SKIP CI]')) { + env.SKIP_CI = 'true' + } + // Set a flag based on the commit message + if ( + commitMessage.contains('[SKIP CD]') || env.SKIP_CI == 'true' + ) { + env.SKIP_CD = 'true' + } + } + script { + // Set the image tag, GIT_TAG is preferred, if not available, use the commit hash abbreviated + env.IMAGE_TAG = sh(returnStdout: 'true', script: 'git describe --tags --always').trim() + } + } + } + stage('Build') { + when { + not { + expression { + return env.SKIP_CI == 'true' + } + } + } + steps { + container('kaniko') { + // Build the Docker image + sh """ + /kaniko/executor \ + --context . \ + --cache=true \ + --cache-dir="${WORKSPACE}/kaniko-cache" \ + --registry-mirror="${DOCKER_REGISTRY}" \ + --destination "${DOCKER_REGISTRY}/${ORG}/${PROJECT}:${IMAGE_TAG}" \ + --label org.opencontainers.image.branch=${BRANCH_NAME} \ + --label org.opencontainers.image.build_tag=${BUILD_TAG} \ + --label org.opencontainers.image.revision="${GIT_COMMIT}" \ + --label org.opencontainers.image.version="${IMAGE_TAG}" \ + --label org.opencontainers.image.url="${BUILD_URL}" \ + --label org.opencontainers.image.source="${GIT_URL}" + """ + } + } + } + stage('Deploy') { + when { + not { + expression { + return env.SKIP_CD == 'true' + } + } + } + steps { + withKubeConfig([namespace: "${BRANCH_NAME}"]) { + container('kubectl') { + sh ''' + if { + [ "dev" = "$BRANCH_NAME" ] || + [ "test" = "$BRANCH_NAME" ] || + [ "prod" = "$BRANCH_NAME" ] + }; then + export deployment=$BRANCH_NAME + export namespace=$BRANCH_NAME + else + export deployment=dev + export namespace=dev + fi + export image_tag=$IMAGE_TAG + . Deploy.sh + ''' + } + } + } + } + } +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..55642ec --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +# Welcome to finance dbt project! 
+ +### Prepare +```bash +cp env.sh env.dev.sh +``` +Change configurations in env.dev.sh +```bash +source env.dev.sh +``` + +### Run dbt +Try running the following commands: +- dbt deps +- dbt run +- dbt test + +### Run in docker +1. Run pipeline + ```bash + docker run + ``` +2. As document-server + ```bash + docker run -p :80 sleep 99d + ``` + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/dbt_project.yml b/dbt_project.yml new file mode 100644 index 0000000..53c6172 --- /dev/null +++ b/dbt_project.yml @@ -0,0 +1,37 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'finance_dbt' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'finance_dbt' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this config, we tell dbt to build all models in the dw/ +# directory as tables. 
These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. +models: + finance_dbt: + # Config indicated by + and applies to all files under models/dw/ + dw: + +materialized: table \ No newline at end of file diff --git a/debug-image.sh b/debug-image.sh new file mode 100644 index 0000000..d288fa8 --- /dev/null +++ b/debug-image.sh @@ -0,0 +1,35 @@ +# build +tag=debug +docker_registry=docker-registry.mujiannan.com:5001 +finance_dbt_image=${docker_registry}/mujiannan/finance-dbt:${tag} +docker build -t $finance_dbt_image . || exit 1 +docker push $finance_dbt_image +# run +name=finance_dbt +docker stop $name || echo 'No container to stop...continue...' +docker rm $name || echo 'No container to remove...continue...' +docker run \ + -d \ + --env-file=.dev.env \ + --name=$name \ + $finance_dbt_image \ + sleep 99d + +# run doc-server +name=finance_dbt_doc +docker stop $name || echo 'No container to stop...continue...' +docker rm $name || echo 'No container to remove...continue...' 
+docker run \ + -i \ + -p 8080:8080 \ + --env-file=.dev.env \ + --name=$name \ + --entrypoint=bash \ + ${finance_dbt_image} \ + /app/doc-server.sh + +## test kubernetes +export deployment=dev +export namespace=dev +export image_tag=debug +envsubst < Deploy.yml | kubectl apply -f - \ No newline at end of file diff --git a/deploy-secrets.sh b/deploy-secrets.sh new file mode 100644 index 0000000..7213876 --- /dev/null +++ b/deploy-secrets.sh @@ -0,0 +1,5 @@ + +# Put your env file on local machine +kubectl create secret generic finance-dbt-doc-envs --from-env-file=./.dev.env --namespace=dev +kubectl create secret generic finance-dbt-doc-envs --from-env-file=./.test.env --namespace=test +kubectl create secret generic finance-dbt-doc-envs --from-env-file=./.prod.env --namespace=prod \ No newline at end of file diff --git a/doc-server.sh b/doc-server.sh new file mode 100644 index 0000000..3ee7f97 --- /dev/null +++ b/doc-server.sh @@ -0,0 +1,2 @@ +dbt docs generate +dbt docs serve \ No newline at end of file diff --git a/macros/optimize_table_final.sql b/macros/optimize_table_final.sql new file mode 100644 index 0000000..69b1d21 --- /dev/null +++ b/macros/optimize_table_final.sql @@ -0,0 +1,3 @@ +{% macro optimize_table_final(table_name) %} + Optimize Table {{ table_name }} Final +{% endmacro %} diff --git a/models/dw/dw.dim_date.sql b/models/dw/dw.dim_date.sql new file mode 100644 index 0000000..75a4f53 --- /dev/null +++ b/models/dw/dw.dim_date.sql @@ -0,0 +1,30 @@ + +/* + 日期 +*/ +{{ + config( + materialized='table', + engine='MergeTree', + order_by='date_id' + ) +}} +SELECT + toUInt32(year(date) * 10000 + month(date) * 100 + day(date)) AS date_id, + date AS full_date, + toYear(date) AS year, + toUInt8(quarter(date)) AS quarter, + toUInt8(month(date)) AS month, + toUInt8(toDayOfMonth(date)) AS day, + toUInt8(week(date)) AS week_num_of_year, + toUInt8(toDayOfWeek(date)) AS day_num_of_week +FROM ( + SELECT + arrayJoin( + arrayMap( + x -> toDate('1990-01-01') + x, + 
range(toUInt32(dateDiff('day', toDate('1990-01-01'), toDate('2031-01-01')))) + ) + ) AS date +) +ORDER BY date_id \ No newline at end of file diff --git a/models/dw/dw.dim_exchange.sql b/models/dw/dw.dim_exchange.sql new file mode 100644 index 0000000..fe8a54a --- /dev/null +++ b/models/dw/dw.dim_exchange.sql @@ -0,0 +1,20 @@ + +/* + 交易所 +*/ + +{{ config(materialized='view') }} + +with source_data as ( + + select 0 As exchange_id + , 'SSE' As ts_exchange_code + , '上海证券交易所' As exchange_name + Union All + select 1 As exchange_id + , 'SZSE' As ts_exchange_code + , '深圳证券交易所' As exchange_name +) + +select * +from source_data diff --git a/models/dw/dw.dim_hs_calendar.sql b/models/dw/dw.dim_hs_calendar.sql new file mode 100644 index 0000000..2004f86 --- /dev/null +++ b/models/dw/dw.dim_hs_calendar.sql @@ -0,0 +1,17 @@ +/* + 沪深交易日历 +*/ +{{ + config( + materialized='table', + engine="MergeTree", + order_by='(exchange_id, date_id)', + ) +}} + +Select `Exchange`.`exchange_id` As `exchange_id` + , Cast(`StgCalendar`.`cal_date` As Int32) As `date_id` + , Cast(`StgCalendar`.`is_open` As Int8) As `is_open` +From {{ source('finance', 'stg.tushare_trade_calendar') }} As `StgCalendar` Final +All Inner Join {{ ref('dw.dim_exchange') }} As `Exchange` Final + On `StgCalendar`.`exchange` = `Exchange`.`ts_exchange_code` \ No newline at end of file diff --git a/models/dw/dw.dim_stock.sql b/models/dw/dw.dim_stock.sql new file mode 100644 index 0000000..8bf8d62 --- /dev/null +++ b/models/dw/dw.dim_stock.sql @@ -0,0 +1,45 @@ + +/* + 股票列表 +*/ +{{ + config( + materialized='table', + engine='MergeTree', + order_by='ts_code' + ) +}} +SELECT + StgStock.ts_code As ts_code + , DimExchange.exchange_id As exchange_id + , arrayElement(splitByChar('.', StgStock.`ts_code`),1) As code + , StgStock.name As name + , StgStock.fullname As full_name + , StgStock.enname As name_en + , StgStock.cnspell As cn_spell + , StgStock.area As area + , StgStock.industry As industry + , StgStock.market As market + , 
StgStock.curr_type As currency + , ListDate.full_date As list_date + , Case When StgStock.delist_date Is Null Or StgStock.delist_date = '' Then Null Else DelistDate.full_date End As delist_date + , Case + When StgStock.list_status = 'L' + Then 1 + Else 0 + End As is_active + , Case + When is_hs In ('H', 'S') + Then 1 + Else 0 + End As is_hongkong_connect + , act_name As actual_controller + , act_ent_type As actual_controller_enterprise_type + , now() As dt +From {{ source('finance', 'stg.tushare_stock_basic') }} As StgStock Final +Left Join {{ ref("dw.dim_exchange") }} As DimExchange + On StgStock.exchange = DimExchange.ts_exchange_code +Left Join {{ ref("dw.dim_date") }} As ListDate + On StgStock.list_date = Cast(ListDate.date_id As String) +Left Join {{ ref("dw.dim_date") }} As DelistDate + On StgStock.delist_date = Cast(DelistDate.date_id As String) diff --git a/models/dw/dw.dim_time.sql b/models/dw/dw.dim_time.sql new file mode 100644 index 0000000..91c2b1a --- /dev/null +++ b/models/dw/dw.dim_time.sql @@ -0,0 +1,24 @@ +{{ + config( + materialized='table', + engine='MergeTree', + order_by='time_id' + ) +}} +Select cast(formatDateTime(`full_time`, '%Y%m%d%H%i') As Int64) As time_id + , `full_time` + , `date_id` + , hour(`full_time`) As `hour` + , minute(`full_time`) As `minute` +From( + SELECT + arrayJoin( + arrayMap( + x -> toStartOfMinute(toDateTime(toDate(`full_date`)) + x * 60), + range(0, 24 * 60) + ) + ) AS `full_time` + , `date_id` + From {{ ref("dw.dim_date") }} + Where `date_id` >= 20090101 +) As T \ No newline at end of file diff --git a/models/dw/dw.fact_stock_daily.sql b/models/dw/dw.fact_stock_daily.sql new file mode 100644 index 0000000..e5858a3 --- /dev/null +++ b/models/dw/dw.fact_stock_daily.sql @@ -0,0 +1,49 @@ +{{ + config( + materialized='incremental', + engine='MergeTree', + order_by='ts_code, date_id', + unique_key=['ts_code', 'date_id'], + incremental_strategy='delete+insert', + ) +}} +Select + StgDaily.ts_code As ts_code, + 
DimDate.date_id As date_id, + StgDaily.open As open, + StgDaily.high As high, + StgDaily.low As low, + StgDaily.close As close, + StgDaily.pre_close As pre_close, + StgDaily.change As change, + StgDaily.pct_chg As pct_chg, + StgDaily.vol As vol, + StgDaily.amount As amount, + StgAdjFactor.adj_factor As adj_factor, + StgDailyBasic.turnover_rate As turnover_rate, + StgDailyBasic.turnover_rate_f As turnover_rate_f, + StgDailyBasic.volume_ratio As volume_ratio, + StgDailyBasic.pe As pe, + StgDailyBasic.pe_ttm As pe_ttm, + StgDailyBasic.pb As pb, + StgDailyBasic.ps As ps, + StgDailyBasic.ps_ttm As ps_ttm, + StgDailyBasic.dv_ratio As dv_ratio, + StgDailyBasic.dv_ttm As dv_ttm, + StgDailyBasic.total_share As total_share, + StgDailyBasic.float_share As float_share, + StgDailyBasic.free_share As free_share, + StgDailyBasic.total_mv As total_mv, + StgDailyBasic.circ_mv As circ_mv +From {{ source('finance', 'stg.tushare_daily') }} As StgDaily Final +Inner Join {{ ref('dw.dim_date') }} As DimDate + On StgDaily.trade_date = Cast(DimDate.date_id As String) +Any Left Join {{ source('finance', 'stg.tushare_daily_basic') }} As StgDailyBasic + On StgDaily.ts_code = StgDailyBasic.ts_code + And StgDaily.trade_date = StgDailyBasic.trade_date +Any Left Join {{ source('finance', 'stg.tushare_adj_factor') }} As StgAdjFactor + On StgDaily.ts_code = StgAdjFactor.ts_code + And StgDaily.trade_date = StgAdjFactor.trade_date +{% if is_incremental() %} + Where StgDaily.trade_date >= (Select cast(max(date_id) As String) From {{ this }}) +{% endif %} \ No newline at end of file diff --git a/models/dw/dw.fact_stock_minute.sql b/models/dw/dw.fact_stock_minute.sql new file mode 100644 index 0000000..36e2eb8 --- /dev/null +++ b/models/dw/dw.fact_stock_minute.sql @@ -0,0 +1,34 @@ +{{ + config( + materialized='materialized_view', + engine='MergeTree', + order_by='ts_code, time_id', + unique_key=['ts_code', 'time_id'], + partition_by=['cast(floor(time_id / 1000000) As Int32)'], + 
query_settings={"join_algorithm": "'partial_merge'"} + ) +}} + +Select + StgMinutes.ts_code As ts_code, + DimTime.time_id As time_id, + StgMinutes.close As close, + StgMinutes.open As open, + StgMinutes.high As high, + StgMinutes.low As low, + StgMinutes.vol As vol, + StgMinutes.amount As amount, + StgAdjFactor.adj_factor As adj_factor, + now() As dt +From {{ source('finance', 'stg.tushare_minutes') }} As StgMinutes +Inner Join {{ ref('dw.dim_time') }} As DimTime + On StgMinutes.trade_time = formatDateTimeInJodaSyntax(full_time, 'yyyy-MM-dd HH:mm:ss') +Any Left Join {{ source('finance', 'stg.tushare_adj_factor') }} As StgAdjFactor + On StgMinutes.ts_code = StgAdjFactor.ts_code + And StgAdjFactor.trade_date = Cast(DimTime.date_id As String) +where 1 = 1 +{% if 'dev' in target.name %} +And StgMinutes.trade_time >= formatDateTimeInJodaSyntax(today(), 'yyyy-01-01 00:00:00') +{% elif target.name == 'test' %} +And StgMinutes.trade_time >= formatDateTimeInJodaSyntax(dateAdd(Year,-1,today()), 'yyyy-01-01 00:00:00') +{% endif %} \ No newline at end of file diff --git a/models/dw/schema.yml b/models/dw/schema.yml new file mode 100644 index 0000000..9c264c5 --- /dev/null +++ b/models/dw/schema.yml @@ -0,0 +1,292 @@ + +version: 2 + +models: + - name: dw.dim_exchange + description: "交易所" + columns: + - name: exchange_id + description: "The primary key for this table" + tests: + - unique + - not_null + - name: ts_exchange_code + description: "交易所代码" + tests: + - not_null + - unique + - name: exchange_name + description: "交易所名称" + tests: + - not_null + - unique + - name: dw.dim_date + description: "日期" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - year + - month + - day + columns: + - name: date_id + description: "日期键" + tests: + - unique + - not_null + - name: full_date + description: "日期" + tests: + - not_null + - unique + - name: year + description: "年" + tests: + - not_null + - name: quarter + description: "季度" + tests: + - not_null + - 
name: month + description: "月" + tests: + - not_null + - name: day + description: "日" + tests: + - not_null + - name: week_num_of_year + description: "年中第几周" + tests: + - not_null + - name: day_num_of_week + description: "星期" + tests: + - not_null + - name: dw.dim_time + description: "时间" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date_id + - hour + - minute + columns: + - name: time_id + description: "时间键" + tests: + - unique + - not_null + - name: full_time + description: "时间" + tests: + - not_null + - unique + - name: date_id + description: "日期键" + tests: + - not_null + - name: hour + description: "小时" + tests: + - not_null + - name: minute + description: "分钟" + tests: + - not_null + - name: dw.dim_hs_calendar + description: "沪深交易日历" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - exchange_id + - date_id + columns: + - name: exchange_id + description: "The primary key for this table" + tests: + - not_null + - name: date_id + description: "交易日期键" + tests: + - not_null + - name: is_open + description: "是否交易" + tests: + - not_null + - name: dw.dim_stock + description: "股票" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - exchange_id + - code + columns: + - name: ts_code + description: "tushare 股票编码" + tests: + - unique + - not_null + - name: exchange_id + description: "交易所键" + tests: + - not_null + - name: code + description: "股票代码" + tests: + - not_null + - name: name + description: "股票名称" + tests: + - not_null + - unique + - name: full_name + description: "股票全称" + tests: + - not_null + - unique + - name: name_en + description: "股票英文名称" + tests: + - not_null + - name: cn_spell + description: "拼音缩写" + tests: + - not_null + - name: area + description: "地区" + - name: industry + description: "行业" + - name: market + description: "市场类型 (主板/中小板/创业板)" + - name: currency + description: "交易货币" + - name: list_date + description: "上市日期" + - name: delist_date + 
description: "退市日期" + - name: is_active + description: "当前是否在市" + tests: + - not_null + - name: is_hongkong_connect + description: "是否沪深通标的" + tests: + - not_null + - name: actual_controller + description: "实际控制人" + - name: actual_controller_enterprise_type + description: "实际控制人企业类型" + - name: dt + description: "更新时间" + - name: dw.fact_stock_daily + description: "股票日数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - date_id + columns: + - name: ts_code + description: "股票键" + tests: + - not_null + - name: date_id + description: "日期键" + tests: + - not_null + - name: open + description: "开盘价" + - name: high + description: "最高价" + - name: low + description: "最低价" + - name: close + description: "收盘价" + - name: pre_close + description: "昨收价" + - name: change + description: "涨跌额" + - name: pct_chg + description: "涨跌幅" + - name: vol + description: "成交量" + - name: amount + description: "成交额" + - name: adj_factor + description: "复权因子" + - name: turnover_rate + description: "换手率" + - name: turnover_rate_f + description: "换手率(自由流通股)" + - name: volume_ratio + description: "量比" + - name: pe + description: "市盈率(总市值/净利润, 亏损的PE为空)" + - name: pe_ttm + description: "市盈率(TTM)" + - name: pb + description: "市净率(总市值/净资产)" + - name: ps + description: "市销率" + - name: ps_ttm + description: "市销率(TTM)" + - name: dv_ratio + description: "股息率" + - name: dv_ttm + description: "股息率(TTM)" + - name: total_share + description: "总股本(万股)" + - name: float_share + description: "流通股本(万股)" + - name: free_share + description: "自由流通股本(万)" + - name: total_mv + description: "总市值(万元)" + - name: circ_mv + description: "流通市值(万元)" + - name: dw.fact_stock_minute + description: "股票分钟数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - time_id + columns: + - name: ts_code + description: "股票键" + data_type: "String" + tests: + - not_null + - name: time_id + description: "时间键" + data_type: "Int64" + tests: + - not_null + 
- name: open + description: "开盘价" + data_type: "Nullable(Decimal(16,2))" + - name: high + description: "最高价" + data_type: "Nullable(Decimal(16,2))" + - name: low + description: "最低价" + data_type: "Nullable(Decimal(16,2))" + - name: close + description: "收盘价" + data_type: "Nullable(Decimal(16,2))" + - name: vol + description: "成交量" + data_type: "Nullable(Int32)" + - name: amount + description: "成交额" + data_type: "Nullable(Decimal(18,2))" + - name: adj_factor + description: "复权因子" + data_type: "Decimal(16,6)" + tests: + - not_null + - name: dt + description: "时间" + data_type: "DateTime" + \ No newline at end of file diff --git a/models/finance_source.yml b/models/finance_source.yml new file mode 100644 index 0000000..1d1b3a8 --- /dev/null +++ b/models/finance_source.yml @@ -0,0 +1,253 @@ +version: 2 +sources: +- name: finance + schema: '{{ env_var("FINANCE_CLICKHOUSE_DATABASE") }}' + description: "The source for financial data" + tables: + - name: stg.tushare_trade_calendar + description: > + The source table for the trade calendar data from Tushare + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - exchange + - cal_date + columns: + - name: exchange + description: "The exchange code" + tests: + - not_null + - name: cal_date + description: "The date" + tests: + - not_null + - name: is_open + description: "Whether the exchange is open" + tests: + - not_null + - name: pretrade_date + description: "The previous trading date" + - name: stg.tushare_stock_basic + columns: + - name: ts_code + description: TS代码 + tests: + - not_null + - name: symbol + description: 股票代码 + - name: name + description: 股票名称 + - name: area + description: 所在地域 + - name: industry + description: 所属行业 + - name: fullname + description: 股票全称 + - name: enname + description: 英文全称 + - name: cnspell + description: 拼音缩写 + - name: market + description: 市场类型 (主板/中小板/创业板) + - name: exchange + description: 交易所代码 + tests: + - not_null + - name: curr_type + description: 交易货币 + - 
name: list_status + description: "上市状态: L上市 D退市 P暂停上市" + - name: list_date + description: 上市日期 + - name: delist_date + description: 退市日期 + - name: is_hs + description: 是否沪深港通标的,N否 H沪股通 S深股通 + - name: act_name + description: 实控人名称 + - name: act_ent_type + description: 实控人企业性质 + - name: dt + description: 最后修改时间 + unique_composite: + - name: unique_exchange_ts_code + columns: + - exchange + - ts_code + - name: stg.tushare_daily + description: "沪深股票交易日线数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_date + columns: + - name: ts_code + description: TS代码 + tests: + - not_null + - name: trade_date + description: 交易日期 + tests: + - not_null + - name: open + description: 开盘价 + tests: + - not_null + - name: high + description: 最高价 + tests: + - not_null + - name: low + description: 最低价 + tests: + - not_null + - name: close + description: 收盘价 + tests: + - not_null + - name: pre_close + description: 昨收价 + tests: + - not_null + - name: change + description: 涨跌额 + tests: + - not_null + - name: pct_chg + description: 涨跌幅 + tests: + - not_null + - name: vol + description: 成交量 + tests: + - not_null + - name: amount + description: 成交额 + - name: stg.tushare_daily_basic + description: "沪深股票每日指标数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_date + + columns: + - name: ts_code + description: "股票代码" + data_type: String + - name: trade_date + description: "交易日期" + data_type: String + - name: close + description: "当日收盘价" + data_type: Nullable(Float32) + - name: turnover_rate + description: "换手率(%)" + data_type: Nullable(Float32) + - name: turnover_rate_f + description: "换手率(自由流通股)" + data_type: Nullable(Float32) + - name: volume_ratio + description: "量比" + data_type: Nullable(Float32) + - name: pe + description: "市盈率(总市值/净利润, 亏损的PE为空)" + data_type: Nullable(Float32) + - name: pe_ttm + description: "市盈率(TTM,亏损的PE为空)" + data_type: Nullable(Float32) + - name: pb + description: 
"市净率(总市值/净资产)" + data_type: Nullable(Float32) + - name: ps + description: "市销率" + data_type: Nullable(Float32) + - name: ps_ttm + description: "市销率(TTM)" + data_type: Nullable(Float32) + - name: dv_ratio + description: "股息率 (%)" + data_type: Nullable(Float32) + - name: dv_ttm + description: "股息率(TTM)(%)" + data_type: Nullable(Float32) + - name: total_share + description: "总股本 (万股)" + data_type: Nullable(Float32) + - name: float_share + description: "流通股本 (万股)" + data_type: Nullable(Float32) + - name: free_share + description: "自由流通股本 (万)" + data_type: Nullable(Float32) + - name: total_mv + description: "总市值 (万元)" + data_type: Nullable(Float32) + - name: circ_mv + description: "流通市值(万元)" + data_type: Nullable(Float32) + - name: stg.tushare_adj_factor + description: "沪深股票复权因子" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_date + columns: + - name: ts_code + description: "股票代码" + data_type: String + - name: trade_date + description: "交易日期" + data_type: String + - name: adj_factor + description: "复权因子" + data_type: Nullable(Decimal(16, 6)) + - name: dt + description: "更新日期" + data_type: DateTime + - name: stg.tushare_minutes + description: "沪深分钟级交易数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_time + config: + where: "left(trade_time,7)>=concat(cast(year(today())-1 As String), '-01')" + columns: + - name: ts_code + description: "The stock code." + data_type: String + tests: + - not_null + - name: trade_time + description: "The trading time." + data_type: String + tests: + - not_null + - name: close + description: "The closing price." + data_type: Nullable(Float32) + - name: open + description: "The opening price." + data_type: Nullable(Float32) + - name: high + description: "The highest price." + data_type: Nullable(Float32) + - name: low + description: "The lowest price." + data_type: Nullable(Float32) + - name: vol + description: "The volume of trades." 
+ data_type: Nullable(Float32) + - name: amount + description: "The amount of trades." + data_type: Nullable(Float32) + meta: + engine: ReplacingMergeTree + partition_by: "left(trade_time, 7)" + order_by: "(ts_code, trade_time)" + settings: + index_granularity: 8192 diff --git a/package-lock.yml b/package-lock.yml new file mode 100644 index 0000000..eddc3be --- /dev/null +++ b/package-lock.yml @@ -0,0 +1,4 @@ +packages: +- package: dbt-labs/dbt_utils + version: 1.1.1 +sha1_hash: a158c48c59c2bb7d729d2a4e215aabe5bb4f3353 diff --git a/packages.yml b/packages.yml new file mode 100644 index 0000000..6152b33 --- /dev/null +++ b/packages.yml @@ -0,0 +1,3 @@ +packages: + - package: dbt-labs/dbt_utils + version: 1.1.1 \ No newline at end of file diff --git a/profiles.yml b/profiles.yml new file mode 100644 index 0000000..c189897 --- /dev/null +++ b/profiles.yml @@ -0,0 +1,36 @@ +finance_dbt: + target: "{{ env_var('FINANCE_DBT_TARGET') }}" + outputs: + .template: &default_template # 定义锚点 + type: clickhouse + schema: "{{ env_var('FINANCE_CLICKHOUSE_DATABASE') }}" + # optional + driver: http + host: "{{ env_var('FINANCE_CLICKHOUSE_HOST') }}" + port: "{{ env_var('FINANCE_CLICKHOUSE_HTTP_PORT') | as_number }}" + user: "{{ env_var('FINANCE_CLICKHOUSE_USER') }}" + password: "{{ env_var('FINANCE_CLICKHOUSE_PASSWORD') }}" + verify: True + secure: False + retries: 1 + compression: gzip + connect_timeout: 10 + send_receive_timeout: 300 + cluster_mode: False + use_lw_deletes: True + check_exchange: True + local_suffix: _local + allow_automatic_deduplication: True + custom_settings: {} + + # Native (clickhouse-driver) connection settings + sync_request_timeout: 5 + compress_block_size: 1048576 + dev: + <<: *default_template # 引用锚点 + test: + <<: *default_template # 引用锚点 + prod: + <<: *default_template # 引用锚点 + local_dev: + <<: *default_template # 引用锚点 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9d13d56 --- /dev/null +++ 
b/requirements.txt @@ -0,0 +1,2 @@ +dbt-core~=1.7.0 +dbt-clickhouse==1.7.3 \ No newline at end of file