From 51677ab7a32ae22a6b6af6675b19e597f23d1d23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B2=88=E6=A5=A0?= Date: Mon, 8 Apr 2024 11:43:51 +0800 Subject: [PATCH] init: create finance-dbt --- .env | 6 + .gitignore | 6 + DebugImage.sh | 35 ++++ Deploy.sh | 18 ++ Deploy.yml | 65 +++++++ DeploySecrets.sh | 5 + Dockerfile | 14 ++ Jenkinsfile | 100 ++++++++++ README.md | 33 ++++ dbt_project.yml | 37 ++++ doc-server.sh | 2 + macros/optimize_table_final.sql | 3 + models/dw/dw.dim_date.sql | 30 +++ models/dw/dw.dim_exchange.sql | 20 ++ models/dw/dw.dim_hs_calendar.sql | 17 ++ models/dw/dw.dim_stock.sql | 45 +++++ models/dw/dw.dim_time.sql | 24 +++ models/dw/dw.fact_stock_daily.sql | 49 +++++ models/dw/dw.fact_stock_minute.sql | 34 ++++ models/dw/schema.yml | 292 +++++++++++++++++++++++++++++ models/finance_source.yml | 253 +++++++++++++++++++++++++ package-lock.yml | 4 + packages.yml | 3 + profiles.yml | 36 ++++ requirements.txt | 2 + 25 files changed, 1133 insertions(+) create mode 100644 .env create mode 100644 .gitignore create mode 100644 DebugImage.sh create mode 100644 Deploy.sh create mode 100644 Deploy.yml create mode 100644 DeploySecrets.sh create mode 100644 Dockerfile create mode 100644 Jenkinsfile create mode 100644 README.md create mode 100644 dbt_project.yml create mode 100644 doc-server.sh create mode 100644 macros/optimize_table_final.sql create mode 100644 models/dw/dw.dim_date.sql create mode 100644 models/dw/dw.dim_exchange.sql create mode 100644 models/dw/dw.dim_hs_calendar.sql create mode 100644 models/dw/dw.dim_stock.sql create mode 100644 models/dw/dw.dim_time.sql create mode 100644 models/dw/dw.fact_stock_daily.sql create mode 100644 models/dw/dw.fact_stock_minute.sql create mode 100644 models/dw/schema.yml create mode 100644 models/finance_source.yml create mode 100644 package-lock.yml create mode 100644 packages.yml create mode 100644 profiles.yml create mode 100644 requirements.txt diff --git a/.env b/.env new file mode 100644 index 
0000000..3c867ac --- /dev/null +++ b/.env @@ -0,0 +1,6 @@ +FINANCE_DBT_TARGET=dev +FINANCE_CLICKHOUSE_HOST=localhost +FINANCE_CLICKHOUSE_HTTP_PORT=8123 +FINANCE_CLICKHOUSE_DATABASE=finance_dev +FINANCE_CLICKHOUSE_USER=finance_dev +FINANCE_CLICKHOUSE_PASSWORD= \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d8ac960 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.* +!.gitignore +!.env +/target/ +/dbt_packages/ +/logs/ \ No newline at end of file diff --git a/DebugImage.sh b/DebugImage.sh new file mode 100644 index 0000000..d288fa8 --- /dev/null +++ b/DebugImage.sh @@ -0,0 +1,35 @@ +# build +tag=debug +docker_registry=docker-registry.mujiannan.com:5001 +finance_dbt_image=${docker_registry}/mujiannan/finance-dbt:${tag} +docker build -t $finance_dbt_image . || exit 1 +docker push $finance_dbt_image +# run +name=finance_dbt +docker stop $name || echo 'No container to stop...continue...' +docker rm $name || echo 'No container to remove...continue...' +docker run \ + -d \ + --env-file=.dev.env \ + --name=$name \ + $finance_dbt_image \ + sleep 99d + +# run doc-server +name=finance_dbt_doc +docker stop $name || echo 'No container to stop...continue...' +docker rm $name || echo 'No container to remove...continue...' 
+docker run \ + -i \ + -p 8080:8080 \ + --env-file=.dev.env \ + --name=$name \ + --entrypoint=bash \ + ${finance_dbt_image} \ + /app/doc-server.sh + +## test kubernetes +export deployment=dev +export namespace=dev +export image_tag=debug +. ./Deploy.sh \ No newline at end of file diff --git a/Deploy.sh b/Deploy.sh new file mode 100644 index 0000000..bbc7190 --- /dev/null +++ b/Deploy.sh @@ -0,0 +1,18 @@ +if [ -z "$deployment" ]; then + echo "deployment is not set" + exit 1 +fi +if [ -z "$namespace" ]; then + echo "namespace is not set" + exit 1 +fi +if [ -z "$image_tag" ]; then + echo "image_tag is not set" + exit 1 +fi +if [ "$deployment" = "dev" ] || [ "$deployment" = "test" ]; then + export ingress_host=finance-dbt-doc.${deployment}.mujiannan.com # exported so envsubst can expand ${ingress_host} in Deploy.yml +else + export ingress_host=finance-dbt-doc.mujiannan.com +fi +envsubst < Deploy.yml | kubectl apply -f - \ No newline at end of file diff --git a/Deploy.yml b/Deploy.yml new file mode 100644 index 0000000..7b14ef0 --- /dev/null +++ b/Deploy.yml @@ -0,0 +1,65 @@ +apiVersion: v1 +kind: Service +metadata: + name: finance-dbt-doc + namespace: ${namespace} +spec: + selector: + app: finance-dbt-doc + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: finance-dbt-doc + namespace: ${namespace} +spec: + replicas: 1 + selector: + matchLabels: + app: finance-dbt-doc + template: + metadata: + labels: + app: finance-dbt-doc + spec: + containers: + - name: finance-dbt-doc + image: docker-registry.mujiannan.com:5001/mujiannan/finance-dbt:${image_tag} + ports: + - containerPort: 8080 + command: ["bash", "/app/doc-server.sh", ""] + resources: + requests: + cpu: 10m + memory: 10Mi + limits: + cpu: 1000m + memory: 200Mi + envFrom: + - secretRef: + name: finance-dbt-doc-envs +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: finance-dbt-doc + namespace: ${namespace} + annotations: + nginx.ingress.kubernetes.io/rewrite-target:
/$1 +spec: + ingressClassName: nginx + rules: + - host: ${ingress_host} + http: + paths: + - pathType: ImplementationSpecific + path: /(.*) + backend: + service: + name: finance-dbt-doc + port: + number: 8080 diff --git a/DeploySecrets.sh b/DeploySecrets.sh new file mode 100644 index 0000000..7213876 --- /dev/null +++ b/DeploySecrets.sh @@ -0,0 +1,5 @@ + +# Put your env files (.dev.env, .test.env, .prod.env) on the local machine first +kubectl create secret generic finance-dbt-doc-envs --from-env-file=./.dev.env --namespace=dev +kubectl create secret generic finance-dbt-doc-envs --from-env-file=./.test.env --namespace=test +kubectl create secret generic finance-dbt-doc-envs --from-env-file=./.prod.env --namespace=prod \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1eee139 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.10.14-slim +USER 1001 +WORKDIR /app +### Pip +USER root +COPY ./requirements.txt /app/ +RUN pip3 config set global.index-url https://nexus.mujiannan.com:5001/repository/pypiserver/simple && \ + pip install -r requirements.txt +USER 1001 +## Copy files +COPY --chown=1001 .
/app +## Install packges/dependencies +### Dbt +RUN dbt deps \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..69ffc8e --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,100 @@ +/* groovylint-disable NestedBlockDepth */ +// Description: Jenkinsfile for finance_dbt + +pipeline { + agent { + kubernetes { + inheritFrom 'kaniko' + } + } + environment { + DESTINATION_MACHINE = 'shennan@physical-ubuntu-server' + ORG = 'mujiannan' + PROJECT = 'finance-dbt' + DOCKER_REGISTRY = 'docker-registry.mujiannan.com:5001' + SMS_URL = 'https://nas.mujiannan.com:5001/webapi/entry.cgi?api=SYNO.Chat.External&method=incoming&version=2&token=%22XO8yFQyi66SStEPY7AALimGbNjMsZE85i3m0UlF0siIQtyn1deqSomp0CUheOwlE%22' // NOSONAR + } + stages { + stage('Pre Check') { + steps { + script { + // Retrieve the latest commit message + def String commitMessage = sh(returnStdout: 'true', script: 'git log -1 --pretty=%B').trim() + echo "Commit message: ${commitMessage}" + // Set a flag based on the commit message + if (commitMessage.contains('[SKIP CI]')) { + env.SKIP_CI = 'true' + } + // Set a flag based on the commit message + if ( + commitMessage.contains('[SKIP CD]') || env.SKIP_CI == 'true' + ) { + env.SKIP_CD = 'true' + } + } + script { + // Set the image tag, GIT_TAG is preferred, if not available, use the commit hash abbreviated + env.IMAGE_TAG = sh(returnStdout: 'true', script: 'git describe --tags --always').trim() + } + } + } + stage('Build') { + when { + not { + expression { + return env.SKIP_CI == 'true' + } + } + } + steps { + container('kaniko') { + // Build the Docker image + sh """ + /kaniko/executor \ + --context . 
\ + --cache=true \ + --cache-dir="${WORKSPACE}/kaniko-cache" \ + --registry-mirror="${DOCKER_REGISTRY}" \ + --destination "${DOCKER_REGISTRY}/${ORG}/${PROJECT}:${IMAGE_TAG}" \ + --label org.opencontainers.image.branch=${BRANCH_NAME} \ + --label org.opencontainers.image.build_tag=${BUILD_TAG} \ + --label org.opencontainers.image.revision="${GIT_COMMIT}" \ + --label org.opencontainers.image.version="${IMAGE_TAG}" \ + --label org.opencontainers.image.url="${BUILD_URL}" \ + --label org.opencontainers.image.source="${GIT_URL}" + """ + } + } + } + stage('Deploy') { + when { + not { + expression { + return env.SKIP_CD == 'true' + } + } + } + steps { + withKubeConfig([namespace: "${BRANCH_NAME}"]) { + container('kubectl') { + sh ''' + if { # string equality: -eq only compares integers + [ "dev" = "$BRANCH_NAME" ] || + [ "test" = "$BRANCH_NAME" ] || + [ "prod" = "$BRANCH_NAME" ] + }; then + export deployment=$BRANCH_NAME + export namespace=$BRANCH_NAME + else + export deployment=dev + export namespace=dev + fi + export image_tag=$IMAGE_TAG + . ./Deploy.sh + ''' + } + } + } + } + } +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..55642ec --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +# Welcome to finance dbt project! + +### Prepare +```bash +cp .env .dev.env +``` +Change the configuration in `.dev.env` +```bash +set -a; source .dev.env; set +a +``` + +### Run dbt +Try running the following commands: +- dbt deps +- dbt run +- dbt test + +### Run in docker +1. Run pipeline + ```bash + docker run + ``` +2.
As document-server + ```bash + docker run -p :80 sleep 99d + ``` + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/dbt_project.yml b/dbt_project.yml new file mode 100644 index 0000000..53c6172 --- /dev/null +++ b/dbt_project.yml @@ -0,0 +1,37 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'finance_dbt' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'finance_dbt' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. 
+models: + finance_dbt: + # Config indicated by + and applies to all files under models/example/ + dw: + +materialized: table \ No newline at end of file diff --git a/doc-server.sh b/doc-server.sh new file mode 100644 index 0000000..3ee7f97 --- /dev/null +++ b/doc-server.sh @@ -0,0 +1,2 @@ +dbt docs generate +dbt docs serve \ No newline at end of file diff --git a/macros/optimize_table_final.sql b/macros/optimize_table_final.sql new file mode 100644 index 0000000..69b1d21 --- /dev/null +++ b/macros/optimize_table_final.sql @@ -0,0 +1,3 @@ +{% macro optimize_table_final(table_name) %} + Optimize Table {{ table_name }} Final +{% endmacro %} diff --git a/models/dw/dw.dim_date.sql b/models/dw/dw.dim_date.sql new file mode 100644 index 0000000..75a4f53 --- /dev/null +++ b/models/dw/dw.dim_date.sql @@ -0,0 +1,30 @@ + +/* + 日期 +*/ +{{ + config( + materialized='table', + engine='MergeTree', + order_by='date_id' + ) +}} +SELECT + toUInt32(year(date) * 10000 + month(date) * 100 + day(date)) AS date_id, + date AS full_date, + toYear(date) AS year, + toUInt8(quarter(date)) AS quarter, + toUInt8(month(date)) AS month, + toUInt8(toDayOfMonth(date)) AS day, + toUInt8(week(date)) AS week_num_of_year, + toUInt8(toDayOfWeek(date)) AS day_num_of_week +FROM ( + SELECT + arrayJoin( + arrayMap( + x -> toDate('1990-01-01') + x, + range(toUInt32(dateDiff('day', toDate('1990-01-01'), toDate('2031-01-01')))) + ) + ) AS date +) +ORDER BY date_id \ No newline at end of file diff --git a/models/dw/dw.dim_exchange.sql b/models/dw/dw.dim_exchange.sql new file mode 100644 index 0000000..fe8a54a --- /dev/null +++ b/models/dw/dw.dim_exchange.sql @@ -0,0 +1,20 @@ + +/* + 交易所 +*/ + +{{ config(materialized='view') }} + +with source_data as ( + + select 0 As exchange_id + , 'SSE' As ts_exchange_code + , '上海证券交易所' As exchange_name + Union All + select 1 As exchange_id + , 'SZSE' As ts_exchange_code + , '深圳证券交易所' As exchange_name +) + +select * +from source_data diff --git 
a/models/dw/dw.dim_hs_calendar.sql b/models/dw/dw.dim_hs_calendar.sql new file mode 100644 index 0000000..2004f86 --- /dev/null +++ b/models/dw/dw.dim_hs_calendar.sql @@ -0,0 +1,17 @@ +/* + 沪深交易日历 +*/ +{{ + config( + materialized='table', + engine="MergeTree", + order_by='(exchange_id, date_id)', + ) +}} + +Select `Exchange`.`exchange_id` As `exchange_id` + , Cast(`StgCalendar`.`cal_date` As Int32) As `date_id` + , Cast(`StgCalendar`.`is_open` As Int8) As `is_open` +From {{ source('finance', 'stg.tushare_trade_calendar') }} As `StgCalendar` Final +All Inner Join {{ ref('dw.dim_exchange') }} As `Exchange` Final + On `StgCalendar`.`exchange` = `Exchange`.`ts_exchange_code` \ No newline at end of file diff --git a/models/dw/dw.dim_stock.sql b/models/dw/dw.dim_stock.sql new file mode 100644 index 0000000..8bf8d62 --- /dev/null +++ b/models/dw/dw.dim_stock.sql @@ -0,0 +1,45 @@ + +/* + 股票列表 +*/ +{{ + config( + materialized='table', + engine='MergeTree', + order_by='ts_code' + ) +}} +SELECT + StgStock.ts_code As ts_code + , DimExchange.exchange_id As exchange_id + , arrayElement(splitByChar('.', StgStock.`ts_code`),1) As code + , StgStock.name As name + , StgStock.fullname As full_name + , StgStock.enname As name_en + , StgStock.cnspell As cn_spell + , StgStock.area As area + , StgStock.industry As industry + , StgStock.market As market + , StgStock.curr_type As currency + , ListDate.full_date As list_date + , Case When StgStock.delist_date Is Null Or StgStock.delist_date = '' Then Null Else DelistDate.full_date End As delist_date + , Case + When StgStock.list_status = 'L' + Then 1 + Else 0 + End As is_active + , Case + When is_hs In ('H', 'S') + Then 1 + Else 0 + End As is_hongkong_connect + , act_name As actual_controller + , act_ent_type As actual_controller_enterprise_type + , now() As dt +From {{ source('finance', 'stg.tushare_stock_basic') }} As StgStock Final +Left Join {{ ref("dw.dim_exchange") }} As DimExchange + On StgStock.exchange = 
DimExchange.ts_exchange_code +Left Join {{ ref("dw.dim_date") }} As ListDate + On StgStock.list_date = Cast(ListDate.date_id As String) +Left Join {{ ref("dw.dim_date") }} As DelistDate + On StgStock.delist_date = Cast(DelistDate.date_id As String) diff --git a/models/dw/dw.dim_time.sql b/models/dw/dw.dim_time.sql new file mode 100644 index 0000000..91c2b1a --- /dev/null +++ b/models/dw/dw.dim_time.sql @@ -0,0 +1,24 @@ +{{ + config( + materialized='table', + engine='MergeTree', + order_by='time_id' + ) +}} +Select cast(formatDateTime(`full_time`, '%Y%m%d%H%i') As Int64) As time_id + , `full_time` + , `date_id` + , hour(`full_time`) As `hour` + , minute(`full_time`) As `minute` +From( + SELECT + arrayJoin( + arrayMap( + x -> toStartOfMinute(toDateTime(toDate(`full_date`)) + x * 60), + range(0, 24 * 60) + ) + ) AS `full_time` + , `date_id` + From {{ ref("dw.dim_date") }} + Where `date_id` >= 20090101 +) As T \ No newline at end of file diff --git a/models/dw/dw.fact_stock_daily.sql b/models/dw/dw.fact_stock_daily.sql new file mode 100644 index 0000000..e5858a3 --- /dev/null +++ b/models/dw/dw.fact_stock_daily.sql @@ -0,0 +1,49 @@ +{{ + config( + materialized='incremental', + engine='MergeTree', + order_by='ts_code, date_id', + unique_key=['ts_code', 'date_id'], + incremental_strategy='delete+insert', + ) +}} +Select + StgDaily.ts_code As ts_code, + DimDate.date_id As date_id, + StgDaily.open As open, + StgDaily.high As high, + StgDaily.low As low, + StgDaily.close As close, + StgDaily.pre_close As pre_close, + StgDaily.change As change, + StgDaily.pct_chg As pct_chg, + StgDaily.vol As vol, + StgDaily.amount As amount, + StgAdjFactor.adj_factor As adj_factor, + StgDailyBasic.turnover_rate As turnover_rate, + StgDailyBasic.turnover_rate_f As turnover_rate_f, + StgDailyBasic.volume_ratio As volume_ratio, + StgDailyBasic.pe As pe, + StgDailyBasic.pe_ttm As pe_ttm, + StgDailyBasic.pb As pb, + StgDailyBasic.ps As ps, + StgDailyBasic.ps_ttm As ps_ttm, + 
StgDailyBasic.dv_ratio As dv_ratio, + StgDailyBasic.dv_ttm As dv_ttm, + StgDailyBasic.total_share As total_share, + StgDailyBasic.float_share As float_share, + StgDailyBasic.free_share As free_share, + StgDailyBasic.total_mv As total_mv, + StgDailyBasic.circ_mv As circ_mv +From {{ source('finance', 'stg.tushare_daily') }} As StgDaily Final +Inner Join {{ ref('dw.dim_date') }} As DimDate + On StgDaily.trade_date = Cast(DimDate.date_id As String) +Any Left Join {{ source('finance', 'stg.tushare_daily_basic') }} As StgDailyBasic + On StgDaily.ts_code = StgDailyBasic.ts_code + And StgDaily.trade_date = StgDailyBasic.trade_date +Any Left Join {{ source('finance', 'stg.tushare_adj_factor') }} As StgAdjFactor + On StgDaily.ts_code = StgAdjFactor.ts_code + And StgDaily.trade_date = StgAdjFactor.trade_date +{% if is_incremental() %} + Where StgDaily.trade_date >= (Select cast(max(date_id) As String) From {{ this }}) +{% endif %} \ No newline at end of file diff --git a/models/dw/dw.fact_stock_minute.sql b/models/dw/dw.fact_stock_minute.sql new file mode 100644 index 0000000..36e2eb8 --- /dev/null +++ b/models/dw/dw.fact_stock_minute.sql @@ -0,0 +1,34 @@ +{{ + config( + materialized='materialized_view', + engine='MergeTree', + order_by='ts_code, time_id', + unique_key=['ts_code', 'time_id'], + partition_by=['cast(floor(time_id / 1000000) As Int32)'], + query_settings={"join_algorithm": "'partial_merge'"} + ) +}} + +Select + StgMinutes.ts_code As ts_code, + DimTime.time_id As time_id, + StgMinutes.close As close, + StgMinutes.open As open, + StgMinutes.high As high, + StgMinutes.low As low, + StgMinutes.vol As vol, + StgMinutes.amount As amount, + StgAdjFactor.adj_factor As adj_factor, + now() As dt +From {{ source('finance', 'stg.tushare_minutes') }} As StgMinutes +Inner Join {{ ref('dw.dim_time') }} As DimTime + On StgMinutes.trade_time = formatDateTimeInJodaSyntax(full_time, 'yyyy-MM-dd HH:mm:ss') +Any Left Join {{ source('finance', 'stg.tushare_adj_factor') }} As 
StgAdjFactor + On StgMinutes.ts_code = StgAdjFactor.ts_code + And StgAdjFactor.trade_date = Cast(DimTime.date_id As String) +where 1 = 1 +{% if 'dev' in target.name %} +And StgMinutes.trade_time >= formatDateTimeInJodaSyntax(today(), 'yyyy-01-01 00:00:00') +{% elif target.name == 'test' %} +And StgMinutes.trade_time >= formatDateTimeInJodaSyntax(dateAdd(Year,-1,today()), 'yyyy-01-01 00:00:00') +{% endif %} \ No newline at end of file diff --git a/models/dw/schema.yml b/models/dw/schema.yml new file mode 100644 index 0000000..9c264c5 --- /dev/null +++ b/models/dw/schema.yml @@ -0,0 +1,292 @@ + +version: 2 + +models: + - name: dw.dim_exchange + description: "交易所" + columns: + - name: exchange_id + description: "The primary key for this table" + tests: + - unique + - not_null + - name: ts_exchange_code + description: "交易所代码" + tests: + - not_null + - unique + - name: exchange_name + description: "交易所名称" + tests: + - not_null + - unique + - name: dw.dim_date + description: "日期" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - year + - month + - day + columns: + - name: date_id + description: "日期键" + tests: + - unique + - not_null + - name: full_date + description: "日期" + tests: + - not_null + - unique + - name: year + description: "年" + tests: + - not_null + - name: quarter + description: "季度" + tests: + - not_null + - name: month + description: "月" + tests: + - not_null + - name: day + description: "日" + tests: + - not_null + - name: week_num_of_year + description: "年中第几周" + tests: + - not_null + - name: day_num_of_week + description: "星期" + tests: + - not_null + - name: dw.dim_time + description: "时间" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date_id + - hour + - minute + columns: + - name: time_id + description: "时间键" + tests: + - unique + - not_null + - name: full_time + description: "时间" + tests: + - not_null + - unique + - name: date_id + description: "日期键" + tests: + - not_null + - 
name: hour + description: "小时" + tests: + - not_null + - name: minute + description: "分钟" + tests: + - not_null + - name: dw.dim_hs_calendar + description: "沪深交易日历" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - exchange_id + - date_id + columns: + - name: exchange_id + description: "The primary key for this table" + tests: + - not_null + - name: date_id + description: "交易日期键" + tests: + - not_null + - name: is_open + description: "是否交易" + tests: + - not_null + - name: dw.dim_stock + description: "股票" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - exchange_id + - code + columns: + - name: ts_code + description: "tushare 股票编码" + tests: + - unique + - not_null + - name: exchange_id + description: "交易所键" + tests: + - not_null + - name: code + description: "股票代码" + tests: + - not_null + - name: name + description: "股票名称" + tests: + - not_null + - unique + - name: full_name + description: "股票全称" + tests: + - not_null + - unique + - name: name_en + description: "股票英文名称" + tests: + - not_null + - name: cn_spell + description: "拼音缩写" + tests: + - not_null + - name: area + description: "地区" + - name: industry + description: "行业" + - name: market + description: "市场类型 (主板/中小板/创业板)" + - name: currency + description: "交易货币" + - name: list_date + description: "上市日期" + - name: delist_date + description: "退市日期" + - name: is_active + description: "当前是否在市" + tests: + - not_null + - name: is_hongkong_connect + description: "是否沪深通标的" + tests: + - not_null + - name: actual_controller + description: "实际控制人" + - name: actual_controller_enterprise_type + description: "实际控制人企业类型" + - name: dt + description: "更新时间" + - name: dw.fact_stock_daily + description: "股票日数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - date_id + columns: + - name: ts_code + description: "股票键" + tests: + - not_null + - name: date_id + description: "日期键" + tests: + - not_null + - name: open + 
description: "开盘价" + - name: high + description: "最高价" + - name: low + description: "最低价" + - name: close + description: "收盘价" + - name: pre_close + description: "昨收价" + - name: change + description: "涨跌额" + - name: pct_chg + description: "涨跌幅" + - name: vol + description: "成交量" + - name: amount + description: "成交额" + - name: adj_factor + description: "复权因子" + - name: turnover_rate + description: "换手率" + - name: turnover_rate_f + description: "换手率(自由流通股)" + - name: volume_ratio + description: "量比" + - name: pe + description: "市盈率(总市值/净利润, 亏损的PE为空)" + - name: pe_ttm + description: "市盈率(TTM)" + - name: pb + description: "市净率(总市值/净资产)" + - name: ps + description: "市销率" + - name: ps_ttm + description: "市销率(TTM)" + - name: dv_ratio + description: "股息率" + - name: dv_ttm + description: "股息率(TTM)" + - name: total_share + description: "总股本(万股)" + - name: float_share + description: "流通股本(万股)" + - name: free_share + description: "自由流通股本(万)" + - name: total_mv + description: "总市值(万元)" + - name: circ_mv + description: "流通市值(万元)" + - name: dw.fact_stock_minute + description: "股票分钟数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - time_id + columns: + - name: ts_code + description: "股票键" + data_type: "String" + tests: + - not_null + - name: time_id + description: "时间键" + data_type: "Int64" + tests: + - not_null + - name: open + description: "开盘价" + data_type: "Nullable(Decimal(16,2))" + - name: high + description: "最高价" + data_type: "Nullable(Decimal(16,2))" + - name: low + description: "最低价" + data_type: "Nullable(Decimal(16,2))" + - name: close + description: "收盘价" + data_type: "Nullable(Decimal(16,2))" + - name: vol + description: "成交量" + data_type: "Nullable(Int32)" + - name: amount + description: "成交额" + data_type: "Nullable(Decimal(18,2))" + - name: adj_factor + description: "复权因子" + data_type: "Decimal(16,6)" + tests: + - not_null + - name: dt + description: "时间" + data_type: "DateTime" + \ No newline at end of file diff
--git a/models/finance_source.yml b/models/finance_source.yml new file mode 100644 index 0000000..1d1b3a8 --- /dev/null +++ b/models/finance_source.yml @@ -0,0 +1,253 @@ +version: 2 +sources: +- name: finance + schema: '{{ env_var("FINANCE_CLICKHOUSE_DATABASE") }}' + description: "The source for financial data" + tables: + - name: stg.tushare_trade_calendar + description: > + The source table for the trade calendar data from Tushare + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - exchange + - cal_date + columns: + - name: exchange + description: "The exchange code" + tests: + - not_null + - name: cal_date + description: "The date" + tests: + - not_null + - name: is_open + description: "Whether the exchange is open" + tests: + - not_null + - name: pretrade_date + description: "The previous trading date" + - name: stg.tushare_stock_basic + columns: + - name: ts_code + description: TS代码 + tests: + - not_null + - name: symbol + description: 股票代码 + - name: name + description: 股票名称 + - name: area + description: 所在地域 + - name: industry + description: 所属行业 + - name: fullname + description: 股票全称 + - name: enname + description: 英文全称 + - name: cnspell + description: 拼音缩写 + - name: market + description: 市场类型 (主板/中小板/创业板) + - name: exchange + description: 交易所代码 + tests: + - not_null + - name: curr_type + description: 交易货币 + - name: list_status + description: "上市状态: L上市 D退市 P暂停上市" + - name: list_date + description: 上市日期 + - name: delist_date + description: 退市日期 + - name: is_hs + description: 是否沪深港通标的,N否 H沪股通 S深股通 + - name: act_name + description: 实控人名称 + - name: act_ent_type + description: 实控人企业性质 + - name: dt + description: 最后修改时间 + unique_composite: + - name: unique_exchange_ts_code + columns: + - exchange + - ts_code + - name: stg.tushare_daily + description: "沪深股票交易日线数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_date + columns: + - name: ts_code + description: TS代码 + tests: + -
not_null + - name: trade_date + description: 交易日期 + tests: + - not_null + - name: open + description: 开盘价 + tests: + - not_null + - name: high + description: 最高价 + tests: + - not_null + - name: low + description: 最低价 + tests: + - not_null + - name: close + description: 收盘价 + tests: + - not_null + - name: pre_close + description: 昨收价 + tests: + - not_null + - name: change + description: 涨跌额 + tests: + - not_null + - name: pct_chg + description: 涨跌幅 + tests: + - not_null + - name: vol + description: 成交量 + tests: + - not_null + - name: amount + description: 成交额 + - name: stg.tushare_daily_basic + description: "沪深股票每日指标数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_date + + columns: + - name: ts_code + description: "股票代码" + data_type: String + - name: trade_date + description: "交易日期" + data_type: String + - name: close + description: "当日收盘价" + data_type: Nullable(Float32) + - name: turnover_rate + description: "换手率(%)" + data_type: Nullable(Float32) + - name: turnover_rate_f + description: "换手率(自由流通股)" + data_type: Nullable(Float32) + - name: volume_ratio + description: "量比" + data_type: Nullable(Float32) + - name: pe + description: "市盈率(总市值/净利润, 亏损的PE为空)" + data_type: Nullable(Float32) + - name: pe_ttm + description: "市盈率(TTM,亏损的PE为空)" + data_type: Nullable(Float32) + - name: pb + description: "市净率(总市值/净资产)" + data_type: Nullable(Float32) + - name: ps + description: "市销率" + data_type: Nullable(Float32) + - name: ps_ttm + description: "市销率(TTM)" + data_type: Nullable(Float32) + - name: dv_ratio + description: "股息率 (%)" + data_type: Nullable(Float32) + - name: dv_ttm + description: "股息率(TTM)(%)" + data_type: Nullable(Float32) + - name: total_share + description: "总股本 (万股)" + data_type: Nullable(Float32) + - name: float_share + description: "流通股本 (万股)" + data_type: Nullable(Float32) + - name: free_share + description: "自由流通股本 (万)" + data_type: Nullable(Float32) + - name: total_mv + description: "总市值 (万元)" + 
data_type: Nullable(Float32) + - name: circ_mv + description: "流通市值(万元)" + data_type: Nullable(Float32) + - name: stg.tushare_adj_factor + description: "沪深股票复权因子" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_date + columns: + - name: ts_code + description: "股票代码" + data_type: String + - name: trade_date + description: "交易日期" + data_type: String + - name: adj_factor + description: "复权因子" + data_type: Nullable(Decimal(16, 6)) + - name: dt + description: "更新日期" + data_type: DateTime + - name: stg.tushare_minutes + description: "沪深分钟级交易数据" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ts_code + - trade_time + config: + where: "left(trade_time,7)>=concat(cast(year(today())-1 As String), '-01')" + columns: + - name: ts_code + description: "The stock code." + data_type: String + tests: + - not_null + - name: trade_time + description: "The trading time." + data_type: String + tests: + - not_null + - name: close + description: "The closing price." + data_type: Nullable(Float32) + - name: open + description: "The opening price." + data_type: Nullable(Float32) + - name: high + description: "The highest price." + data_type: Nullable(Float32) + - name: low + description: "The lowest price." + data_type: Nullable(Float32) + - name: vol + description: "The volume of trades." + data_type: Nullable(Float32) + - name: amount + description: "The amount of trades." 
+ data_type: Nullable(Float32) + meta: + engine: ReplacingMergeTree + partition_by: "left(trade_time, 7)" + order_by: "(ts_code, trade_time)" + settings: + index_granularity: 8192 diff --git a/package-lock.yml b/package-lock.yml new file mode 100644 index 0000000..eddc3be --- /dev/null +++ b/package-lock.yml @@ -0,0 +1,4 @@ +packages: +- package: dbt-labs/dbt_utils + version: 1.1.1 +sha1_hash: a158c48c59c2bb7d729d2a4e215aabe5bb4f3353 diff --git a/packages.yml b/packages.yml new file mode 100644 index 0000000..6152b33 --- /dev/null +++ b/packages.yml @@ -0,0 +1,3 @@ +packages: + - package: dbt-labs/dbt_utils + version: 1.1.1 \ No newline at end of file diff --git a/profiles.yml b/profiles.yml new file mode 100644 index 0000000..c189897 --- /dev/null +++ b/profiles.yml @@ -0,0 +1,36 @@ +finance_dbt: + target: "{{ env_var('FINANCE_DBT_TARGET') }}" + outputs: + .template: &default_template # 定义锚点 + type: clickhouse + schema: "{{ env_var('FINANCE_CLICKHOUSE_DATABASE') }}" + # optional + driver: http + host: "{{ env_var('FINANCE_CLICKHOUSE_HOST') }}" + port: "{{ env_var('FINANCE_CLICKHOUSE_HTTP_PORT') | as_number }}" + user: "{{ env_var('FINANCE_CLICKHOUSE_USER') }}" + password: "{{ env_var('FINANCE_CLICKHOUSE_PASSWORD') }}" + verify: True + secure: False + retries: 1 + compression: gzip + connect_timeout: 10 + send_receive_timeout: 300 + cluster_mode: False + use_lw_deletes: True + check_exchange: True + local_suffix: _local + allow_automatic_deduplication: True + custom_settings: {} + + # Native (clickhouse-driver) connection settings + sync_request_timeout: 5 + compress_block_size: 1048576 + dev: + <<: *default_template # 引用锚点 + test: + <<: *default_template # 引用锚点 + prod: + <<: *default_template # 引用锚点 + local_dev: + <<: *default_template # 引用锚点 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9d13d56 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +dbt==1.0.0.37.0 +dbt-clickhouse==1.7.3 \ No 
newline at end of file