设置DATA_PATH,读取已保存的数据¶
In [1]:
import os
os.environ['DATA_PATH'] = 'E:/code/tgtrader/data/akshare_data.db'
In [2]:
# DuckDBQuery 用于查询duckdb数据库
from tgtrader.utils.duckdb_query import DuckDBQuery
# DuckDBQueryDF 用于查询pandas dataframe
from tgtrader.utils.duckdb_query_df import DuckDBQueryDF
from tgtrader.common import DataSource
import time
import pandas as pd
# ignore warnings
import warnings
warnings.filterwarnings("ignore")
2025-01-05 21:30:20.982 | INFO | tgtrader.data_provider.dao.akshare.common:<module>:11 - akshare main_db: E:/code/tgtrader/data/akshare_data.db
In [3]:
# 设置读取数据库文件的db_query
data_source = DataSource.Akshare
db_query = DuckDBQuery(data_source)
数据读取到dataframe¶
In [4]:
df = db_query.fetch_df("select * from t_kdata")
In [5]:
df = df.sort_values(by=['code', 'date'])
创建DuckDBQueryDF, 后续可以直接用sql来查询df¶
In [6]:
duckdb_df_query = DuckDBQueryDF(df)
计算SMA¶
用duckdb查询(可直接对df进行查询)¶
In [7]:
sql = """
SELECT
code,
date,
open,
low,
high,
close,
volume,
-- 计算 20 日 SMA
AVG(close) OVER (PARTITION BY code ORDER BY date ROWS BETWEEN 19 PRECEDING AND CURRENT ROW) AS sma_20
FROM df
"""
start_time = time.perf_counter()
df_duck = duckdb_df_query.query(sql)
end_time = time.perf_counter()
print(f"duckdb 查询时间: {end_time - start_time} 秒")
duckdb 查询时间: 4.358183600001212 秒
用pandas计算¶
In [8]:
# pandas 查询
start_time = time.perf_counter()
# 对每个股票计算 sma_20
df_pandas = df.groupby('code').apply(lambda x: x.assign(sma_20=x['close'].rolling(window=20, min_periods=0).mean()))
end_time = time.perf_counter()
print(f"pandas 查询时间: {end_time - start_time} 秒")
pandas 查询时间: 9.910378100001253 秒
In [9]:
df_duck.shape
Out[9]:
(12108336, 8)
In [10]:
df_pandas.shape
Out[10]:
(12108336, 12)
In [11]:
df_duck.query("code=='000001'")
Out[11]:
code | date | open | low | high | close | volume | sma_20 | |
---|---|---|---|---|---|---|---|---|
2173488 | 000001 | 2010-01-04 | 1402.930054 | 1353.550049 | 1406.449951 | 1355.310059 | 241923.0 | 1355.310059 |
2173489 | 000001 | 2010-01-05 | 1357.660034 | 1298.880005 | 1366.479980 | 1331.209961 | 556500.0 | 1343.260010 |
2173490 | 000001 | 2010-01-06 | 1328.270020 | 1297.119995 | 1328.270020 | 1307.699951 | 412143.0 | 1331.406657 |
2173491 | 000001 | 2010-01-07 | 1307.699951 | 1278.300049 | 1316.510010 | 1293.000000 | 355337.0 | 1321.804993 |
2173492 | 000001 | 2010-01-08 | 1284.180054 | 1275.369995 | 1298.880005 | 1290.060059 | 288543.0 | 1315.456006 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2299285 | 000001 | 2024-12-25 | 2336.639893 | 2333.389893 | 2362.639893 | 2346.389893 | 1475283.0 | 2296.251453 |
2299286 | 000001 | 2024-12-26 | 2346.389893 | 2323.639893 | 2348.020020 | 2336.639893 | 1000075.0 | 2300.476953 |
2299287 | 000001 | 2024-12-27 | 2338.270020 | 2304.129883 | 2343.139893 | 2331.760010 | 1290012.0 | 2304.133459 |
2299288 | 000001 | 2024-12-30 | 2323.639893 | 2323.639893 | 2354.520020 | 2351.270020 | 1351846.0 | 2308.684460 |
2299289 | 000001 | 2024-12-31 | 2348.020020 | 2310.639893 | 2357.770020 | 2310.639893 | 1475367.0 | 2310.391455 |
3573 rows × 8 columns
In [12]:
df_pandas.query("code=='000001'")
Out[12]:
code | date | open | close | high | low | volume | adjust_type | source | create_time | update_time | sma_20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
10465023 | 000001 | 2010-01-04 | 1402.930054 | 1355.310059 | 1406.449951 | 1353.550049 | 241923.0 | hfq | akshare | 1736048554348 | 1736048554348 | 1355.310059 |
10465024 | 000001 | 2010-01-05 | 1357.660034 | 1331.209961 | 1366.479980 | 1298.880005 | 556500.0 | hfq | akshare | 1736048554348 | 1736048554348 | 1343.260010 |
10465025 | 000001 | 2010-01-06 | 1328.270020 | 1307.699951 | 1328.270020 | 1297.119995 | 412143.0 | hfq | akshare | 1736048554348 | 1736048554348 | 1331.406657 |
10465026 | 000001 | 2010-01-07 | 1307.699951 | 1293.000000 | 1316.510010 | 1278.300049 | 355337.0 | hfq | akshare | 1736048554348 | 1736048554348 | 1321.804993 |
10465027 | 000001 | 2010-01-08 | 1284.180054 | 1290.060059 | 1298.880005 | 1275.369995 | 288543.0 | hfq | akshare | 1736048554348 | 1736048554348 | 1315.456006 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7769755 | 000001 | 2024-12-25 | 2336.639893 | 2346.389893 | 2362.639893 | 2333.389893 | 1475283.0 | hfq | akshare | 1735745107736 | 1735745107736 | 2296.251453 |
7769756 | 000001 | 2024-12-26 | 2346.389893 | 2336.639893 | 2348.020020 | 2323.639893 | 1000075.0 | hfq | akshare | 1735745107736 | 1735745107736 | 2300.476953 |
7769757 | 000001 | 2024-12-27 | 2338.270020 | 2331.760010 | 2343.139893 | 2304.129883 | 1290012.0 | hfq | akshare | 1735745107736 | 1735745107736 | 2304.133459 |
7769758 | 000001 | 2024-12-30 | 2323.639893 | 2351.270020 | 2354.520020 | 2323.639893 | 1351846.0 | hfq | akshare | 1735745107736 | 1735745107736 | 2308.684460 |
7769759 | 000001 | 2024-12-31 | 2348.020020 | 2310.639893 | 2357.770020 | 2310.639893 | 1475367.0 | hfq | akshare | 1735745107736 | 1735745107736 | 2310.391455 |
3573 rows × 12 columns