设置DATA_PATH,读取已保存的数据¶

In [1]:
import os
os.environ['DATA_PATH'] = 'E:/code/tgtrader/data/akshare_data.db'
In [2]:
# DuckDBQuery 用于查询duckdb数据库  
from tgtrader.utils.duckdb_query import DuckDBQuery
# DuckDBQueryDF 用于查询pandas dataframe
from tgtrader.utils.duckdb_query_df import DuckDBQueryDF

from tgtrader.common import DataSource
import time
import pandas as pd

# ignore warnings
import warnings
warnings.filterwarnings("ignore")
2025-01-05 21:30:20.982 | INFO     | tgtrader.data_provider.dao.akshare.common:<module>:11 - akshare main_db: E:/code/tgtrader/data/akshare_data.db
In [3]:
# 设置读取数据库文件的db_query
data_source = DataSource.Akshare
db_query = DuckDBQuery(data_source)

数据读取到dataframe¶

In [4]:
df = db_query.fetch_df("select * from t_kdata")
In [5]:
df = df.sort_values(by=['code', 'date'])

创建DuckDBQueryDF, 后续可以直接用sql来查询df¶

In [6]:
duckdb_df_query = DuckDBQueryDF(df)

计算SMA¶

用duckdb查询(可直接对df进行查询)¶

In [7]:
sql = """
  SELECT
    code,
    date,
    open,
    low,
    high,
    close,
    volume,
    -- 计算 20 日 SMA
    AVG(close) OVER (PARTITION BY code ORDER BY date ROWS BETWEEN 19 PRECEDING AND CURRENT ROW) AS sma_20
  FROM df
"""

start_time = time.perf_counter()
df_duck = duckdb_df_query.query(sql)
end_time = time.perf_counter()
print(f"duckdb 查询时间: {end_time - start_time} 秒")
duckdb 查询时间: 4.358183600001212 秒

用pandas计算¶

In [8]:
# pandas 查询
start_time = time.perf_counter()

# 对每个股票计算 sma_20
df_pandas = df.groupby('code').apply(lambda x: x.assign(sma_20=x['close'].rolling(window=20, min_periods=0).mean()))

end_time = time.perf_counter()
print(f"pandas 查询时间: {end_time - start_time} 秒")
pandas 查询时间: 9.910378100001253 秒
In [9]:
df_duck.shape
Out[9]:
(12108336, 8)
In [10]:
df_pandas.shape
Out[10]:
(12108336, 12)
In [11]:
df_duck.query("code=='000001'")
Out[11]:
code date open low high close volume sma_20
2173488 000001 2010-01-04 1402.930054 1353.550049 1406.449951 1355.310059 241923.0 1355.310059
2173489 000001 2010-01-05 1357.660034 1298.880005 1366.479980 1331.209961 556500.0 1343.260010
2173490 000001 2010-01-06 1328.270020 1297.119995 1328.270020 1307.699951 412143.0 1331.406657
2173491 000001 2010-01-07 1307.699951 1278.300049 1316.510010 1293.000000 355337.0 1321.804993
2173492 000001 2010-01-08 1284.180054 1275.369995 1298.880005 1290.060059 288543.0 1315.456006
... ... ... ... ... ... ... ... ...
2299285 000001 2024-12-25 2336.639893 2333.389893 2362.639893 2346.389893 1475283.0 2296.251453
2299286 000001 2024-12-26 2346.389893 2323.639893 2348.020020 2336.639893 1000075.0 2300.476953
2299287 000001 2024-12-27 2338.270020 2304.129883 2343.139893 2331.760010 1290012.0 2304.133459
2299288 000001 2024-12-30 2323.639893 2323.639893 2354.520020 2351.270020 1351846.0 2308.684460
2299289 000001 2024-12-31 2348.020020 2310.639893 2357.770020 2310.639893 1475367.0 2310.391455

3573 rows × 8 columns

In [12]:
df_pandas.query("code=='000001'")
Out[12]:
code date open close high low volume adjust_type source create_time update_time sma_20
10465023 000001 2010-01-04 1402.930054 1355.310059 1406.449951 1353.550049 241923.0 hfq akshare 1736048554348 1736048554348 1355.310059
10465024 000001 2010-01-05 1357.660034 1331.209961 1366.479980 1298.880005 556500.0 hfq akshare 1736048554348 1736048554348 1343.260010
10465025 000001 2010-01-06 1328.270020 1307.699951 1328.270020 1297.119995 412143.0 hfq akshare 1736048554348 1736048554348 1331.406657
10465026 000001 2010-01-07 1307.699951 1293.000000 1316.510010 1278.300049 355337.0 hfq akshare 1736048554348 1736048554348 1321.804993
10465027 000001 2010-01-08 1284.180054 1290.060059 1298.880005 1275.369995 288543.0 hfq akshare 1736048554348 1736048554348 1315.456006
... ... ... ... ... ... ... ... ... ... ... ... ...
7769755 000001 2024-12-25 2336.639893 2346.389893 2362.639893 2333.389893 1475283.0 hfq akshare 1735745107736 1735745107736 2296.251453
7769756 000001 2024-12-26 2346.389893 2336.639893 2348.020020 2323.639893 1000075.0 hfq akshare 1735745107736 1735745107736 2300.476953
7769757 000001 2024-12-27 2338.270020 2331.760010 2343.139893 2304.129883 1290012.0 hfq akshare 1735745107736 1735745107736 2304.133459
7769758 000001 2024-12-30 2323.639893 2351.270020 2354.520020 2323.639893 1351846.0 hfq akshare 1735745107736 1735745107736 2308.684460
7769759 000001 2024-12-31 2348.020020 2310.639893 2357.770020 2310.639893 1475367.0 hfq akshare 1735745107736 1735745107736 2310.391455

3573 rows × 12 columns