readabs.read_rba_table
Read a table from the RBA website and store it in a pandas DataFrame.
"""Read a table from the RBA website and store it in a pandas DataFrame."""

from typing import Any, cast
from io import BytesIO
import re
from pandas import (
    DataFrame,
    DatetimeIndex,
    PeriodIndex,
    Period,
    Index,
    read_excel,
    Series,
    Timestamp,
    period_range,
)

# local imports
from readabs.rba_catalogue import rba_catalogue
from readabs.download_cache import get_file, HttpError, CacheError
from readabs.rba_meta_data import rba_metacol as rm


# --- PRIVATE ---
def _get_excel_file(
    table: str,
    ignore_errors: bool,
    **kwargs: Any,
) -> bytes | None:
    """Get the Excel file from the RBA website for the given table.

    Parameters
    ----------
    table : str
        The table moniker; it must be in the index of rba_catalogue().
    ignore_errors : bool
        If True, failures are printed and None is returned.
        If False, failures raise an exception.
    **kwargs : Any
        Passed through unchanged to get_file().

    Returns
    -------
    bytes | None
        The downloaded file, or None if an error was ignored.

    Raises
    ------
    ValueError
        If the table is not in the RBA catalogue (and not ignore_errors).
    HttpError, CacheError
        If every candidate URL failed (and not ignore_errors)."""

    # get the relevant URL for a table moniker
    cat_map = rba_catalogue()
    if table not in cat_map.index:
        message = f"Table '{table}' not found in RBA catalogue."
        if ignore_errors:
            print(f"Ignoring error: {message}")
            return None
        raise ValueError(message)
    url = str(cat_map.loc[table, "URL"])

    # get Excel file - try different file name extensions
    # because the RBA website sometimes changes the file
    # extension in error
    # Only the final ".xls"/".xlsx" suffix is swapped, so file names that
    # contain additional dots still get an alternative URL.
    swap = {".xls": ".xlsx", ".xlsx": ".xls"}
    alternative = re.sub(r"\.xlsx?$", lambda m: swap[m.group()], url)
    urls = [url]
    if alternative != url:
        urls.append(alternative)

    # try to get the Excel file - including with different extensions
    for this_url in urls:
        try:
            return get_file(this_url, **kwargs)
        except (HttpError, CacheError) as e:
            if this_url != urls[-1]:
                continue  # another candidate URL is still to be tried
            if ignore_errors:
                print(f"Ignoring error: {e}")
                return None
            raise

    return None  # not reached: urls always holds at least one entry


# --- PUBLIC ---
def read_rba_table(table: str, **kwargs: Any) -> tuple[DataFrame, DataFrame]:
    """Read a table from the RBA website and return the actual data
    and the meta data in a tuple of two DataFrames.

    Parameters
    ----------
    table : str
        The table to read from the RBA website.
    **kwargs : Any
        Additional keyword arguments.
        ignore_errors is used here; any other keyword arguments are
        passed on to the download step.
    ignore_errors : bool = False
        If True, then any major errors encountered will be printed and the function
        will return empty DataFrames. If False, then any major errors encountered
        will raise an exception.

    Returns
    -------
    tuple[DataFrame, DataFrame]
        The primary data and the meta data in a tuple of two DataFrames.

    Examples
    --------
    ```python
    data, meta = read_rba_table("C1")
    ```"""

    # set-up
    ignore_errors = kwargs.get("ignore_errors", False)
    # ignore_errors is handed to the helper explicitly below; if it also
    # stayed in the pass-through kwargs the call would fail with
    # "TypeError: got multiple values for argument 'ignore_errors'".
    passthrough = {k: v for k, v in kwargs.items() if k != "ignore_errors"}
    data, meta = DataFrame(), DataFrame()

    # get the Excel file
    excel = _get_excel_file(table, ignore_errors, **passthrough)
    if excel is None:
        return data, meta

    # read Excel file into DataFrame
    try:
        raw = read_excel(BytesIO(excel), header=None, index_col=None)
    except Exception as e:
        if ignore_errors:
            print(f"Ignoring error: {e}")
            return data, meta
        raise

    # extract the meta data
    # (rows 1-10 of the sheet, transposed so that each series is a row;
    # the first sheet column supplies the meta data column names)
    meta = raw.iloc[1:11, :].T.copy()
    meta.columns = Index(meta.iloc[0])
    renamer = {
        "Mnemonic": rm.id,
    }  # historical data is inconsistent
    meta = meta.rename(columns=renamer)
    meta = meta.iloc[1:, :]
    meta.index = Index(meta[rm.id])
    meta[rm.table] = table
    meta[rm.tdesc] = raw.iloc[0, 0]  # top-left cell is used as the description
    meta = meta.dropna(how="all", axis=1)  # drop columns with all NaNs

    # extract the data
    # (row 10 is both the last meta data row and the header for the data)
    data = raw.iloc[10:, :].copy()
    data.columns = Index(data.iloc[0])
    data = data.iloc[1:, :]
    data.index = DatetimeIndex(data.iloc[:, 0])
    data = data.iloc[:, 1:]
    data = data.dropna(how="all", axis=1)  # drop columns with all NaNs

    # can we make the index into a PeriodIndex?
    # The gaps (in days) between consecutive rows decide the frequency;
    # with fewer than two rows min()/max() are NaN and we fall back to daily.
    days = data.index.to_series().diff(1).dropna().dt.days
    if days.min() >= 28 and days.max() <= 31:
        data.index = PeriodIndex(data.index, freq="M")
    elif days.min() >= 90 and days.max() <= 92:
        data.index = PeriodIndex(data.index, freq="Q")
    elif days.min() >= 365 and days.max() <= 366:
        data.index = PeriodIndex(data.index, freq="Y")
    else:
        data.index = PeriodIndex(data.index, freq="D")

    return data, meta


def read_rba_ocr(monthly: bool = True, **kwargs: Any) -> Series:
    """Read the Official Cash Rate (OCR) from the RBA website and return it
    in a pandas Series, with either a daily or monthly PeriodIndex,
    depending on the value of the monthly parameter. The default is monthly.

    Parameters
    ----------
    monthly : bool = True
        If True, then the data will be returned with a monthly PeriodIndex.
        If False, then the data will be returned with a daily PeriodIndex.
    **kwargs : Any
        Additional keyword arguments, passed on to read_rba_table().
    ignore_errors : bool = False
        If True, then any major errors encountered will be printed and the function
        will return an empty Series. If False, then any major errors encountered
        will raise an exception.

    Returns
    -------
    Series
        The OCR data in a pandas Series, with an index of either daily or monthly Periods.

    Examples
    --------
    ```python
    ocr = read_rba_ocr(monthly=True)
    ```"""

    series_name = "RBA Official Cash Rate"

    # read the OCR table from the RBA website
    rba, _rba_meta = read_rba_table("A2", **kwargs)  # should have a daily PeriodIndex
    if rba.empty and kwargs.get("ignore_errors", False):
        # the read failed and the error was ignored: honour the documented
        # contract and return an empty Series rather than raising KeyError
        return Series(dtype=float, name=series_name)

    # make float and sort, name the series
    ocr = (
        rba.loc[lambda x: x.index >= "1990-08-02", "ARBAMPCNCRT"]
        .astype(float)
        .sort_index()
    )
    ocr.name = series_name

    # bring up to date
    # (carry the latest rate forward to the current period)
    today = Period(Timestamp.today(), freq=cast(PeriodIndex, ocr.index).freqstr)
    if ocr.index[-1] < today:
        ocr[today] = ocr.iloc[-1]

    if not monthly:
        # fill in missing days and return daily data
        daily_index = period_range(start=ocr.index.min(), end=ocr.index.max(), freq="D")
        ocr = ocr.reindex(daily_index).ffill()
        return ocr

    # convert to monthly data, keeping last value if duplicates in month
    # fill in missing months
    ocr.index = PeriodIndex(ocr.index, freq="M")
    ocr = ocr[~ocr.index.duplicated(keep="last")]
    monthly_index = period_range(start=ocr.index.min(), end=ocr.index.max(), freq="M")
    ocr = ocr.reindex(monthly_index, method="ffill")
    return ocr


# --- TESTING ---
if __name__ == "__main__":

    def test_read_rba_table() -> None:
        """Test the read_rba_table function."""

        # test with a known table
        d, m = read_rba_table("C1")
        print(m)
        print(d.head())
        print(d.tail())
        print("=" * 20)

        # test with an unknown table
        try:
            d, m = read_rba_table("XYZ")
        except ValueError as e:
            print(e)
        print("=" * 20)

    test_read_rba_table()

    def test_read_rba_ocr() -> None:
        """Test the read_rba_ocr function."""

        # test with monthly data
        ocr = read_rba_ocr(monthly=True)
        print(ocr.head())
        print("...")
        print(ocr.tail())
        print("=" * 20)

        # test with daily data
        ocr = read_rba_ocr(monthly=False)
        print(ocr.head())
        print("...")
        print(ocr.tail())
        print("=" * 20)

    test_read_rba_ocr()
def read_rba_table(table: str, **kwargs: Any) -> tuple[DataFrame, DataFrame]:
    """Read a table from the RBA website and return the actual data
    and the meta data in a tuple of two DataFrames.

    Parameters
    ----------
    table : str
        The table to read from the RBA website.
    **kwargs : Any
        Additional keyword arguments.
        ignore_errors is used here; any other keyword arguments are
        passed on to the download step.
    ignore_errors : bool = False
        If True, then any major errors encountered will be printed and the function
        will return empty DataFrames. If False, then any major errors encountered
        will raise an exception.

    Returns
    -------
    tuple[DataFrame, DataFrame]
        The primary data and the meta data in a tuple of two DataFrames.

    Examples
    --------
    ```python
    data, meta = read_rba_table("C1")
    ```"""

    # set-up
    ignore_errors = kwargs.get("ignore_errors", False)
    # ignore_errors is handed to the helper explicitly below; if it also
    # stayed in the pass-through kwargs the call would fail with
    # "TypeError: got multiple values for argument 'ignore_errors'".
    passthrough = {k: v for k, v in kwargs.items() if k != "ignore_errors"}
    data, meta = DataFrame(), DataFrame()

    # get the Excel file
    excel = _get_excel_file(table, ignore_errors, **passthrough)
    if excel is None:
        return data, meta

    # read Excel file into DataFrame
    try:
        raw = read_excel(BytesIO(excel), header=None, index_col=None)
    except Exception as e:
        if ignore_errors:
            print(f"Ignoring error: {e}")
            return data, meta
        raise

    # extract the meta data
    # (rows 1-10 of the sheet, transposed so that each series is a row;
    # the first sheet column supplies the meta data column names)
    meta = raw.iloc[1:11, :].T.copy()
    meta.columns = Index(meta.iloc[0])
    renamer = {
        "Mnemonic": rm.id,
    }  # historical data is inconsistent
    meta = meta.rename(columns=renamer)
    meta = meta.iloc[1:, :]
    meta.index = Index(meta[rm.id])
    meta[rm.table] = table
    meta[rm.tdesc] = raw.iloc[0, 0]  # top-left cell is used as the description
    meta = meta.dropna(how="all", axis=1)  # drop columns with all NaNs

    # extract the data
    # (row 10 is both the last meta data row and the header for the data)
    data = raw.iloc[10:, :].copy()
    data.columns = Index(data.iloc[0])
    data = data.iloc[1:, :]
    data.index = DatetimeIndex(data.iloc[:, 0])
    data = data.iloc[:, 1:]
    data = data.dropna(how="all", axis=1)  # drop columns with all NaNs

    # can we make the index into a PeriodIndex?
    # The gaps (in days) between consecutive rows decide the frequency;
    # with fewer than two rows min()/max() are NaN and we fall back to daily.
    days = data.index.to_series().diff(1).dropna().dt.days
    if days.min() >= 28 and days.max() <= 31:
        data.index = PeriodIndex(data.index, freq="M")
    elif days.min() >= 90 and days.max() <= 92:
        data.index = PeriodIndex(data.index, freq="Q")
    elif days.min() >= 365 and days.max() <= 366:
        data.index = PeriodIndex(data.index, freq="Y")
    else:
        data.index = PeriodIndex(data.index, freq="D")

    return data, meta
Read a table from the RBA website and return the actual data and the meta data in a tuple of two DataFrames.
Parameters
table : str — The table to read from the RBA website.
**kwargs : Any — Additional keyword arguments. The only keyword argument that is used is ignore_errors.
ignore_errors : bool = False — If True, then any major errors encountered will be printed and the function will return empty DataFrames. If False, then any major errors encountered will raise an exception.
Returns
tuple[DataFrame, DataFrame] The primary data and the meta data in a tuple of two DataFrames.
Examples
data, meta = read_rba_table("C1")
def read_rba_ocr(monthly: bool = True, **kwargs: Any) -> Series:
    """Read the Official Cash Rate (OCR) from the RBA website and return it
    in a pandas Series, with either a daily or monthly PeriodIndex,
    depending on the value of the monthly parameter. The default is monthly.

    Parameters
    ----------
    monthly : bool = True
        If True, then the data will be returned with a monthly PeriodIndex.
        If False, then the data will be returned with a daily PeriodIndex.
    **kwargs : Any
        Additional keyword arguments, passed on to read_rba_table().
    ignore_errors : bool = False
        If True, then any major errors encountered will be printed and the function
        will return an empty Series. If False, then any major errors encountered
        will raise an exception.

    Returns
    -------
    Series
        The OCR data in a pandas Series, with an index of either daily or monthly Periods.

    Examples
    --------
    ```python
    ocr = read_rba_ocr(monthly=True)
    ```"""

    series_name = "RBA Official Cash Rate"

    # read the OCR table from the RBA website
    rba, _rba_meta = read_rba_table("A2", **kwargs)  # should have a daily PeriodIndex
    if rba.empty and kwargs.get("ignore_errors", False):
        # the read failed and the error was ignored: honour the documented
        # contract and return an empty Series rather than raising KeyError
        return Series(dtype=float, name=series_name)

    # make float and sort, name the series
    ocr = (
        rba.loc[lambda x: x.index >= "1990-08-02", "ARBAMPCNCRT"]
        .astype(float)
        .sort_index()
    )
    ocr.name = series_name

    # bring up to date
    # (carry the latest rate forward to the current period)
    today = Period(Timestamp.today(), freq=cast(PeriodIndex, ocr.index).freqstr)
    if ocr.index[-1] < today:
        ocr[today] = ocr.iloc[-1]

    if not monthly:
        # fill in missing days and return daily data
        daily_index = period_range(start=ocr.index.min(), end=ocr.index.max(), freq="D")
        ocr = ocr.reindex(daily_index).ffill()
        return ocr

    # convert to monthly data, keeping last value if duplicates in month
    # fill in missing months
    ocr.index = PeriodIndex(ocr.index, freq="M")
    ocr = ocr[~ocr.index.duplicated(keep="last")]
    monthly_index = period_range(start=ocr.index.min(), end=ocr.index.max(), freq="M")
    ocr = ocr.reindex(monthly_index, method="ffill")
    return ocr
Read the Official Cash Rate (OCR) from the RBA website and return it in a pandas Series, with either a daily or monthly PeriodIndex, depending on the value of the monthly parameter. The default is monthly.
Parameters
monthly : bool = True — If True, then the data will be returned with a monthly PeriodIndex. If False, then the data will be returned with a daily PeriodIndex.
**kwargs : Any — Additional keyword arguments. The only keyword argument that is used is ignore_errors.
ignore_errors : bool = False — If True, then any major errors encountered will be printed and the function will return an empty Series. If False, then any major errors encountered will raise an exception.
Returns
Series The OCR data in a pandas Series, with an index of either daily or monthly Periods.
Examples
ocr = read_rba_ocr(monthly=True)