readabs.read_rba_table

Read a table from the RBA website and store it in a pandas DataFrame.

  1"""Read a table from the RBA website and store it in a pandas DataFrame."""
  2
  3from typing import Any, cast
  4from io import BytesIO
  5import re
  6from pandas import (
  7    DataFrame,
  8    DatetimeIndex,
  9    PeriodIndex,
 10    Period,
 11    Index,
 12    read_excel,
 13    Series,
 14    Timestamp,
 15    period_range,
 16)
 17
 18# local imports
 19from readabs.rba_catalogue import rba_catalogue
 20from readabs.download_cache import get_file, HttpError, CacheError
 21from readabs.rba_meta_data import rba_metacol as rm
 22
 23
 24# --- PRIVATE ---
 25def _get_excel_file(
 26    table: str,
 27    ignore_errors: bool,
 28    **kwargs: Any,
 29) -> bytes | None:
 30    """Get the Excel file from the RBA website for the given table.
 31    Return bytes if successful, otherwise return None.
 32    Raises an exception if ignore_errors is False."""
 33
 34    # get the relevant URL for a table moniker
 35    cat_map = rba_catalogue()
 36    if table not in cat_map.index:
 37        message = f"Table '{table}' not found in RBA catalogue."
 38        if ignore_errors:
 39            print(f"Ignoring error: {message}")
 40            return None
 41        raise ValueError(message)
 42    url = str(cat_map.loc[table, "URL"])
 43
 44    # get Excel file - try different file name extensions
 45    # becasue the RBA website sometimes changes the file
 46    # extension in error
 47    urls = [
 48        url,
 49    ]
 50    rex = re.compile(r"\.[^/]*$")
 51    match = rex.search(url)
 52    if match is not None:
 53        tail = match.group()
 54        replace_with = {".xls": ".xlsx", ".xlsx": ".xls"}
 55        new_url = re.sub(rex, replace_with.get(tail, tail), url)
 56        if new_url != url:
 57            urls += [new_url]
 58
 59    # try to get the Excel file - including with different exensions
 60    for this_url in urls:
 61        try:
 62            excel = get_file(this_url, **kwargs)
 63        except (HttpError, CacheError) as e:
 64            if this_url == urls[-1]:
 65                if ignore_errors:
 66                    print(f"Ignoring error: {e}")
 67                    return None
 68                raise
 69        else:
 70            break
 71
 72    return excel
 73
 74
 75# --- PUBLIC ---
 76def read_rba_table(table: str, **kwargs: Any) -> tuple[DataFrame, DataFrame]:
 77    """Read a table from the RBA website and return the actual data
 78    and the meta data in a tuple of two DataFrames.
 79
 80    Parameters
 81    ----------
 82    table : str
 83        The table to read from the RBA website.
 84    **kwargs : Any
 85        Additional keyword arguments.
 86        The only keyword argument that is used is ignore_errors.
 87    ignore_errors : bool = False
 88        If True, then any major errors encountered will be printed and the function
 89        will return empty DataFrames. If False, then any major errors encountered
 90        will raise an exception.
 91
 92    Returns
 93    -------
 94    tuple[DataFrame, DataFrame]
 95        The primary data and the meta data in a tuple of two DataFrames.
 96
 97    Examples
 98    --------
 99    ```python
100    data, meta = read_rba_table("C1")
101    ```"""
102
103    # set-up
104    ignore_errors = kwargs.get("ignore_errors", False)
105    data, meta = DataFrame(), DataFrame()
106
107    # get the Excel file
108    excel = _get_excel_file(table, ignore_errors, **kwargs)
109    if excel is None:
110        return data, meta
111
112    # read Excel file into DataFrame
113    try:
114        raw = read_excel(BytesIO(excel), header=None, index_col=None)
115    except Exception as e:
116        if ignore_errors:
117            print(f"Ignoring error: {e}")
118            return data, meta
119        raise
120
121    # extract the meta data
122    meta = raw.iloc[1:11, :].T.copy()
123    meta.columns = Index(meta.iloc[0])
124    renamer = {
125        "Mnemonic": rm.id,
126    }  # historical data is inconsistent
127    meta = meta.rename(columns=renamer)
128    meta = meta.iloc[1:, :]
129    meta.index = Index(meta[rm.id])
130    meta[rm.table] = table
131    meta[rm.tdesc] = raw.iloc[0, 0]
132    meta = meta.dropna(how="all", axis=1)  # drop columns with all NaNs
133
134    # extract the data
135    data = raw.iloc[10:, :].copy()
136    data.columns = Index(data.iloc[0])
137    data = data.iloc[1:, :]
138    data.index = DatetimeIndex(data.iloc[:, 0])
139    data = data.iloc[:, 1:]
140    data = data.dropna(how="all", axis=1)  # drop columns with all NaNs
141
142    # can we make the index into a PeriodIndex?
143    days = data.index.to_series().diff(1).dropna().dt.days
144    if days.min() >= 28 and days.max() <= 31:
145        data.index = PeriodIndex(data.index, freq="M")
146    elif days.min() >= 90 and days.max() <= 92:
147        data.index = PeriodIndex(data.index, freq="Q")
148    elif days.min() >= 365 and days.max() <= 366:
149        data.index = PeriodIndex(data.index, freq="Y")
150    else:
151        data.index = PeriodIndex(data.index, freq="D")
152
153    return data, meta
154
155
def read_rba_ocr(monthly: bool = True, **kwargs: Any) -> Series:
    """Read the Official Cash Rate (OCR) from the RBA website and return it
    in a pandas Series, with either a daily or monthly PeriodIndex,
    depending on the value of the monthly parameter. The default is monthly.

    Parameters
    ----------
    monthly : bool = True
        If True, then the data will be returned with a monthly PeriodIndex.
        If False, then the data will be returned with a daily PeriodIndex.
    **kwargs : Any
        Additional keyword arguments, passed on to read_rba_table().
        The only keyword argument that is used here is ignore_errors.
    ignore_errors : bool = False
        If True, then any major errors encountered will be printed and the function
        will return an empty Series. If False, then any major errors encountered
        will raise an exception.

    Returns
    -------
    Series
        The OCR data in a pandas Series, with an index of either daily or monthly Periods.

    Examples
    --------
    ```python
    ocr = read_rba_ocr(monthly=True)
    ```"""

    series_name = "RBA Official Cash Rate"

    # read the OCR table from the RBA website
    rba, _rba_meta = read_rba_table("A2", **kwargs)  # should have a daily PeriodIndex

    # read_rba_table() hands back empty DataFrames when it ignored an error;
    # honour the documented contract rather than failing on the column lookup
    if rba.empty and kwargs.get("ignore_errors", False):
        return Series(name=series_name, dtype=float)

    # keep rows dated on or after 1990-08-02, make float and sort, name the series
    ocr = (
        rba.loc[lambda x: x.index >= "1990-08-02", "ARBAMPCNCRT"]
        .astype(float)
        .sort_index()
    )
    ocr.name = series_name

    # bring up to date: if the last observation is before today, repeat
    # the last value at today's period
    today = Period(Timestamp.today(), freq=cast(PeriodIndex, ocr.index).freqstr)
    if ocr.index[-1] < today:
        ocr[today] = ocr.iloc[-1]

    if not monthly:
        # fill in missing days and return daily data
        daily_index = period_range(start=ocr.index.min(), end=ocr.index.max(), freq="D")
        ocr = ocr.reindex(daily_index).ffill()
        return ocr

    # convert to monthly data, keeping last value if duplicates in month
    # fill in missing months
    ocr.index = PeriodIndex(ocr.index, freq="M")
    ocr = ocr[~ocr.index.duplicated(keep="last")]
    monthly_index = period_range(start=ocr.index.min(), end=ocr.index.max(), freq="M")
    ocr = ocr.reindex(monthly_index, method="ffill")
    return ocr
211
212
213# --- TESTING ---
if __name__ == "__main__":

    def test_read_rba_table():
        """Print the result of reading a known table, then show the
        error message produced by an unknown table."""

        # a table that is in the catalogue
        data, meta = read_rba_table("C1")
        for item in (meta, data.head(), data.tail()):
            print(item)
        print("=" * 20)

        # a table that is not in the catalogue
        try:
            read_rba_table("XYZ")
        except ValueError as error:
            print(error)
        print("=" * 20)

    def test_read_rba_ocr():
        """Print the head and tail of the OCR series, first with a
        monthly index and then with a daily index."""

        for monthly in (True, False):
            ocr = read_rba_ocr(monthly=monthly)
            print(ocr.head())
            print("...")
            print(ocr.tail())
            print("=" * 20)

    test_read_rba_table()
    test_read_rba_ocr()
def read_rba_table( table: str, **kwargs: Any) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
 77def read_rba_table(table: str, **kwargs: Any) -> tuple[DataFrame, DataFrame]:
 78    """Read a table from the RBA website and return the actual data
 79    and the meta data in a tuple of two DataFrames.
 80
 81    Parameters
 82    ----------
 83    table : str
 84        The table to read from the RBA website.
 85    **kwargs : Any
 86        Additional keyword arguments.
 87        The only keyword argument that is used is ignore_errors.
 88    ignore_errors : bool = False
 89        If True, then any major errors encountered will be printed and the function
 90        will return empty DataFrames. If False, then any major errors encountered
 91        will raise an exception.
 92
 93    Returns
 94    -------
 95    tuple[DataFrame, DataFrame]
 96        The primary data and the meta data in a tuple of two DataFrames.
 97
 98    Examples
 99    --------
100    ```python
101    data, meta = read_rba_table("C1")
102    ```"""
103
104    # set-up
105    ignore_errors = kwargs.get("ignore_errors", False)
106    data, meta = DataFrame(), DataFrame()
107
108    # get the Excel file
109    excel = _get_excel_file(table, ignore_errors, **kwargs)
110    if excel is None:
111        return data, meta
112
113    # read Excel file into DataFrame
114    try:
115        raw = read_excel(BytesIO(excel), header=None, index_col=None)
116    except Exception as e:
117        if ignore_errors:
118            print(f"Ignoring error: {e}")
119            return data, meta
120        raise
121
122    # extract the meta data
123    meta = raw.iloc[1:11, :].T.copy()
124    meta.columns = Index(meta.iloc[0])
125    renamer = {
126        "Mnemonic": rm.id,
127    }  # historical data is inconsistent
128    meta = meta.rename(columns=renamer)
129    meta = meta.iloc[1:, :]
130    meta.index = Index(meta[rm.id])
131    meta[rm.table] = table
132    meta[rm.tdesc] = raw.iloc[0, 0]
133    meta = meta.dropna(how="all", axis=1)  # drop columns with all NaNs
134
135    # extract the data
136    data = raw.iloc[10:, :].copy()
137    data.columns = Index(data.iloc[0])
138    data = data.iloc[1:, :]
139    data.index = DatetimeIndex(data.iloc[:, 0])
140    data = data.iloc[:, 1:]
141    data = data.dropna(how="all", axis=1)  # drop columns with all NaNs
142
143    # can we make the index into a PeriodIndex?
144    days = data.index.to_series().diff(1).dropna().dt.days
145    if days.min() >= 28 and days.max() <= 31:
146        data.index = PeriodIndex(data.index, freq="M")
147    elif days.min() >= 90 and days.max() <= 92:
148        data.index = PeriodIndex(data.index, freq="Q")
149    elif days.min() >= 365 and days.max() <= 366:
150        data.index = PeriodIndex(data.index, freq="Y")
151    else:
152        data.index = PeriodIndex(data.index, freq="D")
153
154    return data, meta

Read a table from the RBA website and return the actual data and the meta data in a tuple of two DataFrames.

Parameters

- table : str — The table to read from the RBA website.
- **kwargs : Any — Additional keyword arguments. The only keyword argument that is used is ignore_errors.
- ignore_errors : bool = False — If True, then any major errors encountered will be printed and the function will return empty DataFrames. If False, then any major errors encountered will raise an exception.

Returns

tuple[DataFrame, DataFrame] The primary data and the meta data in a tuple of two DataFrames.

Examples

data, meta = read_rba_table("C1")
def read_rba_ocr(monthly: bool = True, **kwargs: Any) -> pandas.core.series.Series:
def read_rba_ocr(monthly: bool = True, **kwargs: Any) -> Series:
    """Read the Official Cash Rate (OCR) from the RBA website and return it
    in a pandas Series, with either a daily or monthly PeriodIndex,
    depending on the value of the monthly parameter. The default is monthly.

    Parameters
    ----------
    monthly : bool = True
        If True, then the data will be returned with a monthly PeriodIndex.
        If False, then the data will be returned with a daily PeriodIndex.
    **kwargs : Any
        Additional keyword arguments, passed on to read_rba_table().
        The only keyword argument that is used here is ignore_errors.
    ignore_errors : bool = False
        If True, then any major errors encountered will be printed and the function
        will return an empty Series. If False, then any major errors encountered
        will raise an exception.

    Returns
    -------
    Series
        The OCR data in a pandas Series, with an index of either daily or monthly Periods.

    Examples
    --------
    ```python
    ocr = read_rba_ocr(monthly=True)
    ```"""

    series_name = "RBA Official Cash Rate"

    # read the OCR table from the RBA website
    rba, _rba_meta = read_rba_table("A2", **kwargs)  # should have a daily PeriodIndex

    # read_rba_table() hands back empty DataFrames when it ignored an error;
    # honour the documented contract rather than failing on the column lookup
    if rba.empty and kwargs.get("ignore_errors", False):
        return Series(name=series_name, dtype=float)

    # keep rows dated on or after 1990-08-02, make float and sort, name the series
    ocr = (
        rba.loc[lambda x: x.index >= "1990-08-02", "ARBAMPCNCRT"]
        .astype(float)
        .sort_index()
    )
    ocr.name = series_name

    # bring up to date: if the last observation is before today, repeat
    # the last value at today's period
    today = Period(Timestamp.today(), freq=cast(PeriodIndex, ocr.index).freqstr)
    if ocr.index[-1] < today:
        ocr[today] = ocr.iloc[-1]

    if not monthly:
        # fill in missing days and return daily data
        daily_index = period_range(start=ocr.index.min(), end=ocr.index.max(), freq="D")
        ocr = ocr.reindex(daily_index).ffill()
        return ocr

    # convert to monthly data, keeping last value if duplicates in month
    # fill in missing months
    ocr.index = PeriodIndex(ocr.index, freq="M")
    ocr = ocr[~ocr.index.duplicated(keep="last")]
    monthly_index = period_range(start=ocr.index.min(), end=ocr.index.max(), freq="M")
    ocr = ocr.reindex(monthly_index, method="ffill")
    return ocr

Read the Official Cash Rate (OCR) from the RBA website and return it in a pandas Series, with either a daily or monthly PeriodIndex, depending on the value of the monthly parameter. The default is monthly.

Parameters

- monthly : bool = True — If True, then the data will be returned with a monthly PeriodIndex. If False, then the data will be returned with a daily PeriodIndex.
- **kwargs : Any — Additional keyword arguments. The only keyword argument that is used is ignore_errors.
- ignore_errors : bool = False — If True, then any major errors encountered will be printed and the function will return an empty Series. If False, then any major errors encountered will raise an exception.

Returns

Series The OCR data in a pandas Series, with an index of either daily or monthly Periods.

Examples

ocr = read_rba_ocr(monthly=True)