readabs.read_abs_series

Get specific ABS data series by their ABS series identifiers.

  1"""Get specific ABS data series by their ABS series identifiers."""
  2
  3# --- imports
  4# system imports
  5from typing import Any, Sequence, cast
  6
  7# analytic imports
  8from pandas import DataFrame, Index, PeriodIndex, concat
  9
 10# local imports
 11from readabs.read_abs_cat import read_abs_cat
 12from readabs.read_support import check_kwargs, get_args
 13from readabs.abs_meta_data import metacol
 14
 15
 16# --- functions
 17def read_abs_series(
 18    cat: str,
 19    series_id: str | Sequence[str],
 20    **kwargs: Any,
 21) -> tuple[DataFrame, DataFrame]:
 22    """Get specific ABS data series by their ABS catalogue and series identifiers.
 23
 24    Parameters
 25    ----------
 26    cat : str
 27        The ABS catalogue ID.
 28
 29    series_id : str | Sequence[str]
 30        An ABS series ID or a sequence of ABS series IDs.
 31
 32    **kwargs : Any
 33        Keyword arguments for the read_abs_series function,
 34        which are the same as the keyword arguments for the
 35        read_abs_cat function.
 36
 37    Returns
 38    -------
 39    tuple[DataFrame, DataFrame]
 40        A tuple of two DataFrames, one for the primary data and one for the metadata.
 41
 42    Example
 43    -------
 44
 45    ```python
 46    import readabs as ra
 47    from pandas import DataFrame
 48    cat_num = "6202.0"  # The ABS labour force survey
 49    unemployment_rate = "A84423050A"
 50    seo = "6202001"  # The ABS table name
 51    data, meta = ra.read_abs_series(
 52        cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
 53    )
 54    ```"""
 55
 56    # check for unexpected keyword arguments/get defaults
 57    check_kwargs(kwargs, "read_abs_series")
 58    args = get_args(kwargs, "read_abs_series")
 59
 60    # read the ABS category data
 61    cat_data, cat_meta = read_abs_cat(cat, **args)
 62
 63    # drop repeated series_ids in the meta data,
 64    # make unique series_ids the index
 65    cat_meta.index = Index(cat_meta[metacol.id])
 66    cat_meta = cat_meta.groupby(cat_meta.index).first()
 67
 68    # get the ABS series data
 69    if isinstance(series_id, str):
 70        series_id = [series_id]
 71    return_data, return_meta = DataFrame(), DataFrame()
 72    for identifier in series_id:
 73
 74        # confirm that the series ID is in the catalogue
 75        if identifier not in cat_meta.index:
 76            if args["verbose"]:
 77                print(f"Series ID {identifier} not found in ABS catalogue ID {cat}")
 78            if args["ignore_errors"]:
 79                continue
 80            raise ValueError(f"Series ID {identifier} not found in catalogue {cat}")
 81
 82        # confirm thay the index of the series is compatible
 83        table = str(cat_meta.loc[identifier, metacol.table])  # str for mypy
 84        data_series = cat_data[table][identifier]
 85        if (
 86            len(return_data) > 0
 87            and cast(PeriodIndex, return_data.index).freq
 88            != cast(PeriodIndex, data_series.index).freq
 89        ):
 90            if args["verbose"]:
 91                print(f"Frequency mismatch for series ID {identifier}")
 92            if args["ignore_errors"]:
 93                continue
 94            raise ValueError(f"Frequency mismatch for series ID {identifier}")
 95
 96        # add the series data and meta data to the return values
 97        if len(return_data) > 0:
 98            return_data = return_data.reindex(
 99                return_data.index.union(data_series.index)
100            )
101        return_data[identifier] = data_series
102        return_meta = concat([return_meta, cat_meta.loc[identifier]], axis=1)
103
104    return return_data, return_meta.T
105
106
if __name__ == "__main__":

    def simple_test() -> None:
        """Fetch one CPI series and print its latest data and meta data."""
        # Trimmed Mean - through the year CPI growth - seasonally adjusted
        catalogue, wanted, workbook = "6401.0", "A3604511X", "640106"
        frame, info = read_abs_series(
            catalogue, wanted, single_excel_only=workbook
        )
        print(frame.tail())
        print(info.T)
        print("Done")

    simple_test()
def read_abs_series( cat: str, series_id: Union[str, Sequence[str]], **kwargs: Any) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
 18def read_abs_series(
 19    cat: str,
 20    series_id: str | Sequence[str],
 21    **kwargs: Any,
 22) -> tuple[DataFrame, DataFrame]:
 23    """Get specific ABS data series by their ABS catalogue and series identifiers.
 24
 25    Parameters
 26    ----------
 27    cat : str
 28        The ABS catalogue ID.
 29
 30    series_id : str | Sequence[str]
 31        An ABS series ID or a sequence of ABS series IDs.
 32
 33    **kwargs : Any
 34        Keyword arguments for the read_abs_series function,
 35        which are the same as the keyword arguments for the
 36        read_abs_cat function.
 37
 38    Returns
 39    -------
 40    tuple[DataFrame, DataFrame]
 41        A tuple of two DataFrames, one for the primary data and one for the metadata.
 42
 43    Example
 44    -------
 45
 46    ```python
 47    import readabs as ra
 48    from pandas import DataFrame
 49    cat_num = "6202.0"  # The ABS labour force survey
 50    unemployment_rate = "A84423050A"
 51    seo = "6202001"  # The ABS table name
 52    data, meta = ra.read_abs_series(
 53        cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
 54    )
 55    ```"""
 56
 57    # check for unexpected keyword arguments/get defaults
 58    check_kwargs(kwargs, "read_abs_series")
 59    args = get_args(kwargs, "read_abs_series")
 60
 61    # read the ABS category data
 62    cat_data, cat_meta = read_abs_cat(cat, **args)
 63
 64    # drop repeated series_ids in the meta data,
 65    # make unique series_ids the index
 66    cat_meta.index = Index(cat_meta[metacol.id])
 67    cat_meta = cat_meta.groupby(cat_meta.index).first()
 68
 69    # get the ABS series data
 70    if isinstance(series_id, str):
 71        series_id = [series_id]
 72    return_data, return_meta = DataFrame(), DataFrame()
 73    for identifier in series_id:
 74
 75        # confirm that the series ID is in the catalogue
 76        if identifier not in cat_meta.index:
 77            if args["verbose"]:
 78                print(f"Series ID {identifier} not found in ABS catalogue ID {cat}")
 79            if args["ignore_errors"]:
 80                continue
 81            raise ValueError(f"Series ID {identifier} not found in catalogue {cat}")
 82
 83        # confirm thay the index of the series is compatible
 84        table = str(cat_meta.loc[identifier, metacol.table])  # str for mypy
 85        data_series = cat_data[table][identifier]
 86        if (
 87            len(return_data) > 0
 88            and cast(PeriodIndex, return_data.index).freq
 89            != cast(PeriodIndex, data_series.index).freq
 90        ):
 91            if args["verbose"]:
 92                print(f"Frequency mismatch for series ID {identifier}")
 93            if args["ignore_errors"]:
 94                continue
 95            raise ValueError(f"Frequency mismatch for series ID {identifier}")
 96
 97        # add the series data and meta data to the return values
 98        if len(return_data) > 0:
 99            return_data = return_data.reindex(
100                return_data.index.union(data_series.index)
101            )
102        return_data[identifier] = data_series
103        return_meta = concat([return_meta, cat_meta.loc[identifier]], axis=1)
104
105    return return_data, return_meta.T

Get specific ABS data series by their ABS catalogue and series identifiers.

Parameters

cat : str The ABS catalogue ID.

series_id : str | Sequence[str] An ABS series ID or a sequence of ABS series IDs.

**kwargs : Any Keyword arguments for the read_abs_series function, which are the same as the keyword arguments for the read_abs_cat function.

Returns

tuple[DataFrame, DataFrame] A tuple of two DataFrames, one for the primary data and one for the metadata.

Example

import readabs as ra
from pandas import DataFrame
cat_num = "6202.0"  # The ABS labour force survey
unemployment_rate = "A84423050A"
seo = "6202001"  # The ABS table name
data, meta = ra.read_abs_series(
    cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
)