readabs.read_abs_series
Get specific ABS data series by their ABS series identifiers.
"""Get specific ABS data series by their ABS series identifiers."""

# --- imports
# system imports
from typing import Any, Sequence, cast

# analytic imports
from pandas import DataFrame, Index, PeriodIndex, concat

# local imports
from readabs.read_abs_cat import read_abs_cat
from readabs.read_support import check_kwargs, get_args
from readabs.abs_meta_data import metacol


# --- functions
def read_abs_series(
    cat: str,
    series_id: str | Sequence[str],
    **kwargs: Any,
) -> tuple[DataFrame, DataFrame]:
    """Get specific ABS data series by their ABS catalogue and series identifiers.

    Parameters
    ----------
    cat : str
        The ABS catalogue ID.

    series_id : str | Sequence[str]
        An ABS series ID or a sequence of ABS series IDs. A series ID
        that appears more than once is only returned once.

    **kwargs : Any
        Keyword arguments for the read_abs_series function,
        which are the same as the keyword arguments for the
        read_abs_cat function.

    Returns
    -------
    tuple[DataFrame, DataFrame]
        A tuple of two DataFrames, one for the primary data and one for the metadata.
        The data has one column per series ID returned, and the metadata
        has one row per series ID returned.

    Raises
    ------
    ValueError
        If a series ID is not found in the catalogue metadata, or if its
        index frequency differs from that of the series already collected,
        and the `ignore_errors` argument is not set.

    Example
    -------

    ```python
    import readabs as ra
    from pandas import DataFrame
    cat_num = "6202.0"  # The ABS labour force survey
    unemployment_rate = "A84423050A"
    seo = "6202001"  # The ABS table name
    data, meta = ra.read_abs_series(
        cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
    )
    ```"""

    # check for unexpected keyword arguments/get defaults
    check_kwargs(kwargs, "read_abs_series")
    args = get_args(kwargs, "read_abs_series")

    # read the ABS catalogue data
    cat_data, cat_meta = read_abs_cat(cat, **args)

    # drop repeated series_ids in the meta data,
    # make unique series_ids the index
    cat_meta.index = Index(cat_meta[metacol.id])
    cat_meta = cat_meta.groupby(cat_meta.index).first()

    # get the ABS series data
    if isinstance(series_id, str):
        series_id = [series_id]
    return_data, return_meta = DataFrame(), DataFrame()
    for identifier in series_id:

        # a series ID requested more than once is only collected once;
        # otherwise the metadata would gain a duplicate row while the
        # data column is merely overwritten, leaving the two out of step
        if identifier in return_data.columns:
            continue

        # confirm that the series ID is in the catalogue
        if identifier not in cat_meta.index:
            if args["verbose"]:
                print(f"Series ID {identifier} not found in ABS catalogue ID {cat}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Series ID {identifier} not found in catalogue {cat}")

        # confirm that the index of the series is compatible
        table = str(cat_meta.loc[identifier, metacol.table])  # str for mypy
        data_series = cat_data[table][identifier]
        if (
            len(return_data) > 0
            and cast(PeriodIndex, return_data.index).freq
            != cast(PeriodIndex, data_series.index).freq
        ):
            if args["verbose"]:
                print(f"Frequency mismatch for series ID {identifier}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Frequency mismatch for series ID {identifier}")

        # add the series data and meta data to the return values
        if len(return_data) > 0:
            # widen the index first so periods only present in the
            # new series are not dropped on assignment
            return_data = return_data.reindex(
                return_data.index.union(data_series.index)
            )
        return_data[identifier] = data_series
        return_meta = concat([return_meta, cat_meta.loc[identifier]], axis=1)

    # metadata was accumulated one column per series; return one row per series
    return return_data, return_meta.T


if __name__ == "__main__":

    def simple_test() -> None:
        """Simple test of the read_abs_series function."""
        # simple test
        # Trimmed Mean - through the year CPI growth - seasonally adjusted
        data, meta = read_abs_series("6401.0", "A3604511X", single_excel_only="640106")
        print(data.tail())
        print(meta.T)
        print("Done")

    simple_test()
def read_abs_series(cat: str, series_id: Union[str, Sequence[str]], **kwargs: Any) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
def read_abs_series(
    cat: str,
    series_id: str | Sequence[str],
    **kwargs: Any,
) -> tuple[DataFrame, DataFrame]:
    """Get specific ABS data series by their ABS catalogue and series identifiers.

    Parameters
    ----------
    cat : str
        The ABS catalogue ID.

    series_id : str | Sequence[str]
        An ABS series ID or a sequence of ABS series IDs. A series ID
        that appears more than once is only returned once.

    **kwargs : Any
        Keyword arguments for the read_abs_series function,
        which are the same as the keyword arguments for the
        read_abs_cat function.

    Returns
    -------
    tuple[DataFrame, DataFrame]
        A tuple of two DataFrames, one for the primary data and one for the metadata.
        The data has one column per series ID returned, and the metadata
        has one row per series ID returned.

    Raises
    ------
    ValueError
        If a series ID is not found in the catalogue metadata, or if its
        index frequency differs from that of the series already collected,
        and the `ignore_errors` argument is not set.

    Example
    -------

    ```python
    import readabs as ra
    from pandas import DataFrame
    cat_num = "6202.0"  # The ABS labour force survey
    unemployment_rate = "A84423050A"
    seo = "6202001"  # The ABS table name
    data, meta = ra.read_abs_series(
        cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
    )
    ```"""

    # check for unexpected keyword arguments/get defaults
    check_kwargs(kwargs, "read_abs_series")
    args = get_args(kwargs, "read_abs_series")

    # read the ABS catalogue data
    cat_data, cat_meta = read_abs_cat(cat, **args)

    # drop repeated series_ids in the meta data,
    # make unique series_ids the index
    cat_meta.index = Index(cat_meta[metacol.id])
    cat_meta = cat_meta.groupby(cat_meta.index).first()

    # get the ABS series data
    if isinstance(series_id, str):
        series_id = [series_id]
    return_data, return_meta = DataFrame(), DataFrame()
    for identifier in series_id:

        # a series ID requested more than once is only collected once;
        # otherwise the metadata would gain a duplicate row while the
        # data column is merely overwritten, leaving the two out of step
        if identifier in return_data.columns:
            continue

        # confirm that the series ID is in the catalogue
        if identifier not in cat_meta.index:
            if args["verbose"]:
                print(f"Series ID {identifier} not found in ABS catalogue ID {cat}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Series ID {identifier} not found in catalogue {cat}")

        # confirm that the index of the series is compatible
        table = str(cat_meta.loc[identifier, metacol.table])  # str for mypy
        data_series = cat_data[table][identifier]
        if (
            len(return_data) > 0
            and cast(PeriodIndex, return_data.index).freq
            != cast(PeriodIndex, data_series.index).freq
        ):
            if args["verbose"]:
                print(f"Frequency mismatch for series ID {identifier}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Frequency mismatch for series ID {identifier}")

        # add the series data and meta data to the return values
        if len(return_data) > 0:
            # widen the index first so periods only present in the
            # new series are not dropped on assignment
            return_data = return_data.reindex(
                return_data.index.union(data_series.index)
            )
        return_data[identifier] = data_series
        return_meta = concat([return_meta, cat_meta.loc[identifier]], axis=1)

    # metadata was accumulated one column per series; return one row per series
    return return_data, return_meta.T
Get specific ABS data series by their ABS catalogue and series identifiers.
Parameters
cat : str The ABS catalogue ID.
series_id : str | Sequence[str] An ABS series ID or a sequence of ABS series IDs.
**kwargs : Any Keyword arguments for the read_abs_series function, which are the same as the keyword arguments for the read_abs_cat function.
Returns
tuple[DataFrame, DataFrame] A tuple of two DataFrames, one for the primary data and one for the metadata.
Example
import readabs as ra
from pandas import DataFrame
cat_num = "6202.0" # The ABS labour force survey
unemployment_rate = "A84423050A"
seo = "6202001" # The ABS table name
data, meta = ra.read_abs_series(
cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
)