Module smear_api_tools
Expand source code
import pandas as pd
import numpy as np
import urllib.request, json
from collections.abc import Iterable
from datetime import datetime, timedelta
# pdoc configuration: a False entry asks pdoc to leave that name out of the
# generated documentation. All entries here are the module's internal helpers.
__pdoc__ = {
'isStr': False,
'isStrIterable': False,
'isDatetime': False,
'isDatetimeIterable': False,
'isNumeric': False,
'isNumericIterable': False,
'parse': False,
}
def isStr(obj):
    """Return True when `obj` is an instance of `str`, otherwise False."""
    is_string = isinstance(obj, str)
    return is_string
def isStrIterable(obj):
    """Return True when `obj` is a non-string iterable containing only strings.

    A plain string is itself iterable, so it is rejected explicitly; callers
    use `isStr` for that case. An empty iterable yields True because there is
    no element that fails the check.

    Note that a one-shot iterator (e.g. a generator) is consumed by the check.
    """
    # Logical operators are used instead of the bitwise `&` / `~`, which only
    # worked by accident on bools and are deprecated for bool since Python 3.12.
    if isinstance(obj, str) or not isinstance(obj, Iterable):
        return False
    return all(isinstance(elem, str) for elem in obj)
def isDatetime(obj):
    """Return True when `obj` is an instance of `datetime.datetime`."""
    is_datetime = isinstance(obj, datetime)
    return is_datetime
def isDatetimeIterable(obj):
    """Return True when `obj` is iterable and every element is a `datetime`.

    An iterable without elements yields True, since no element fails the check.
    """
    if not isinstance(obj, Iterable):
        return False
    for item in obj:
        if not isinstance(item, datetime):
            return False
    return True
def isNumeric(obj):
    """Return True when `obj` can be converted with `float()`, else False.

    Only the errors a failed conversion raises are treated as "not numeric";
    anything else (e.g. KeyboardInterrupt) propagates instead of being
    swallowed.
    """
    try:
        float(obj)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
def isNumericIterable(obj):
    """Return True when `obj` is iterable and `isNumeric` accepts every element.

    An iterable without elements yields True, since no element fails the check.
    """
    if not isinstance(obj, Iterable):
        return False
    for item in obj:
        if not isNumeric(item):
            return False
    return True
def parse(year, month, day, hour, minute, second):
    """Join date and time components into 'year-month-day hour:minute:second'.

    The pieces are combined with `+`, so the arguments must support
    concatenation with `str` (plain strings do).
    """
    date_part = year + '-' + month + '-' + day
    time_part = hour + ':' + minute + ':' + second
    return date_part + ' ' + time_part
def _timeseriesUrl(variable_string, first, last, quality, averaging, avg_type):
    """Build the Smart SMEAR timeseries CSV request URL for one time window."""
    stamp = "%Y-%m-%dT%H:%M:%S"
    # The options are passed through str() so that e.g. averaging=30 works
    # the same as averaging="30".
    return ('https://smear-backend.rahtiapp.fi/search/timeseries/csv?'
            + variable_string
            + '&from=' + first.strftime(stamp).replace(':', '%3A')
            + '&to=' + last.strftime(stamp).replace(':', '%3A')
            + '&quality=' + str(quality)
            + '&interval=' + str(averaging)
            + '&aggregation=' + str(avg_type))


def _downloadTimeseries(url_string, col_names):
    """Download one timeseries CSV and return it indexed by time.

    Best effort: any download or parsing failure gives an empty DataFrame.
    """
    try:
        raw = pd.read_csv(url_string)
        if raw.empty:
            return pd.DataFrame([])
        # The first six columns hold year, month, day, hour, minute, second.
        # They are combined here instead of through read_csv's nested
        # `parse_dates` / `date_parser` options, which newer pandas versions
        # deprecate and remove.
        stamps = raw.iloc[:, :6].copy()
        stamps.columns = ['year', 'month', 'day', 'hour', 'minute', 'second']
        data = raw.iloc[:, 6:].copy()
        data.index = pd.DatetimeIndex(pd.to_datetime(stamps), name='time')
    except Exception:
        return pd.DataFrame([])
    # Put the columns in the requested order; variables missing from the
    # response become all-NaN columns.
    data = data.reindex(col_names, axis=1)
    data.columns = col_names
    return data


def getData(variables,dates=None,start=None,end=None,quality='ANY',averaging='1',avg_type='NONE'):
    """ Get timeseries of variables using Smart SMEAR API

    Give either `start` and `end` (one continuous period) or `dates`
    (one download per date, each covering that date plus one day).

    Parameters
    ----------

    variables : string or array of strings
        name of a measured quantity in the database (tablevariable)
    dates : datetime object or string or array of datetime objects or strings
        the date(s) for which measurements are downloaded
    start : datetime object or string
        beginning of the measurement period
    end : datetime object or string
        end of the measurement period
    quality : string
        `"ANY"`
    averaging : string
        `"1"` (no averaging), `"30"` (30 min) or `"60"` (60 min)
    avg_type : string
        `"NONE"`, `"ARITHMETIC"`, `"MEDIAN"`, `"MIN"` or `"MAX"`

    Returns
    -------

    pandas DataFrame or list of DataFrames
        downloaded data indexed by `time`, one column per requested
        variable; a list is returned when `dates` is an array. A download
        that fails or contains no rows is returned as an empty DataFrame.

    Raises
    ------

    Exception
        if `variables`, `start`/`end` or `dates` have an unsupported type,
        if `start`, `end` and `dates` are all given, or if neither
        `start` and `end` nor `dates` is given.

    """
    if isStrIterable(variables):
        col_names = list(variables)
    elif isStr(variables):
        col_names = [variables]
    else:
        raise Exception('"variables" must be string or array of strings')
    variable_string = ''.join('&tablevariable=' + name for name in col_names)

    have_range = (start is not None) and (end is not None)
    if have_range and (dates is not None):
        raise Exception('Give either "start" and "end" or "dates"')

    if have_range:
        if isDatetime(start) and isDatetime(end):
            pass
        elif isStr(start) and isStr(end):
            start = pd.to_datetime(start)
            end = pd.to_datetime(end)
        else:
            raise Exception('"start" and "end" must be datetime objects or strings')
        url_string = _timeseriesUrl(variable_string, start, end,
                                    quality, averaging, avg_type)
        return _downloadTimeseries(url_string, col_names)

    if dates is None:
        raise Exception('Missing "start" and "end" or "dates"')

    if isDatetimeIterable(dates):
        is_date_list = True
    elif isDatetime(dates):
        dates = [dates]
        is_date_list = False
    elif isStrIterable(dates):
        dates = pd.to_datetime(dates)
        is_date_list = True
    elif isStr(dates):
        dates = [pd.to_datetime(dates)]
        is_date_list = False
    else:
        raise Exception('"dates" must be datetime object or string or array of datetime objects or strings')

    # One request per date, each spanning 24 hours from the given timestamp.
    datas = [
        _downloadTimeseries(
            _timeseriesUrl(variable_string, t, t + timedelta(days=1),
                           quality, averaging, avg_type),
            col_names)
        for t in dates
    ]
    return datas if is_date_list else datas[0]
def listAllData(search_term=None,verbose=False):
    """ List and describe variables in the SMEAR database

    Parameters
    ----------

    search_term : string
        search term for searching the database, if `None`
        all results are returned. The term is matched
        case-insensitively against the variable titles.
    verbose : boolean
        if True the result also has `description` and `source` columns

    Returns
    -------

    pandas DataFrame
        columns `title` and `tablevariable` (plus `description` and
        `source` when `verbose` is True), one row per variable

    Raises
    ------

    Exception
        if `search_term` is not a string or `verbose` is not a boolean

    """
    if (search_term is not None) and (not isStr(search_term)):
        raise Exception('"search_term" should be a string')
    if not isinstance(verbose, bool):
        raise Exception('"verbose" should be True or False')

    variable_meta_url = "https://smear-backend.rahtiapp.fi/search/variable"

    # Subscript digits and the subscript x are mapped to plain characters
    # so that the texts can be searched with ordinary keyboard input.
    to_plain = str.maketrans("₀₁₂₃₄₅₆₇₈₉ₓ", "0123456789x")

    # Variable metadata
    with urllib.request.urlopen(variable_meta_url) as url:
        variablemetadata = json.loads(url.read().decode())

    columns = ['title', 'tablevariable']
    if verbose:
        columns = columns + ['description', 'source']

    # Rows are collected in a list and the frame is built once:
    # DataFrame.append no longer exists in pandas 2.0 and later.
    rows = []
    for entry in variablemetadata:
        row = [str(entry['title']).translate(to_plain),
               entry['tableName'] + '.' + entry['name']]
        if verbose:
            row.append(str(entry['description']).translate(to_plain))
            row.append(str(entry['source']).translate(to_plain))
        rows.append(row)
    df = pd.DataFrame(rows, columns=columns)

    # Find variables that contain the search_term in their title
    if search_term is not None:
        df = df.loc[df['title'].str.lower().str.find(search_term.lower()) != -1, :]
    return df
def getVariableMetadata(variables):
    """ Get variable metadata using Smart SMEAR API

    Parameters
    ----------

    variables : string or array of strings
        name (tablevariable) of a measured quantity in the database

    Returns
    -------

    dictionary or list of dictionaries
        metadata for given tablevariables as decoded from the JSON
        response; an empty list if the request or the decoding fails

    Raises
    ------

    Exception
        if `variables` is neither a string nor an array of strings

    """
    if isStrIterable(variables):
        variable_string = ''.join('&tablevariable=' + name for name in variables)
    elif isStr(variables):
        variable_string = '&tablevariable=' + variables
    else:
        raise Exception('"variables" must be string or array of strings')

    meta_url = 'https://smear-backend.rahtiapp.fi/search/variable?' + variable_string

    # Best effort: a failed request gives an empty result, but interpreter
    # exits and Ctrl-C are no longer swallowed.
    try:
        with urllib.request.urlopen(meta_url) as url:
            metadata = json.loads(url.read().decode())
    except Exception:
        metadata = []
    return metadata
def getDmpsData(station='HYY',start=None,end=None,dates=None,quality='ANY',averaging='1',avg_type='NONE'):
    """ Get DMPS data using Smart SMEAR API

    The download itself is delegated to `getData`; this function only
    builds the DMPS variable names and relabels the columns.

    Parameters
    ----------

    station : string
        `"HYY"`, `"KUM"` or `"VAR"`
    dates : datetime object or string or array of datetime objects or strings
        the days for which data are downloaded
    start : datetime object or string
        begin time for data
    end : datetime object or string
        end time for data
    quality : string
        `"ANY"`
    averaging : string
        `"1"` (no averaging), `"30"` (30 minutes) or `"60"` (60 minutes)
    avg_type : str
        `"NONE"`, `"ARITHMETIC"`, `"MEDIAN"`, `"MIN"` or `"MAX"`

    Returns
    -------

    pandas DataFrame or list of DataFrames
        downloaded data, list is given when dates is array

        index = time (utc+2)

        columns = bin geometric mean diameters (m)

        values = dN/dlogDp (cm-3)

        A download that fails or has no rows is an empty DataFrame.

    Raises
    ------

    Exception
        if `station` is not one of the supported stations, if
        `start`/`end` or `dates` have an unsupported type, or if neither
        `start` and `end` nor `dates` is given.

    """
    if station not in ('HYY', 'KUM', 'VAR'):
        raise Exception('"station" must be "HYY", "KUM" or "VAR"')

    # Size-bin variable names: 20 mantissas for each of the exponents 1-3,
    # followed by the final bin d100e4 (61 bins in total).
    mantissas = ['100', '112', '126', '141', '158', '178', '200', '224',
                 '251', '282', '316', '355', '398', '447', '501', '562',
                 '631', '708', '794', '891']
    variables = ['d' + mantissa + 'e' + exponent
                 for exponent in ('1', '2', '3')
                 for mantissa in mantissas]
    variables.append('d100e4')

    col_names = [station + '_DMPS.' + name for name in variables]
    # The text after the leading "d" is read as a number and scaled by
    # 0.001 * 1e-9 to give the column labels (diameters in metres
    # according to the documented return value).
    dp = [float(name[1:])*0.001*1e-9 for name in variables]

    # When both start and end are given they take precedence over dates.
    if (start is not None) and (end is not None):
        dates = None

    result = getData(col_names,
                     dates=dates,
                     start=start,
                     end=end,
                     quality=str(quality),
                     averaging=str(averaging),
                     avg_type=str(avg_type))

    def relabel(frame):
        # Empty frames mark failed or empty downloads and have no columns.
        if not frame.empty:
            frame.columns = dp
        return frame

    if isinstance(result, list):
        return [relabel(frame) for frame in result]
    return relabel(result)
Functions
def getData(variables, dates=None, start=None, end=None, quality='ANY', averaging='1', avg_type='NONE')
-
Get timeseries of variables using Smart SMEAR API
Parameters
variables : string or array of strings
- name of a measured quantity in the database (tablevariable)
dates : datetime object or string or array of datetime objects or strings
- the date(s) for which measurements are downloaded
start : datetime object or string
- beginning of the measurement period
end : datetime object or string
- end of the measurement period
quality : string
- "ANY"
averaging : string
- "1" (no averaging), "30" (30 min) or "60" (60 min)
avg_type : string
- "NONE", "ARITHMETIC", "MEDIAN", "MIN" or "MAX"
Returns
pandas DataFrame or list of DataFrames
- downloaded data; a list is returned when an array of dates is given
Expand source code
def _timeseriesUrl(variable_string, first, last, quality, averaging, avg_type):
    """Build the Smart SMEAR timeseries CSV request URL for one time window."""
    stamp = "%Y-%m-%dT%H:%M:%S"
    # The options are passed through str() so that e.g. averaging=30 works
    # the same as averaging="30".
    return ('https://smear-backend.rahtiapp.fi/search/timeseries/csv?'
            + variable_string
            + '&from=' + first.strftime(stamp).replace(':', '%3A')
            + '&to=' + last.strftime(stamp).replace(':', '%3A')
            + '&quality=' + str(quality)
            + '&interval=' + str(averaging)
            + '&aggregation=' + str(avg_type))


def _downloadTimeseries(url_string, col_names):
    """Download one timeseries CSV and return it indexed by time.

    Best effort: any download or parsing failure gives an empty DataFrame.
    """
    try:
        raw = pd.read_csv(url_string)
        if raw.empty:
            return pd.DataFrame([])
        # The first six columns hold year, month, day, hour, minute, second.
        # They are combined here instead of through read_csv's nested
        # `parse_dates` / `date_parser` options, which newer pandas versions
        # deprecate and remove.
        stamps = raw.iloc[:, :6].copy()
        stamps.columns = ['year', 'month', 'day', 'hour', 'minute', 'second']
        data = raw.iloc[:, 6:].copy()
        data.index = pd.DatetimeIndex(pd.to_datetime(stamps), name='time')
    except Exception:
        return pd.DataFrame([])
    # Put the columns in the requested order; variables missing from the
    # response become all-NaN columns.
    data = data.reindex(col_names, axis=1)
    data.columns = col_names
    return data


def getData(variables,dates=None,start=None,end=None,quality='ANY',averaging='1',avg_type='NONE'):
    """ Get timeseries of variables using Smart SMEAR API

    Give either `start` and `end` (one continuous period) or `dates`
    (one download per date, each covering that date plus one day).

    Parameters
    ----------

    variables : string or array of strings
        name of a measured quantity in the database (tablevariable)
    dates : datetime object or string or array of datetime objects or strings
        the date(s) for which measurements are downloaded
    start : datetime object or string
        beginning of the measurement period
    end : datetime object or string
        end of the measurement period
    quality : string
        `"ANY"`
    averaging : string
        `"1"` (no averaging), `"30"` (30 min) or `"60"` (60 min)
    avg_type : string
        `"NONE"`, `"ARITHMETIC"`, `"MEDIAN"`, `"MIN"` or `"MAX"`

    Returns
    -------

    pandas DataFrame or list of DataFrames
        downloaded data indexed by `time`, one column per requested
        variable; a list is returned when `dates` is an array. A download
        that fails or contains no rows is returned as an empty DataFrame.

    Raises
    ------

    Exception
        if `variables`, `start`/`end` or `dates` have an unsupported type,
        if `start`, `end` and `dates` are all given, or if neither
        `start` and `end` nor `dates` is given.

    """
    if isStrIterable(variables):
        col_names = list(variables)
    elif isStr(variables):
        col_names = [variables]
    else:
        raise Exception('"variables" must be string or array of strings')
    variable_string = ''.join('&tablevariable=' + name for name in col_names)

    have_range = (start is not None) and (end is not None)
    if have_range and (dates is not None):
        raise Exception('Give either "start" and "end" or "dates"')

    if have_range:
        if isDatetime(start) and isDatetime(end):
            pass
        elif isStr(start) and isStr(end):
            start = pd.to_datetime(start)
            end = pd.to_datetime(end)
        else:
            raise Exception('"start" and "end" must be datetime objects or strings')
        url_string = _timeseriesUrl(variable_string, start, end,
                                    quality, averaging, avg_type)
        return _downloadTimeseries(url_string, col_names)

    if dates is None:
        raise Exception('Missing "start" and "end" or "dates"')

    if isDatetimeIterable(dates):
        is_date_list = True
    elif isDatetime(dates):
        dates = [dates]
        is_date_list = False
    elif isStrIterable(dates):
        dates = pd.to_datetime(dates)
        is_date_list = True
    elif isStr(dates):
        dates = [pd.to_datetime(dates)]
        is_date_list = False
    else:
        raise Exception('"dates" must be datetime object or string or array of datetime objects or strings')

    # One request per date, each spanning 24 hours from the given timestamp.
    datas = [
        _downloadTimeseries(
            _timeseriesUrl(variable_string, t, t + timedelta(days=1),
                           quality, averaging, avg_type),
            col_names)
        for t in dates
    ]
    return datas if is_date_list else datas[0]
def getDmpsData(station='HYY', start=None, end=None, dates=None, quality='ANY', averaging='1', avg_type='NONE')
-
Get DMPS data using Smart SMEAR API
Parameters
station : string
- "HYY", "KUM" or "VAR"
dates : datetime object or string or array of datetime objects or strings
- the days for which data are downloaded
start : datetime object or string
- begin time for data
end : datetime object or string
- end time for data
quality : string
- "ANY"
averaging : string
- "1" (no averaging), "30" (30 minutes) or "60" (60 minutes)
avg_type : str
- "NONE", "ARITHMETIC", "MEDIAN", "MIN" or "MAX"
Returns
pandas DataFrame or list of DataFrames
- downloaded data; a list is returned when dates is an array
index = time (utc+2)
columns = bin geometric mean diameters (m)
values = dN/dlogDp (cm-3)
Expand source code
def getDmpsData(station='HYY',start=None,end=None,dates=None,quality='ANY',averaging='1',avg_type='NONE'):
    """ Get DMPS data using Smart SMEAR API

    The download itself is delegated to `getData`; this function only
    builds the DMPS variable names and relabels the columns.

    Parameters
    ----------

    station : string
        `"HYY"`, `"KUM"` or `"VAR"`
    dates : datetime object or string or array of datetime objects or strings
        the days for which data are downloaded
    start : datetime object or string
        begin time for data
    end : datetime object or string
        end time for data
    quality : string
        `"ANY"`
    averaging : string
        `"1"` (no averaging), `"30"` (30 minutes) or `"60"` (60 minutes)
    avg_type : str
        `"NONE"`, `"ARITHMETIC"`, `"MEDIAN"`, `"MIN"` or `"MAX"`

    Returns
    -------

    pandas DataFrame or list of DataFrames
        downloaded data, list is given when dates is array

        index = time (utc+2)

        columns = bin geometric mean diameters (m)

        values = dN/dlogDp (cm-3)

        A download that fails or has no rows is an empty DataFrame.

    Raises
    ------

    Exception
        if `station` is not one of the supported stations, if
        `start`/`end` or `dates` have an unsupported type, or if neither
        `start` and `end` nor `dates` is given.

    """
    if station not in ('HYY', 'KUM', 'VAR'):
        raise Exception('"station" must be "HYY", "KUM" or "VAR"')

    # Size-bin variable names: 20 mantissas for each of the exponents 1-3,
    # followed by the final bin d100e4 (61 bins in total).
    mantissas = ['100', '112', '126', '141', '158', '178', '200', '224',
                 '251', '282', '316', '355', '398', '447', '501', '562',
                 '631', '708', '794', '891']
    variables = ['d' + mantissa + 'e' + exponent
                 for exponent in ('1', '2', '3')
                 for mantissa in mantissas]
    variables.append('d100e4')

    col_names = [station + '_DMPS.' + name for name in variables]
    # The text after the leading "d" is read as a number and scaled by
    # 0.001 * 1e-9 to give the column labels (diameters in metres
    # according to the documented return value).
    dp = [float(name[1:])*0.001*1e-9 for name in variables]

    # When both start and end are given they take precedence over dates.
    if (start is not None) and (end is not None):
        dates = None

    result = getData(col_names,
                     dates=dates,
                     start=start,
                     end=end,
                     quality=str(quality),
                     averaging=str(averaging),
                     avg_type=str(avg_type))

    def relabel(frame):
        # Empty frames mark failed or empty downloads and have no columns.
        if not frame.empty:
            frame.columns = dp
        return frame

    if isinstance(result, list):
        return [relabel(frame) for frame in result]
    return relabel(result)
def getVariableMetadata(variables)
-
Get variable metadata using Smart SMEAR API
Parameters
variables : string or array of strings
- name (tablevariable) of a measured quantity in the database
Returns
dictionary or list of dictionaries
- metadata for given tablevariables
Expand source code
def getVariableMetadata(variables):
    """ Get variable metadata using Smart SMEAR API

    Parameters
    ----------

    variables : string or array of strings
        name (tablevariable) of a measured quantity in the database

    Returns
    -------

    dictionary or list of dictionaries
        metadata for given tablevariables as decoded from the JSON
        response; an empty list if the request or the decoding fails

    Raises
    ------

    Exception
        if `variables` is neither a string nor an array of strings

    """
    if isStrIterable(variables):
        variable_string = ''.join('&tablevariable=' + name for name in variables)
    elif isStr(variables):
        variable_string = '&tablevariable=' + variables
    else:
        raise Exception('"variables" must be string or array of strings')

    meta_url = 'https://smear-backend.rahtiapp.fi/search/variable?' + variable_string

    # Best effort: a failed request gives an empty result, but interpreter
    # exits and Ctrl-C are no longer swallowed.
    try:
        with urllib.request.urlopen(meta_url) as url:
            metadata = json.loads(url.read().decode())
    except Exception:
        metadata = []
    return metadata
def listAllData(search_term=None, verbose=False)
-
List and describe variables in the SMEAR database
Parameters
search_term : string
- search term for searching the database; if None, all results are returned.
verbose : boolean
- verbose or not verbose output
Returns
pandas DataFrame
Expand source code
def listAllData(search_term=None,verbose=False):
    """ List and describe variables in the SMEAR database

    Parameters
    ----------

    search_term : string
        search term for searching the database, if `None`
        all results are returned. The term is matched
        case-insensitively against the variable titles.
    verbose : boolean
        if True the result also has `description` and `source` columns

    Returns
    -------

    pandas DataFrame
        columns `title` and `tablevariable` (plus `description` and
        `source` when `verbose` is True), one row per variable

    Raises
    ------

    Exception
        if `search_term` is not a string or `verbose` is not a boolean

    """
    if (search_term is not None) and (not isStr(search_term)):
        raise Exception('"search_term" should be a string')
    if not isinstance(verbose, bool):
        raise Exception('"verbose" should be True or False')

    variable_meta_url = "https://smear-backend.rahtiapp.fi/search/variable"

    # Subscript digits and the subscript x are mapped to plain characters
    # so that the texts can be searched with ordinary keyboard input.
    to_plain = str.maketrans("₀₁₂₃₄₅₆₇₈₉ₓ", "0123456789x")

    # Variable metadata
    with urllib.request.urlopen(variable_meta_url) as url:
        variablemetadata = json.loads(url.read().decode())

    columns = ['title', 'tablevariable']
    if verbose:
        columns = columns + ['description', 'source']

    # Rows are collected in a list and the frame is built once:
    # DataFrame.append no longer exists in pandas 2.0 and later.
    rows = []
    for entry in variablemetadata:
        row = [str(entry['title']).translate(to_plain),
               entry['tableName'] + '.' + entry['name']]
        if verbose:
            row.append(str(entry['description']).translate(to_plain))
            row.append(str(entry['source']).translate(to_plain))
        rows.append(row)
    df = pd.DataFrame(rows, columns=columns)

    # Find variables that contain the search_term in their title
    if search_term is not None:
        df = df.loc[df['title'].str.lower().str.find(search_term.lower()) != -1, :]
    return df