#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" The ciq_events program is used to scrape and process events.
"""
import argparse
import logging
import sys
from cityiq import __version__, CityIqError
from cityiq.task import DownloadTask
from cityiq.util import event_type_to_location_type
from progress.bar import ShadyBar as Bar
# Package metadata.
__author__ = "Eric Busboom"
__copyright__ = "Eric Busboom"
__license__ = "mit"
# Module-level logger; its level is adjusted by setup_logging().
_logger = logging.getLogger(__name__)
# Event type names advertised to the user. Note that validation in _main() is
# done by event_type_to_location_type(), not by membership in this list.
valid_events = ['PKIN', 'PKOUT', 'PEDEVT', 'TFEVT', 'BICYCLE']
# Comma-separated form of valid_events, embedded in the --events help text and
# in the unknown-event error message.
ve_string = ','.join(valid_events)
class ProgressBar(Bar):
    """Progress bar that also tallies tasks by their ``downloaded`` flag.

    Two counters are kept: one for tasks whose ``downloaded`` attribute is
    ``True`` and one for tasks where it is ``False`` (presumably results that
    already existed locally -- confirm against ``DownloadTask``). They are
    exposed as read-only properties.
    """

    # Class-level defaults; the first increment creates a per-instance value.
    _downloaded = 0
    _extant = 0

    @property
    def downloaded(self):
        """Number of tasks seen with ``task.downloaded is True``."""
        return self._downloaded

    @property
    def extant(self):
        """Number of tasks seen with ``task.downloaded is False``."""
        return self._extant

    def update_task(self, task):
        """Count ``task`` in the matching tally.

        Args:
            task: object with a ``downloaded`` attribute. Only the exact
                values ``True`` and ``False`` are counted; anything else
                (e.g. ``None``) leaves both counters untouched.
        """
        if task.downloaded is True:
            self._downloaded = self._downloaded + 1
        elif task.downloaded is False:
            self._extant = self._extant + 1
def make_parser():
    """Download events and load them into the cache.

    The :program:`ciq_events` program will request events from a CityIQ system, one
    day at a time, and cache the results. It will request the events from
    assets, based on which assets have ``eventTypes`` with the requested events.

    Because the program will request events for all of the assets that report an
    event type and makes one request per day, it can generate very large numbers of
    requests and take many hours to run. For instance this request:

        ciq_events -s 2020-01-01 -f 2020-06-01 -e PKIN PKOUT

    generates about 800,000 requests and will take a day to run.

    The `cityiq` module will not cache event requests for the current day or
    any day in the future.
    """
    # NOTE: the docstring above is passed to ArgumentParser as the ``--help``
    # description, so it is written for CLI users; developer notes live in
    # comments instead.
    #
    # Returns: a configured ``argparse.ArgumentParser`` for ``ciq_events``.
    parser = argparse.ArgumentParser(description=make_parser.__doc__, prog='ciq_events')

    parser.add_argument('--version', action='version',
                        version='cityiq {ver}'.format(ver=__version__))

    # Both verbosity flags write to the same destination; when neither is
    # given ``loglevel`` stays None.
    parser.add_argument('-v', '--verbose', dest="loglevel", help="set loglevel to INFO",
                        action='store_const', const=logging.INFO)
    parser.add_argument('-vv', '--very-verbose', dest="loglevel", help="set loglevel to DEBUG",
                        action='store_const', const=logging.DEBUG)

    parser.add_argument('-c', '--config', help='Path to configuration file')
    parser.add_argument('-w', '--workers', help='Number of threads to use', default=4, type=int)

    parser.add_argument('-s', '--start-time',
                        help='Starting time, in iso format. If not specified, use the '
                             "configuration value 'start_time' ")
    parser.add_argument('-f', '--end-time',
                        help='Ending time, in iso format. If not specified, end time is yesterday ')

    # nargs='+' means -e consumes every following bare word as an event name.
    parser.add_argument('-e', '--events', nargs='+',
                        help='Names of events to scrape. One or more of: ' + ve_string)

    parser.add_argument('-o', '--output-name',
                        help='Output file, where events are written in CSV format')
    parser.add_argument('-O', '--output', action='store_true',
                        help='Coalesce data into one CSV file per asset')

    return parser
# Module-level parser, built once at import time; _main() uses it to parse the
# command line.
parser = make_parser()
def setup_logging(loglevel):
    """Configure basic logging to stdout and set this module's logger level.

    Args:
        loglevel (int): minimum loglevel for emitting messages
    """
    logging.basicConfig(
        stream=sys.stdout,
        level=loglevel,
        format="[%(asctime)s] %(levelname)s:%(name)s:%(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    # Also set the level on the module logger explicitly.
    _logger.setLevel(loglevel)
def main(args):
    """Run the command, returning quietly on a broken pipe or Ctrl-C.

    Args:
        args ([str]): command line parameter list, passed through to ``_main``
    """
    try:
        _main(args)
    except BrokenPipeError:
        # The reader of our output went away; stop without a traceback.
        return
    except KeyboardInterrupt:
        # The user interrupted the run; stop without a traceback.
        return
def _main(args):
    """Main entry point allowing external calls.

    Parses the command line, loads the configuration and validates the
    requested events. Then, depending on ``--output``:

    * without it, runs the tasks built by ``CityIq.make_tasks`` through
      ``CityIq.run_async``, advancing a progress bar once per finished task;
    * with it, reads the cache files for each asset, concatenates them and
      writes one CSV per asset into a directory named after the events and
      the date range.

    Args:
        args ([str]): command line parameter list

    Raises:
        SystemExit: with status 1 when the configuration has no client id,
            no start time is available, no events were requested, or an
            event type is unknown. ``argparse`` may also exit on bad usage.
    """
    from cityiq import Config, CityIq

    args = parser.parse_args(args)

    if args.loglevel:
        setup_logging(args.loglevel)

    # NOTE(review): --workers and --output-name are parsed but are not
    # referenced anywhere in this function.

    if args.config:
        config = Config(args.config)
    else:
        config = Config()

    if not config.client_id:
        print("ERROR: Did not get valid config file. Use --config option or CITYIQ_CONFIG env var")
        sys.exit(1)

    # Check for a missing value *before* converting to str: str(None) is the
    # truthy string "None", which would defeat the check.
    start_time_value = args.start_time or config.start_time
    if not start_time_value:
        print("ERROR: Must specify a start time on the command line or in the config")
        sys.exit(1)
    start_time_str = str(start_time_value)

    print("Using config:", config._config_file)

    # -e/--events is optional for argparse, so it is None when omitted.
    if not args.events:
        print("ERROR: Must specify at least one event type with -e/--events. One or more of: "
              + ve_string)
        sys.exit(1)

    events = [e.upper() for e in args.events]

    # Validate every event name up front; the lookup raises for unknown types.
    try:
        for event in events:
            event_type_to_location_type(event)
    except CityIqError as e:
        print(f"Unknown event type: {e}. Must be one or more of: {ve_string}")
        sys.exit(1)

    c = CityIq(config)

    end_time = c.convert_time(args.end_time)
    start_time = c.convert_time(start_time_str)

    # All assets that report any of the requested event types.
    assets = list(c.assets_by_event(events))

    print(f"{len(assets)} assets")

    tasks = c.make_tasks(assets, events, start_time, end_time)

    if not args.output:
        with ProgressBar('Downloading', max=len(tasks),
                         suffix='%(index)d of %(max)d (%(percent).1f%%) - ETA %(eta_td)s') as bar:
            # The result is not needed here; only the task's state is tallied.
            for task, _result in c.run_async(tasks):
                bar.next()
                bar.update_task(task)
    else:
        # Imported lazily so the download path does not require pandas/tqdm.
        import pandas as pd
        from tqdm import tqdm
        from pathlib import Path

        event_name = '-'.join(e.lower() for e in events)

        for a in tqdm(assets, desc='Assets'):
            name = f"{event_name}_{start_time.date().isoformat()}_{end_time.date().isoformat()}/{a.uid}.csv"

            files = list(c.get_cache_files([a], events, start_time, end_time))

            frames = [pd.read_csv(f) for f in tqdm(files, desc="Frames", leave=False)]

            # Assets with no cached files produce no output file.
            if frames:
                df = pd.concat(frames, sort=False)

                out_path = Path(name)
                # exist_ok avoids the check-then-create race of a separate
                # exists() test.
                out_path.parent.mkdir(parents=True, exist_ok=True)

                df.to_csv(out_path)

    print("Done")
def run():
    """Entry point for console_scripts.

    Forwards the process arguments (without the program name) to ``main``.
    """
    cli_args = sys.argv[1:]
    main(cli_args)
# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    run()