Coverage for src/artemis_sg/cli.py: 79%
208 statements
« prev ^ index » next coverage.py v7.3.1, created at 2024-03-06 08:01 -0800
« prev ^ index » next coverage.py v7.3.1, created at 2024-03-06 08:01 -0800
1#!/usr/bin/env python
3import datetime
4import logging
5import os
6import sys
7from time import sleep
9import click
10from rich.console import Console
11from rich.progress import track
12from rich.text import Text
13from selenium.common.exceptions import NoSuchWindowException
15from artemis_sg import scraper, spreadsheet
16from artemis_sg.config import CFG
# Name of this module without directory or extension; used as the prefix of
# the namespaces that appear in log messages.
MODULE = os.path.splitext(os.path.basename(__file__))[0]
# Shared rich console used for styled terminal output.
console = Console()

# Templates echoed to stderr when a subcommand is skipped because a required
# base-command option is missing; "{}" receives the subcommand name.
v_skip = "{}: skipping due to lack of VENDOR"
b_skip = "{}: skipping due to lack of WORKBOOK"
@click.group(chain=True)
@click.option("-V", "--verbose", is_flag=True, help="enable verbose mode")
@click.option("-D", "--debug", is_flag=True, help="enable debug mode")
@click.option("-L", "--logfile", is_flag=True, help="log to file")
@click.option("-v", "--vendor", default=None, help="Vendor code")
@click.option(
    "-b", "--workbook", default=None, help="Workbook (Sheets Doc ID or Excel File)"
)
@click.option("-s", "--worksheet", default=None, help="Worksheet within Sheets Doc")
@click.pass_context
def cli(ctx, verbose, debug, logfile, vendor, workbook, worksheet):  # noqa: PLR0913
    """artemis_sg is a tool for processing product spreadsheet data.
    Its subcommands are designed to be used to facilitate the following primary
    endpoint conditions:

    \b
    * A Google Slide Deck of products
    * An enhanced Excel spreadsheet
    * A website order

    The subcommands can be combined into desired workflows.

    The base command includes --vendor, --workbook, and --worksheet options.
    These are used to pass context information to the subcommands.  Some
    subcommands expect --vendor and --workbook values to perform as designed.

    Example of Google Slide Deck workflow:

        $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
                scrape download upload generate -t "Cool Deck"

    Example of Sheet Image workflow:

        $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
                scrape download mkthumbs sheet-image -o "NewFile.xlsx"
    """
    namespace = f"{MODULE}.cli"
    logargs = {"format": "%(levelname)s: %(message)s"}
    if logfile:
        # Timestamped (UTC) log file name; the file is opened in write mode.
        dt = datetime.datetime.now(tz=datetime.UTC).strftime("%Y%m%d-%H%M%S")
        logargs["filename"] = f"artemis_sg-{dt}.log"
        logargs["filemode"] = "w"

    # Decide the level first so logging is configured by exactly one call.
    # --debug takes precedence over --verbose; with neither flag no level is
    # passed and logging keeps its default.
    if debug:
        logargs["level"] = logging.DEBUG
    elif verbose:
        logargs["level"] = logging.INFO
    logging.basicConfig(**logargs)

    if debug:
        logging.debug(f"{namespace}: Debug mode enabled.")
    elif verbose:
        logging.info(f"{namespace}: Verbose mode enabled.")

    # load up context object (ctx) so chained subcommands can read the values
    ctx.ensure_object(dict)
    ctx.obj["VENDOR"] = vendor
    ctx.obj["WORKBOOK"] = workbook
    ctx.obj["WORKSHEET"] = worksheet
@cli.command()
@click.pass_context
def scrape(ctx):
    """Scrape web data for vendor from workbook:worksheet

    Iterates over the item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.  For each
    ISBN in the WORKBOOK:WORKSHEET, it searches for item descriptions and
    images in a web browser.  It collects this information and stores it in the
    file defined by the configuration field [asg.data.file.scraped].  If data
    for an ISBN already exists in the datafile, the ISBN is skipped and does
    not result in re-scraping data for that record.

    Scrape supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    cmd = "scrape"
    # Both base options are required; report the first one missing on stderr
    # and skip the command instead of failing the whole chain.
    if not ctx.obj["VENDOR"]:
        click.echo(v_skip.format(cmd), err=True)
        return
    if not ctx.obj["WORKBOOK"]:
        click.echo(b_skip.format(cmd), err=True)
        return

    sdb = CFG["asg"]["data"]["file"]["scraped"]
    msg = (
        f"Scraping web data for '{ctx.obj['VENDOR'] or ''!s}' "
        f"using '{ctx.obj['WORKBOOK'] or ''!s}':"
        f"'{ctx.obj['WORKSHEET'] or ''!s}', "
        f"saving data to '{sdb}'..."
    )
    click.echo(msg)
    scraper_wrapper(
        ctx.obj["VENDOR"], ctx.obj["WORKBOOK"], ctx.obj["WORKSHEET"], sdb
    )
@cli.command()
def download():
    """
    Download scraped images

    Iterates over the data records in the file defined by the configuration
    field [asg.data.file.scraped].  For each record, it downloads the image
    files associated with the record to a local directory as defined by the
    configuration field [asg.data.dir.images].
    """
    # The path is only read here for the debug log; the wrapper is called
    # without arguments.
    images_dir = CFG["asg"]["data"]["dir"]["images"]
    click.echo("Downloading images...")
    logging.debug(f"{MODULE}.download: Download path is: {images_dir}")

    img_downloader_wrapper()
@cli.command()
def upload():
    """
    Upload local images to Google Cloud Storage Bucket

    Uploads the files in the directory defined by the configuration field
    [asg.data.dir.upload_source] to the Google Cloud bucket defined by the
    configuration field [google.cloud.bucket].  Only the first level of the
    source directory is uploaded.  Subdirectories of the source directory are
    not traversed for the upload.  All uploaded files are prefixed with value
    defined by the configuration field [google.cloud.bucket_prefix].
    """
    # The source path is only read here for the debug log; the wrapper is
    # called without arguments.
    source_dir = CFG["asg"]["data"]["dir"]["upload_source"]
    click.echo("Uploading images to Google Cloud...")
    logging.debug(f"{MODULE}.upload: Upload source path is: {source_dir}")

    gcloud_wrapper()
@cli.command()
@click.option(
    "-t",
    "--title",
    default=CFG["asg"]["slide_generator"]["title_default"],
    help="Slide deck title",
)
@click.pass_context
def generate(ctx, title):
    """
    Generate a Google Slide Deck

    The slide deck will be given a title based on the values supplied by VENDOR
    and --title.  The title slide will be in the following format:

        Artemis Book Sales Presents...
        Vendor Name, Title

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.  For each
    ISBN in the WORKBOOK:WORKSHEET
    for which it has image data it creates a slide containing the
    spreadsheet data, the description saved in the file defined by the configuration
    field [asg.data.file.scraped], and the images saved in the
    [google.cloud.bucket].  The slide deck will be saved to the root of the
    Google Drive associated with the credentials created during initial
    installation.

    Generate supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    cmd = "generate"
    namespace = f"{MODULE}.{cmd}"

    sdb = CFG["asg"]["data"]["file"]["scraped"]
    msg = (
        f"Creating Google Slides deck '{title}' for '{ctx.obj['VENDOR'] or ''!s}' "
        f"using '{ctx.obj['WORKBOOK'] or ''!s}':'{ctx.obj['WORKSHEET'] or ''!s}'..."
    )
    click.echo(msg)
    logging.debug(f"{namespace}: Scraped Items Database is: {sdb}")

    try:
        slide_generator_wrapper(
            ctx.obj["VENDOR"], ctx.obj["WORKBOOK"], ctx.obj["WORKSHEET"], sdb, title
        )
    except Exception as e:
        # Deliberately broad: unlike the other subcommands, generate does not
        # pre-check VENDOR/WORKBOOK.  Any failure is reported on stderr, with a
        # hint for each required base option that was not supplied, and the
        # command chain is allowed to continue.
        click.echo(f"Could not generate slide deck:{e}", err=True)
        if not ctx.obj["VENDOR"]:
            click.echo("\tVENDOR not provided", err=True)
        if not ctx.obj["WORKBOOK"]:
            click.echo("\tWORKBOOK not provided", err=True)
@cli.command()
@click.option("-o", "--output", "out", default="out.xlsx", help="Output file")
@click.pass_context
def sheet_image(ctx, out):
    """
    Insert item thumbnail images into spreadsheet

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.  For each
    ISBN that has a local thumbnail image file available, the thumbnail is
    inserted into the spreadsheet.

    Modifies a local XLSX spreadsheet file provided by the
    --workbook:--worksheet values passed by the base command to include
    thumbnail images in the second column for ISBN items (field identified by
    --vendor) in which local thumbnail image files are available and saves a
    new XLSX file.

    By default, the thumbnail images are obtained from
    [asg.data.dir.images]/thumbnails and the new XLSX file is saved as
    "out.xlsx" in the current working directory.

    NOTE: Currently, the command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "sheet-image"
    namespace = f"{MODULE}.sheet_image"

    # Both base options are required; report the first one missing on stderr
    # and skip the command instead of failing the whole chain.
    if not ctx.obj["VENDOR"]:
        click.echo(v_skip.format(cmd), err=True)
        return
    if not ctx.obj["WORKBOOK"]:
        click.echo(b_skip.format(cmd), err=True)
        return

    # Thumbnails are read from the "thumbnails" subdirectory of the configured
    # image directory.
    download_path = CFG["asg"]["data"]["dir"]["images"]
    image_directory = os.path.join(download_path, "thumbnails")
    msg = (
        f"Creating image enhanced spreadsheet for "
        f"'{ctx.obj['VENDOR'] or ''!s}' "
        f"using '{ctx.obj['WORKBOOK'] or ''!s}':"
        f"'{ctx.obj['WORKSHEET'] or ''!s}', "
        f"saving Excel file to '{out}'..."
    )
    click.echo(msg)
    logging.debug(f"{namespace}: Thumbnail Image Directory is: {image_directory}")

    sheet_image_wrapper(
        ctx.obj["VENDOR"],
        ctx.obj["WORKBOOK"],
        ctx.obj["WORKSHEET"],
        image_directory,
        out,
    )
@cli.command()
@click.option("-o", "--output", "out", default="out.xlsx", help="Output file")
@click.pass_context
def sheet_waves(ctx, out):
    """
    Insert data columns into spreadsheet

    \b
    * Description
    * Dimension
    * ImageURL0-6

    Modifies a local XLSX spreadsheet file provided by the
    --workbook:--worksheet values passed by the base command to include
    additional columns for ISBN items (field identified by
    --vendor) and saves a
    new XLSX file.

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.  For each,
    values are inserted into the added spreadsheet columns

    By default, the new XLSX file is saved as "out.xlsx" in the current working
    directory.

    NOTE: Currently, the command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "sheet-waves"
    # VENDOR is checked before WORKBOOK; whichever is missing first is
    # reported on stderr and the command is skipped.
    if not ctx.obj["VENDOR"]:
        click.echo(v_skip.format(cmd), err=True)
        return
    if not ctx.obj["WORKBOOK"]:
        click.echo(b_skip.format(cmd), err=True)
        return

    click.echo(
        f"Creating waves import spreadsheet for "
        f"'{ctx.obj['VENDOR'] or ''!s}' "
        f"using '{ctx.obj['WORKBOOK'] or ''!s}':"
        f"'{ctx.obj['WORKSHEET'] or ''!s}', "
        f"saving Excel file to '{out}'..."
    )

    scraped_db = CFG["asg"]["data"]["file"]["scraped"]
    sheet_waves_wrapper(
        ctx.obj["VENDOR"],
        ctx.obj["WORKBOOK"],
        ctx.obj["WORKSHEET"],
        out,
        scraped_db,
    )
@cli.command()
@click.option(
    "--image-directory",
    default=CFG["asg"]["data"]["dir"]["images"],
    help="Image directory",
)
def mkthumbs(image_directory):
    """
    Create thumbnails of images in IMAGE_DIRECTORY

    Creates thumbnail images from images located in a given directory.  These
    thumbnail images are saved to a "thumbnails" subdirectory in the original
    image directory.  These files are given the same names as their originals.

    By default, the command will use the directory defined by the configuration
    field [asg.data.dir.images] and size them to the dimensions defined by
    [asg.spreadsheet.mkthumbs.width] and [asg.spreadsheet.mkthumbs.height].
    """
    click.echo(f"Creating thumbnails of images in '{image_directory}'...")
    logging.debug(f"{MODULE}.mkthumbs: Image Directory is: {image_directory}")

    mkthumbs_wrapper(image_directory)
@cli.command()
@click.option("--email", "email", default="", help="TB Customer email to impersonate")
@click.option(
    "--timeout",
    "timeout",
    type=int,
    default=600,
    help="Maximum time to hold browser open",
)
@click.pass_context
def order(ctx, email, timeout):
    """
    Add items to be ordered to website cart of vendor from spreadsheet

    Populates the website cart for a given --vendor with items from a
    --workbook:--worksheet.  The WORKSHEET MUST contain an "Order" column from
    which the command will get the quantity of each item to put into the cart.

    The browser instance with the populated cart is left open for the user to
    review and manually complete the order.  The user will be asked to manually
    login during the execution of this command.

    NOTE: Currently, this command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    NOTE: The browser opened by this command is controlled by this command.
    The browser will automatically close and the session will be terminated at
    the end of the defined waiting period.  If the web order has not been
    completed by the end of the waiting period, the cart may be lost depending
    on how the website handles its session data.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "order"
    # --timeout is declared with type=int, so click has already validated and
    # converted it; a non-numeric value is rejected with a usage error rather
    # than an uncaught ValueError.

    # Both base options are required; report the first one missing on stderr
    # and skip the command instead of failing the whole chain.
    if not ctx.obj["VENDOR"]:
        click.echo(v_skip.format(cmd), err=True)
        return
    if not ctx.obj["WORKBOOK"]:
        click.echo(b_skip.format(cmd), err=True)
        return

    msg = (
        f"Creating web order for '{ctx.obj['VENDOR'] or ''!s}' "
        f"using '{ctx.obj['WORKBOOK'] or ''!s}':"
        f"'{ctx.obj['WORKSHEET'] or ''!s}', "
        f"Adding items to cart..."
    )
    click.echo(msg)

    order_wrapper(
        email,
        ctx.obj["VENDOR"],
        ctx.obj["WORKBOOK"],
        ctx.obj["WORKSHEET"],
        timeout,
    )
446# wrappers to make the cli testable
def slide_generator_wrapper(vendor, sheet_id, worksheet, sdb, title):
    """Forward the arguments unchanged to ``artemis_sg.slide_generator.main``."""
    # Imported at call time rather than at module import.
    from artemis_sg import slide_generator as _slide_generator

    _slide_generator.main(vendor, sheet_id, worksheet, sdb, title)
def gcloud_wrapper():
    """Call ``artemis_sg.gcloud.main`` with no arguments."""
    # Imported at call time rather than at module import.
    from artemis_sg import gcloud as _gcloud

    _gcloud.main()
def img_downloader_wrapper():
    """Call ``artemis_sg.img_downloader.main`` with no arguments."""
    # Imported at call time rather than at module import.
    from artemis_sg import img_downloader as _img_downloader

    _img_downloader.main()
def scraper_wrapper(vendor, sheet_id, worksheet, sdb):
    """Forward the arguments unchanged to ``artemis_sg.scraper.main``."""
    from artemis_sg import scraper as _scraper

    _scraper.main(vendor, sheet_id, worksheet, sdb)
def sheet_image_wrapper(vendor, workbook, worksheet, image_directory, out):
    """Forward the arguments unchanged to ``spreadsheet.sheet_image``."""
    spreadsheet.sheet_image(
        vendor,
        workbook,
        worksheet,
        image_directory,
        out,
    )
def mkthumbs_wrapper(image_directory):
    """Forward ``image_directory`` unchanged to ``spreadsheet.mkthumbs``."""
    spreadsheet.mkthumbs(image_directory)
def order_wrapper(email, vendor, workbook, worksheet, timeout=600):
    """Fill the vendor's web cart from the spreadsheet and hold the browser open.

    Reads the (item, quantity) pairs via ``spreadsheet.get_order_items``, logs
    in through the vendor scraper, adds each item to the cart, loads the cart
    page, and then counts down up to ``timeout`` one-second steps, stopping
    early once the browser is no longer alive.

    Logs an error and exits with status 1 when no scraper is available for
    ``vendor``.
    """
    order_items = spreadsheet.get_order_items(vendor, workbook, worksheet)
    driver, scrapr = get_driver_scraper(vendor, email)
    if not scrapr:
        logging.error(
            f"order: VENDOR '{vendor}' is not supported by the order command."
        )
        sys.exit(1)

    is_tb = vendor == "tb"
    scrapr.load_login_page()
    scrapr.login()
    if is_tb:
        scrapr.impersonate(email)

    for item, qty in order_items:
        item_num = item
        if is_tb:
            # For "tb" the spreadsheet item is first resolved through a search;
            # items with no search result are skipped.
            item_num = scrapr.search_item_num(item)
            if not item_num:
                continue
        if scrapr.load_item_page(item_num):
            scrapr.add_to_cart(qty)
    scrapr.load_cart_page()

    # The whitespace inside this literal is printed as-is.
    input_text = Text(
        """
        ******** USER INPUT REQUIRED ********
        Locate the selenium controlled browser
        and manually review and complete your order.
        ******** WAITING FOR USER INPUT ********
        """
    )
    input_text.stylize("bold cyan")
    console.print(input_text)

    warn_text = Text(
        f"WARNING: The browser session will terminate in {timeout} seconds!!!!"
    )
    warn_text.stylize("bold red")
    console.print(warn_text)

    # One progress step per second; stop as soon as the browser goes away.
    countdown = track(
        range(timeout), description="[red]COUNTING DOWN TIME REMAINING..."
    )
    for _ in countdown:
        if not is_browser_alive(driver):
            break
        sleep(1)
def sheet_waves_wrapper(vendor, workbook, worksheet, out, scraped_items_db):
    """Forward the arguments unchanged to ``spreadsheet.sheet_waves``."""
    spreadsheet.sheet_waves(
        vendor,
        workbook,
        worksheet,
        out,
        scraped_items_db,
    )
def get_driver_scraper(vendor, email=None):
    """Return a ``(driver, scraper)`` pair for ``vendor``.

    Vendors "tb", "gj" and "sd" each get a new driver from
    ``scraper.get_driver()`` wrapped in the matching scraper class; any other
    vendor yields ``(None, None)``.  Vendor "tb" additionally requires a
    non-empty ``email``; without it an error is logged and the process exits
    with status 1 before any driver is created.
    """
    # Class names are resolved lazily so only the selected scraper attribute
    # is looked up, and only after the driver has been created.
    scraper_class_names = {
        "tb": "TBScraper",
        "gj": "GJScraper",
        "sd": "SDScraper",
    }
    class_name = scraper_class_names.get(vendor)
    if class_name is None:
        return None, None

    if vendor == "tb" and not email:
        logging.error(
            f"order: VENDOR '{vendor}' requires the '--email' option to be set."
        )
        sys.exit(1)

    driver = scraper.get_driver()
    scrapr = getattr(scraper, class_name)(driver)
    return driver, scrapr
def is_browser_alive(driver):
    """Return True when ``driver`` still reports a non-empty current URL.

    Returns False when ``driver.current_url`` is empty, or when reading it
    raises ``AttributeError`` (for example when ``driver`` is None) or
    ``NoSuchWindowException``.  The result is always a bool; previously an
    empty URL fell through and returned None implicitly.
    """
    try:
        return bool(driver.current_url)
    except (AttributeError, NoSuchWindowException):
        return False
# Run the click command group when this module is executed as a script.
if __name__ == "__main__":
    cli()