Coverage for src/artemis_sg/spreadsheet.py: 84%
299 statements
« prev ^ index » next coverage.py v7.3.1, created at 2024-03-06 08:01 -0800
« prev ^ index » next coverage.py v7.3.1, created at 2024-03-06 08:01 -0800
1import logging
2import math
3import os
4import re
5from copy import copy
6from inspect import getsourcefile
8from googleapiclient.discovery import build
9from openpyxl import load_workbook
10from openpyxl.drawing.image import Image
11from openpyxl.styles import Alignment
12from openpyxl.utils import get_column_letter
13from openpyxl.utils.exceptions import InvalidFileException
14from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder
15from PIL import Image as PIL_Image
16from PIL import UnidentifiedImageError
18from artemis_sg import app_creds, items, vendor
19from artemis_sg.config import CFG
# Name of this module: the file's basename with its extension removed.
# Used as the prefix of the "namespace" tag in this module's log messages.
MODULE = os.path.splitext(os.path.split(__file__)[1])[0]
def get_worksheet(wb_obj, worksheet):
    """Return the requested sheet of a workbook.

    Args:
        wb_obj: Workbook-like object exposing ``worksheets`` and item access
            by sheet name.
        worksheet: Sheet name to look up. Any falsy value selects the first
            entry of ``wb_obj.worksheets`` instead.

    Returns:
        The selected worksheet object.
    """
    if worksheet:
        return wb_obj[worksheet]
    return wb_obj.worksheets[0]
def get_sheet_keys(ws):
    """Return the first row of a worksheet with string cells upper-cased.

    Args:
        ws: Worksheet-like object whose ``values`` attribute iterates over
            rows (each row being an iterable of cell values).

    Returns:
        list: The values of the first row. ``str`` values are upper-cased,
        every other value is passed through unchanged. An empty list is
        returned when the worksheet yields no rows at all.
    """
    # Only the first row is needed; the sentinel covers a sheet with no rows,
    # which previously left the result unbound and raised UnboundLocalError.
    first_row = next(iter(ws.values), None)
    if first_row is None:
        return []
    return [x.upper() if isinstance(x, str) else x for x in first_row]
def shift_col(ws, col_key, target_idx):
    """Move the column headed by ``col_key`` to position ``target_idx``.

    Args:
        ws: Worksheet to modify in place.
        col_key: Header value to look up in the result of ``get_sheet_keys``.
        target_idx: 1-based destination column index.

    Raises:
        ValueError: If ``col_key`` is not among the sheet keys.
    """
    # Open an empty slot at the destination, then look the source column up
    # in the header row as it stands *after* that insertion.
    ws.insert_cols(target_idx)
    src_idx = get_sheet_keys(ws).index(col_key) + 1  # openpyxl is 1-based
    src_ltr = get_column_letter(src_idx)
    whole_column = f"{src_ltr}1:{src_ltr}{ws.max_row}"
    ws.move_range(whole_column, rows=0, cols=target_idx - src_idx)
    # The source column has been moved away; remove what is left of it.
    ws.delete_cols(src_idx)
def copy_cell_style(ws, style_src_cell, target_cell):
    """Copy the style attributes of one cell onto another.

    Nothing happens unless ``style_src_cell.has_style`` is truthy.

    Args:
        ws: Mapping-like worksheet; ``ws[target_cell]`` must yield the cell
            to restyle.
        style_src_cell: Cell whose style attributes are copied.
        target_cell: Key (e.g. a coordinate string) of the destination cell.
    """
    if not style_src_cell.has_style:
        return
    style_attrs = (
        "font",
        "border",
        "fill",
        "number_format",
        "protection",
        "alignment",
    )
    for attr in style_attrs:
        # Each attribute gets its own shallow copy of the source value.
        duplicate = copy(getattr(style_src_cell, attr))
        setattr(ws[target_cell], attr, duplicate)
def create_col(ws, col_key, target_idx, style_src_cell=None):
    """Insert a new column at ``target_idx`` and write its header cell.

    Args:
        ws: Worksheet to modify in place.
        col_key: Column key; its ``str.title()`` form is written to row 1.
        target_idx: 1-based index at which the column is inserted.
        style_src_cell: Optional cell whose style is copied onto the new
            header cell. Skipped when falsy.
    """
    ws.insert_cols(target_idx)
    header_ref = get_column_letter(target_idx) + "1"
    ws[header_ref] = col_key.title()
    if not style_src_cell:
        return
    copy_cell_style(ws, style_src_cell, header_ref)
def sequence_worksheet(ws, col_order, isbn_key):
    """Arrange the worksheet's leading columns to follow ``col_order``.

    Each entry of ``col_order`` is placed at its 1-based position in the
    list. The entry ``"ISBN"`` is replaced by ``isbn_key``. Keys found in
    the header row are moved with ``shift_col``; all others get a new column
    from ``create_col``.

    Args:
        ws: Worksheet to modify in place.
        col_order: Ordered iterable of column keys.
        isbn_key: Column key substituted for the ``"ISBN"`` entry.
    """
    # The header snapshot is taken once, before any column is moved or added.
    existing_keys = get_sheet_keys(ws)
    for position, wanted in enumerate(col_order, start=1):
        column_key = isbn_key if wanted == "ISBN" else wanted
        place = shift_col if column_key in existing_keys else create_col
        place(ws, column_key, position)
def size_sheet_cols(ws, isbn_key):
    """Assign a width to every column of the worksheet.

    The default width is the longest ``str(cell.value)`` in the column
    multiplied by the configured ``col_buffer``, capped at ``max_col_width``.
    The column whose key equals ``isbn_key`` and the ``"IMAGE"`` column use
    their own configured widths instead. All values come from
    ``CFG["asg"]["spreadsheet"]["sheet_image"]``.

    Args:
        ws: Worksheet whose ``column_dimensions`` are replaced.
        isbn_key: Header key (as returned by ``get_sheet_keys``) of the ISBN
            column.
    """
    holder = DimensionHolder(worksheet=ws)
    headers = get_sheet_keys(ws)
    settings = CFG["asg"]["spreadsheet"]["sheet_image"]
    for position, header in enumerate(headers, start=1):  # openpyxl is 1-based
        letter = get_column_letter(position)
        longest = max(len(str(cell.value)) for cell in ws[letter])
        width = min(longest * settings["col_buffer"], settings["max_col_width"])
        # Special columns override the content-based width; the IMAGE check
        # comes last so it wins if both conditions match.
        if header == isbn_key:
            width = math.ceil(settings["isbn_col_width"] * settings["col_buffer"])
        if header == "IMAGE":
            width = settings["image_col_width"]
        holder[letter] = ColumnDimension(ws, index=letter, width=width)

    ws.column_dimensions = holder
def insert_image(image_directory, ws, isbn_cell, image_cell):
    """Set the image row's height and anchor ``<isbn>.jpg`` at ``image_cell``.

    The ISBN is read from ``isbn_cell.value``: floats are converted to int,
    strings of the form ``="..."`` are unwrapped, and the result is
    stringified and stripped. A falsy cell value yields an empty ISBN. The
    image is added only if the file exists in ``image_directory``.

    Args:
        image_directory: Directory searched for ``<isbn>.jpg``.
        ws: Worksheet receiving the image and the row-height change.
        isbn_cell: Cell holding the ISBN.
        image_cell: Cell that anchors the image; its row gets the configured
            ``image_row_height``.
    """
    namespace = f"{MODULE}.{insert_image.__name__}"
    image_row_height = CFG["asg"]["spreadsheet"]["sheet_image"]["image_row_height"]

    raw_value = isbn_cell.value
    if not raw_value:
        isbn = ""
    else:
        isbn = raw_value
        if isinstance(isbn, float):
            isbn = int(isbn)
        elif isinstance(isbn, str):
            wrapped = re.search('="(.*)"', isbn)
            if wrapped is not None:
                isbn = wrapped.group(1)
        try:
            isbn = str(isbn).strip()
        except Exception as e:
            logging.error(f"{namespace}: Err reading isbn '{isbn}', err: '{e}'")
            isbn = ""

    # Set row height
    ws.row_dimensions[image_cell.row].height = image_row_height

    # Insert image into cell
    filepath = os.path.join(image_directory, f"{isbn}.jpg")
    logging.debug(f"{namespace}: Attempting to insert '{filepath}'.")
    if not os.path.isfile(filepath):
        return
    anchor = f"{image_cell.column_letter}{image_cell.row}"
    ws.add_image(Image(filepath), anchor)
    logging.info(f"{namespace}: Inserted '{filepath}'.")
def sheet_image(vendor_code, workbook, worksheet, image_directory, out):
    """Reorder and resize a vendor sheet, add item images, and save it.

    Args:
        vendor_code: Code passed to ``vendor.Vendor`` to obtain the ISBN key.
        workbook: Path of the workbook to load.
        worksheet: Sheet name; a falsy value selects the first sheet.
        image_directory: Directory searched for ``<isbn>.jpg`` files.
        out: Destination passed to ``wb.save``.

    Raises:
        ValueError: If the "IMAGE" column or the ISBN column is not present
            after the sheet has been sequenced.
    """
    namespace = f"{MODULE}.{sheet_image.__name__}"

    # get vendor info from database
    logging.debug(f"{namespace}: Instantiate vendor.")
    vendr = vendor.Vendor(vendor_code)
    vendr.set_vendor_data()

    isbn_key = vendr.isbn_key
    logging.debug(f"{namespace}: Setting ISBN_KEY to '{isbn_key}'.")

    # Load worksheet
    logging.info(f"{namespace}: Workbook is {workbook}")
    wb = load_workbook(workbook)
    ws = get_worksheet(wb, worksheet)
    logging.info(f"{namespace}: Worksheet is {ws.title}")

    sequence_worksheet(
        ws, CFG["asg"]["spreadsheet"]["sheet_image"]["col_order"], isbn_key
    )
    size_sheet_cols(ws, isbn_key)

    # Prepare "IMAGE" column
    headers = get_sheet_keys(ws)
    try:
        img_idx_ltr = get_column_letter(headers.index("IMAGE") + 1)
    except ValueError:
        logging.error(f"{namespace}: Err finding 'IMAGE' column in sheet '{workbook}'.")
        logging.error("Aborting.")
        raise
    try:
        isbn_idx_ltr = get_column_letter(headers.index(isbn_key) + 1)
    except ValueError:
        logging.error(
            f"{namespace}: Err, no '{isbn_key}' column in sheet '{workbook}'.")
        logging.error("Aborting.")
        raise

    # Row 1 is the header, so data rows run from 2 through max_row.
    for row_num in range(2, ws.max_row + 1):
        isbn_cell = ws[f"{isbn_idx_ltr}{row_num}"]
        image_cell = ws[f"{img_idx_ltr}{row_num}"]
        # Format to center content
        image_cell.alignment = Alignment(horizontal="center")
        insert_image(image_directory, ws, isbn_cell, image_cell)

    # Save workbook
    wb.save(out)
def validate_isbn(isbn):
    """Normalise a raw ISBN cell value to a string of digits.

    String input of the form ``="..."`` is unwrapped first. The value is
    then converted with ``int`` and stringified.

    Args:
        isbn: Raw cell value.

    Returns:
        str: The integer form of the value as a string, or ``""`` (after
        logging an error) when the conversion fails.
    """
    namespace = f"{MODULE}.{validate_isbn.__name__}"
    if isinstance(isbn, str):
        wrapped = re.search('="(.*)"', isbn)
        if wrapped is not None:
            isbn = wrapped.group(1)
    try:
        return str(int(isbn)).strip()
    except Exception as e:
        logging.error(f"{namespace}: Err reading isbn '{isbn}', err: '{e}'")
        return ""
def validate_qty(qty):
    """Normalise a raw order-quantity cell value to an integer string.

    Args:
        qty: Raw cell value.

    Returns:
        str | None: ``str(int(qty))``, or ``None`` (after logging an error)
        when the value cannot be converted with ``int``.
    """
    namespace = f"{MODULE}.{validate_qty.__name__}"
    try:
        return str(int(qty)).strip()
    except Exception as e:
        logging.error(f"{namespace}: Err reading Order qty '{qty}', err: '{e}'")
        return None
def get_order_items(vendor_code, workbook, worksheet):
    """Collect ``(isbn, qty)`` pairs from the rows of an order sheet.

    The header row (row 1) is searched for a cell equal to the vendor's ISBN
    key and for a cell equal to ``"Order"``. Every following row contributes
    one tuple, provided both ``validate_isbn`` and ``validate_qty`` return a
    truthy value for it.

    Args:
        vendor_code: Code passed to ``vendor.Vendor`` to obtain the ISBN key.
        workbook: Path of the workbook to load.
        worksheet: Sheet name; a falsy value selects the first sheet.

    Returns:
        list[tuple[str, str]]: Validated ``(isbn, qty)`` pairs in row order.

    Raises:
        ValueError: If the header row lacks the ISBN column or the "Order"
            column. Previously this surfaced as an UnboundLocalError.
    """
    namespace = f"{MODULE}.{get_order_items.__name__}"

    # get vendor info from database
    logging.debug(f"{namespace}: Instantiate vendor.")
    vendr = vendor.Vendor(vendor_code)
    vendr.set_vendor_data()

    isbn_key = vendr.isbn_key
    logging.debug(f"{namespace}: Setting ISBN_KEY to '{isbn_key}'.")

    # Load worksheet
    logging.info(f"{namespace}: Workbook is {workbook}")
    wb = load_workbook(workbook)
    ws = get_worksheet(wb, worksheet)
    logging.info(f"{namespace}: Worksheet is {ws.title}")

    # Find ISBN and Order columns. No early exit: as before, the last
    # matching header wins when a name appears more than once.
    isbn_col = None
    order_col = None
    for cell in ws[1]:
        if cell.value == isbn_key:
            isbn_col = cell.column
        if cell.value == "Order":
            order_col = cell.column

    missing = [
        name
        for name, col in ((isbn_key, isbn_col), ("Order", order_col))
        if col is None
    ]
    if missing:
        for name in missing:
            logging.error(
                f"{namespace}: Err, no '{name}' column in sheet '{workbook}'.")
        logging.error("Aborting.")
        raise ValueError(
            f"{namespace}: missing column(s) {missing} in sheet '{workbook}'"
        )

    order_items = []
    for row in ws.iter_rows(min_row=2):
        # Row tuples start at column 1, so a 1-based column maps to index - 1.
        # Validate ISBN
        isbn = validate_isbn(row[isbn_col - 1].value)
        if not isbn:
            continue
        # Validate Order Qty
        qty = validate_qty(row[order_col - 1].value)
        if not qty:
            continue
        order_items.append((isbn, qty))

    return order_items
def mkthumbs(image_directory):
    """Create logo-backed thumbnails for the images in ``image_directory``.

    Thumbnails go into a ``thumbnails`` sub-directory, which is created when
    absent. Each qualifying ``.jpg``/``.png`` file is shrunk to fit the
    configured ``mkthumbs`` width and height, pasted centred onto a copy of
    the bundled ``artemis_logo.png``, converted to RGB and saved under the
    same file name. Existing thumbnails are not remade.

    Files skipped:
      * anything not ending in ``.jpg`` or ``.png``;
      * supplemental images (``name-<digits>.ext``) whose primary file
        ``name.ext`` exists in ``image_directory``.

    A file that Pillow cannot identify is logged and deleted from
    ``image_directory``.

    Args:
        image_directory: Directory containing the source images.

    Raises:
        OSError: If the thumbnail directory does not exist after the attempt
            to create it.
    """
    namespace = f"{MODULE}.{mkthumbs.__name__}"

    thumb_width = CFG["asg"]["spreadsheet"]["mkthumbs"]["width"]
    thumb_height = CFG["asg"]["spreadsheet"]["mkthumbs"]["height"]

    here = os.path.dirname(getsourcefile(lambda: 0))
    data = os.path.abspath(os.path.join(here, "data"))
    logo = os.path.join(data, "artemis_logo.png")
    logging.debug(f"{namespace}: Found image for thumbnail background at '{logo}'")
    sub_dir = "thumbnails"
    # Load the background into memory and release the file handle right away;
    # every thumbnail starts from a copy of this in-memory image.
    with PIL_Image.open(logo) as logo_img:
        back = logo_img.copy()
    thumb_dir = os.path.join(image_directory, sub_dir)
    logging.debug(f"{namespace}: Defining thumbnail directory as '{thumb_dir}'")
    if not os.path.isdir(thumb_dir):
        logging.debug(f"{namespace}: Creating directory '{thumb_dir}'")
        os.mkdir(thumb_dir)
        if os.path.isdir(thumb_dir):
            logging.info(f"{namespace}: Successfully created directory '{thumb_dir}'")
        else:
            message = (
                f"{namespace}: Failed to create directory '{thumb_dir}'. Aborting."
            )
            logging.error(message)
            raise OSError(message)

    for f in os.listdir(image_directory):
        # Valid files are JPG or PNG that are not supplemental images.
        if not re.match(r"^.+\.(?:jpg|png)$", f):
            continue
        # Supplemental images have a "-[0-9]+" suffix before the file type
        # AND a file without that suffix exists in the image_directory.
        suffix = re.match(r"(^.+)-[0-9]+(\.(?:jpg|png))$", f)
        if suffix:
            primary = suffix.group(1) + suffix.group(2)
            if os.path.isfile(os.path.join(image_directory, primary)):
                continue
        thumb_file = os.path.join(thumb_dir, f)
        # don't remake thumbnails
        if os.path.isfile(thumb_file):
            continue
        bk = back.copy()
        file_path = os.path.join(image_directory, f)
        try:
            fg = PIL_Image.open(file_path)
        except UnidentifiedImageError:
            logging.error(f"{namespace}: Err reading '{f}', deleting '{file_path}'")
            os.remove(file_path)
            continue
        # Close the source image as soon as it has been pasted.
        with fg:
            fg.thumbnail((thumb_width, thumb_height))
            offset = (
                int((bk.size[0] - fg.size[0]) / 2),
                int((bk.size[1] - fg.size[1]) / 2),
            )
            bk.paste(fg, offset)
        logging.debug(f"{namespace}: Attempting to save thumbnail '{thumb_file}'")
        bkn = bk.convert("RGB")
        bkn.save(thumb_file)
        logging.info(f"{namespace}: Successfully created thumbnail '{thumb_file}'")
def get_sheet_data(workbook, worksheet=None):
    """Return all rows of a sheet, from an Excel file or from Google Sheets.

    ``workbook`` is first opened as an Excel file. If that raises
    ``FileNotFoundError`` or ``InvalidFileException``, it is used as a Google
    spreadsheet id and the rows are fetched through the Sheets v4 API.

    Args:
        workbook: Path of an Excel workbook, or a Google spreadsheet id.
        worksheet: Sheet name. When falsy, the first sheet is used (for
            Google, the title of the first entry in the "sheets" list).

    Returns:
        The sheet's rows: a list of row tuples for Excel, or the "values"
        entry of the API response for Google.
    """
    namespace = f"{MODULE}.{get_sheet_data.__name__}"
    try:
        # Try to open the workbook as an Excel file
        wb = load_workbook(workbook)
        sheet_data = list(get_worksheet(wb, worksheet).values)
    except (FileNotFoundError, InvalidFileException):
        # Google specific stuff
        # authenticate to google sheets
        logging.info(f"{namespace}: Authenticating to google api.")
        creds = app_creds.app_creds()
        sheets_api = build("sheets", "v4", credentials=creds)
        # get sheet data
        if worksheet:
            ws = worksheet
        else:
            spreadsheet = sheets_api.spreadsheets().get(spreadsheetId=workbook)
            sheets = spreadsheet.execute().get("sheets", "")
            ws = sheets.pop(0).get("properties", {}).get("title")
        request = sheets_api.spreadsheets().values().get(
            range=ws, spreadsheetId=workbook
        )
        sheet_data = request.execute().get("values")
    return sheet_data
def sheet_waves(vendor_code, workbook, worksheet, out, scraped_items_db):
    """Append description, dimension and image-URL columns and save the sheet.

    Rows are matched to scraped items through the ISBN column. For each
    matched row, the item's "DESCRIPTION" and "DIMENSION" data and up to
    seven image URLs are written into the appended columns.

    Args:
        vendor_code: Code passed to ``vendor.Vendor`` to obtain the ISBN key.
        workbook: Workbook identifier passed to ``get_sheet_data`` and
            ``load_workbook``.
        worksheet: Sheet name; a falsy value selects the first sheet.
        out: Destination passed to ``wb.save``.
        scraped_items_db: Passed to ``Items.load_scraped_data``.

    Raises:
        Exception: If no header cell matches the ISBN key (the comparison is
            case-insensitive).
    """
    namespace = f"{MODULE}.{sheet_waves.__name__}"

    addl_data_columns = ["Description", "Dimension"]
    addl_image_columns = [f"ImageURL{n}" for n in range(7)]
    addl_columns = addl_data_columns + addl_image_columns

    # get vendor info from database
    logging.debug(f"{namespace}: Instantiate vendor.")
    vendr = vendor.Vendor(vendor_code)
    vendr.set_vendor_data()

    isbn_key = vendr.isbn_key
    logging.debug(f"{namespace}: Setting ISBN_KEY to '{isbn_key}'.")

    sheet_data = get_sheet_data(workbook, worksheet)

    sheet_keys = [key for key in sheet_data.pop(0) if key]  # filter out None
    items_obj = items.Items(sheet_keys, sheet_data, vendr.isbn_key)
    items_obj.load_scraped_data(scraped_items_db)

    # Load worksheet
    logging.info(f"{namespace}: Workbook is {workbook}")
    wb = load_workbook(workbook)
    ws = get_worksheet(wb, worksheet)
    logging.info(f"{namespace}: Worksheet is {ws.title}")

    # Append columns. Headers start one column after col_insert_idx (1-based).
    col_insert_idx = ws.max_column + 1
    ws.insert_cols(col_insert_idx, len(addl_columns))
    for offset, title in enumerate(addl_columns, start=1):
        ws.cell(row=1, column=col_insert_idx + offset, value=title)

    # Find ISBN column (zero-based position of the first matching header)
    isbn_idx = next(
        (
            cell.column - 1
            for cell in ws[1]
            if isinstance(cell.value, str) and cell.value.upper() == isbn_key.upper()
        ),
        None,
    )
    if isbn_idx is None:
        logging.error(f"{namespace}: Err no isbn column in spreadsheet")
        raise Exception

    # Insert data in cells
    for row in ws.iter_rows(min_row=2):
        # find items_obj matching the row's isbn
        item = items_obj.find_item(str(row[isbn_idx].value))
        if not item:
            continue
        # col_insert_idx is used here as a zero-based index into the row
        # tuple, i.e. the cell under the first appended header.
        idx = col_insert_idx
        for key in addl_data_columns:
            data_key = key.upper()
            if data_key in item.data:
                row[idx].value = item.data[data_key]
            idx += 1  # advance even when the item lacks this key
        for img_url in item.image_urls[: len(addl_image_columns)]:
            row[idx].value = img_url
            idx += 1

    # Save workbook
    wb.save(out)