Coverage for src/artemis_sg/items.py: 90%
91 statements
coverage.py v7.3.1, created at 2024-03-06 08:01 -0800
import json
import logging
import string

from artemis_sg.item import Item


class Items:
    """
    Collection object for artemis_slide_generator.Item objects.
    """

    # Constants
    ALPHA_LIST = tuple(string.ascii_uppercase)

    # methods
    def __init__(self, keys, value_list, isbn_key):
        """
        Instantiate Items object

        Arguments:
        keys -- list of strings to use as item keys
        value_list -- list of value lists, nested list positions correspond to keys
        isbn_key -- the key in keys that corresponds with ISBN (primary key)

        Returns:
        Items object
        """
        namespace = f"{type(self).__name__}.{self.__init__.__name__}"

        len_keys = len(keys)
        len_vals = len(value_list[0])
        if len_keys != len_vals:  # coverage: mismatch branch never taken in tests
            logging.error(
                f"{namespace}: Key count ({len_keys}) "
                f"does not match value count ({len_vals})."
            )
            logging.debug(f"keys: {keys}")
            logging.debug(f"first_row values: {value_list[0]}")
            raise IndexError

        self.isbn_key = isbn_key
        self.column_dict = dict(zip(keys, Items.ALPHA_LIST))

        self.items = []
        for row_num, entry in enumerate(value_list):
            i = Item(keys, entry, row_num, self.isbn_key)
            if any(i.data.values()):
                self.items.append(i)
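
For orientation, here is a minimal sketch of how the constructor is typically fed spreadsheet-style data. The header row, value rows, and ISBNs below are illustrative, not taken from the project.

    # Hypothetical header row and value rows; "ISBN" is the primary-key column.
    keys = ["TITLE", "AUTHOR", "ISBN"]
    rows = [
        ["Example Title", "A. Author", "9780000000002"],
        ["Another Title", "B. Writer", "9780000000019"],
    ]
    items = Items(keys, rows, "ISBN")
    for item in items:  # iteration is provided by __iter__ below
        print(item.isbn)

Rows whose values are all empty are dropped by the constructor, and a key/value count mismatch is logged and raised as IndexError.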
    def get_items(self):
        return self.items

    def __iter__(self):
        return iter(self.items)

    def get_json_data_from_file(self, datafile):
        namespace = f"{type(self).__name__}.{self.get_json_data_from_file.__name__}"
        try:
            with open(datafile) as filepointer:
                data = json.load(filepointer)
                filepointer.close()
                return data
        except FileNotFoundError:
            logging.error(f"{namespace}: Datafile '{datafile}' not found")
            return {}
        except json.decoder.JSONDecodeError:
            logging.error(
                f"{namespace}: Datafile '{datafile}' did not contain valid JSON"
            )
            return {}
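
get_json_data_from_file() returns an empty dict both when the file is missing and when it does not contain valid JSON, so callers can treat the result uniformly. A small sketch, continuing the example above with a hypothetical file name:

    # The missing file is logged as an error rather than raised.
    data = items.get_json_data_from_file("no_such_file.json")
    assert data == {}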
    def load_scraped_data(self, datafile):
        data = self.get_json_data_from_file(datafile)
        self.set_scraped_data(data)

    def save_scraped_data(self, datafile):
        namespace = f"{type(self).__name__}.{self.save_scraped_data.__name__}"

        internal_data = self.get_scraped_data()
        external_data = self.get_json_data_from_file(datafile)
        external_data.update(internal_data)
        if external_data:
            logging.debug(f"{namespace}: attempting to open {datafile}")
            with open(datafile, "w+") as filepointer:
                logging.debug(f"{namespace}: dumping scraped data to {datafile}")
                json.dump(external_data, filepointer, indent=4)
                filepointer.close()

    def set_scraped_data(self, data):
        for isbn in data:
            item = self.find_item(isbn)
            if not item:
                continue
            try:
                item.data["DESCRIPTION"] = data[isbn]["DESCRIPTION"]
            except KeyError:
                item.data["DESCRIPTION"] = ""
            try:
                item.data["DIMENSION"] = data[isbn]["DIMENSION"]
            except KeyError:
                item.data["DIMENSION"] = ""
            item.image_urls = data[isbn]["image_urls"]

    def get_scraped_data(self):
        data = {}
        for item in self.items:
            if item.image_urls != []:
                data_elem = {}
                data_elem["isbn10"] = item.isbn10
                data_elem["image_urls"] = item.image_urls
                if "DESCRIPTION" in item.data:  # coverage: condition never false in tests
                    data_elem["DESCRIPTION"] = item.data["DESCRIPTION"]
                if "DIMENSION" in item.data:  # coverage: condition never false in tests
                    data_elem["DIMENSION"] = item.data["DIMENSION"]
                data[item.isbn] = data_elem

        return data
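
set_scraped_data() and get_scraped_data() round-trip a per-ISBN mapping, so the JSON handled by load_scraped_data() and save_scraped_data() looks roughly like the sketch below. The ISBN, URL, and field values are made up; DESCRIPTION and DIMENSION are optional (they default to ""), while image_urls is expected to be present for each entry.

    # Illustrative shape of the scraped-data mapping (values are hypothetical).
    scraped = {
        "9780000000002": {
            "isbn10": "0000000002",
            "image_urls": ["https://example.com/cover.jpg"],
            "DESCRIPTION": "Sample description",
            "DIMENSION": "8 x 10 in",
        }
    }
    items.set_scraped_data(scraped)  # copies the fields onto the matching Item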
    def find_item(self, isbn):
        for item in self.items:
            if item.isbn == isbn:
                return item
        return None

    def get_items_with_image_urls(self):
        # WARNING: this looks at scraped URLs to determine if the item has images.
        # Images may be retrieved from GCloud storage. So, there may be cases
        # where this method of searching leads to false positives/negatives.
        items_with_images = []
        for item in self.items:
            if item.image_urls != []:  # coverage: condition never true in tests
                items_with_images.append(item)
        return items_with_images
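
A typical sequence with the methods above is to load previously scraped data and then select the items that carry image URLs. A sketch, continuing the earlier example with a hypothetical file name:

    items.load_scraped_data("scraped_items.json")
    with_images = items.get_items_with_image_urls()
    logging.info("items with scraped image urls: %d", len(with_images))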