Coverage for amazonorders/session.py: 92.36%
144 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 15:31 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 15:31 +0000
1import logging
2import os
3from io import BytesIO
5from PIL import Image
6from bs4 import BeautifulSoup
7from requests import Session
# Package metadata exposed as module-level dunder attributes.
__version__ = "0.0.5"
__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
13from amazonorders.exception import AmazonOrdersAuthError
# Module-level logger; AmazonSession raises its level to DEBUG when created
# with debug=True.
logger = logging.getLogger(__name__)

# Root URL that every request path in this module is built from.
BASE_URL = "https://www.amazon.com"

# Headers merged into every request made through AmazonSession.request().
# Key order is kept exactly as originally declared.
BASE_HEADERS = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": BASE_URL,
    "Referer": BASE_URL + "/ap/signin",
    "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Ch-Viewport-Width": "1393",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Viewport-Width": "1393",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

# Identifiers used to recognise which page of the sign-in flow was returned.
SIGN_IN_FORM_NAME = "signIn"                           # <form name=...>
MFA_DEVICE_SELECT_FORM_ID = "auth-select-device-form"  # <form id=...>
MFA_FORM_ID = "auth-mfa-form"                          # <form id=...>
CAPTCHA_1_DIV_ID = "cvf-page-content"                  # <div id=...>
CAPTCHA_1_FORM_CLASS = "cvf-widget-form"               # <form class=...>
CAPTCHA_2_INPUT_ID = "captchacharacters"               # prefix of <input id=...>
class AmazonSession:
    """Wrap a ``requests`` session and drive Amazon's sign-in flow.

    The flow is interactive: Captcha answers, the MFA device choice and the
    one-time passcode are read from stdin via ``input()``.

    After every request the raw response is kept in ``last_response`` and its
    parsed HTML in ``last_response_parsed``; the private helpers all operate
    on those two attributes.
    """

    def __init__(self,
                 username,
                 password,
                 debug=False,
                 max_auth_attempts=10) -> None:
        """Store credentials and create the underlying ``requests`` session.

        :param username: Value submitted as the sign-in form's ``email``.
        :param password: Value submitted as the sign-in form's ``password``.
        :param debug: When true, the module logger is set to DEBUG and every
            response body is written to an HTML file in the working directory.
        :param max_auth_attempts: Upper bound on sign-in flow iterations
            performed by :meth:`login` before it gives up.
        """
        self.username = username
        self.password = password

        self.debug = debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
        self.max_auth_attempts = max_auth_attempts

        self.session = Session()
        self.last_response = None
        self.last_response_parsed = None
        self.is_authenticated = False

    def request(self, method, url, **kwargs):
        """Send a request through the session and record the response.

        ``BASE_HEADERS`` are merged over any ``headers`` passed by the caller
        (base values win on conflict). The response is stored in
        ``last_response`` and parsed into ``last_response_parsed``. With
        ``debug`` enabled the response body is also written to a file.

        :param method: HTTP method name.
        :param url: Target URL.
        :param kwargs: Passed through to ``Session.request``.
        :return: The response object.
        """
        # Merge into a fresh dict so the caller's dict is never mutated and a
        # ``headers=None`` argument is tolerated.
        headers = dict(kwargs.get("headers") or {})
        headers.update(BASE_HEADERS)
        kwargs["headers"] = headers

        logger.debug("{} request to {}".format(method, url))

        self.last_response = self.session.request(method, url, **kwargs)
        self.last_response_parsed = BeautifulSoup(self.last_response.text,
                                                  "html.parser")

        logger.debug("Response: {} - {}".format(self.last_response.url,
                                                self.last_response.status_code))

        if self.debug:
            page_name = self._get_page_from_url(self.last_response.url)
            with open(page_name, "w", encoding="utf-8") as html_file:
                logger.debug("Response written to file: {}".format(html_file.name))
                html_file.write(self.last_response.text)

        return self.last_response

    def get(self, url, **kwargs):
        """Shortcut for ``request("GET", url, **kwargs)``."""
        return self.request("GET", url, **kwargs)

    def post(self, url, **kwargs):
        """Shortcut for ``request("POST", url, **kwargs)``."""
        return self.request("POST", url, **kwargs)

    def login(self):
        """Run the sign-in flow until authenticated or attempts run out.

        Each iteration inspects the last parsed page, dispatches to the
        handler for the form it contains (sign-in, Captcha, MFA device
        selection, MFA code) and then checks the new page for signed-in
        markers.

        :raises AmazonOrdersAuthError: If the page is not one of the known
            forms, if sign-in reports an error, or if the session is still
            unauthenticated once the attempts are exhausted.
        """
        self.get("{}/gp/sign-in.html".format(BASE_URL))

        attempts = 0
        while not self.is_authenticated and attempts < self.max_auth_attempts:
            if self._is_field_found(SIGN_IN_FORM_NAME):
                self._sign_in()
            elif self._is_field_found(CAPTCHA_1_FORM_CLASS, field_key="class"):
                self._captcha_1_submit()
            elif self._find_captcha_2_input():
                self._captcha_2_submit()
            elif self._is_field_found(MFA_DEVICE_SELECT_FORM_ID, field_key="id"):
                self._mfa_device_select()
            elif self._is_field_found(MFA_FORM_ID, field_key="id"):
                self._mfa_submit()
            else:
                raise AmazonOrdersAuthError(
                    "An error occurred, this is an unknown page: {}. To capture the page to a file, set the `debug` flag.".format(
                        self.last_response.url))

            page_text = self.last_response.text
            if "Hello, sign in" not in page_text and "nav-item-signout" in page_text:
                self.is_authenticated = True
            else:
                attempts += 1

        # Test the outcome rather than the counter, so a zero or negative
        # ``max_auth_attempts`` cannot raise for an authenticated session or
        # return silently for an unauthenticated one.
        if not self.is_authenticated:
            raise AmazonOrdersAuthError(
                "Max authentication flow attempts reached.")

    def logout(self):
        """Request the sign-out page, close the session and clear the flag."""
        self.get("{}/gp/sign-out.html".format(BASE_URL))

        self.close()
        # Without this reset a later login() would skip the flow entirely.
        self.is_authenticated = False

    def close(self):
        """Close the underlying ``requests`` session."""
        self.session.close()

    def _sign_in(self):
        """Submit the sign-in form with the stored credentials.

        :raises AmazonOrdersAuthError: If the resulting page contains the
            default error box.
        """
        form = self.last_response_parsed.find("form", {"name": SIGN_IN_FORM_NAME})
        data = self._build_from_form(form,
                                     additional_attrs={"email": self.username,
                                                       "password": self.password,
                                                       "rememberMe": "true"})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors(critical=True)

    def _mfa_device_select(self):
        """Prompt for the device to receive the passcode and submit the form.

        The choices are printed numbered from 1; the number typed by the user
        selects the corresponding ``otpDeviceContext`` value.
        """
        form = self.last_response_parsed.find("form",
                                              {"id": MFA_DEVICE_SELECT_FORM_ID})
        contexts = form.find_all("input", {"name": "otpDeviceContext"})
        for i, field in enumerate(contexts, start=1):
            print("{}: {}".format(i, field.attrs["value"].strip()))
        otp_device = int(
            input("Where would you like your one-time passcode sent? "))

        data = self._build_from_form(form,
                                     additional_attrs={"otpDeviceContext":
                                                       contexts[otp_device - 1].attrs["value"]})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _mfa_submit(self):
        """Prompt for the one-time passcode and submit the MFA form."""
        otp = input("Enter the one-time passcode sent to your device: ")

        # TODO: figure out why Amazon doesn't respect rememberDevice
        form = self.last_response_parsed.find("form", id=MFA_FORM_ID)
        data = self._build_from_form(form,
                                     additional_attrs={"otpCode": otp, "rememberDevice": ""})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form),
                     data=data)

        self._handle_errors()

    def _captcha_1_submit(self):
        """Show the first Captcha variant, prompt for the answer and submit it."""
        captcha = self.last_response_parsed.find("div", {"id": CAPTCHA_1_DIV_ID})

        img_src = captcha.find("img", {"alt": "captcha"}).attrs["src"]
        captcha_response = self._prompt_for_captcha(img_src)

        form = self.last_response_parsed.find("form", {"class": CAPTCHA_1_FORM_CLASS})
        data = self._build_from_form(form,
                                     additional_attrs={"cvf_captcha_input": captcha_response})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix="{}/ap/cvf/".format(BASE_URL)),
                     data=data)

        self._handle_errors("cvf-widget-alert", "class")

    def _captcha_2_submit(self):
        """Show the second Captcha variant, prompt for the answer and submit it.

        Unlike the other forms, the fields are sent as query parameters.
        """
        form = self._find_captcha_2_input().find_parent("form")

        img_src = form.find("img").attrs["src"]
        captcha_response = self._prompt_for_captcha(img_src)

        data = self._build_from_form(form,
                                     additional_attrs={"field-keywords": captcha_response})

        self.request(form.attrs.get("method", "GET"),
                     self._get_form_action(form,
                                           prefix=BASE_URL),
                     params=data)

        self._handle_errors("a-alert-info", "class")

    def _find_captcha_2_input(self):
        """Return the ``<input>`` whose id starts with ``CAPTCHA_2_INPUT_ID``, or ``None``."""
        return self.last_response_parsed.find(
            "input",
            id=lambda value: value and value.startswith(CAPTCHA_2_INPUT_ID))

    def _prompt_for_captcha(self, img_src):
        """Download and display the Captcha image, then return the typed answer."""
        img_response = self.session.get(img_src)
        img = Image.open(BytesIO(img_response.content))
        img.show()

        return input("Enter the Captcha seen on the opened image: ")

    def _build_from_form(self, form, additional_attrs=None):
        """Collect the ``name``/``value`` pairs of a form's ``<input>`` elements.

        Inputs lacking either attribute are skipped.

        :param form: Parsed form element.
        :param additional_attrs: Optional mapping merged over the collected
            values (its entries win on conflict).
        :return: Dict of field names to values.
        """
        data = {}
        for field in form.find_all("input"):
            try:
                data[field["name"]] = field["value"]
            except KeyError:
                # Only a missing attribute is expected here; anything else
                # should surface instead of being silently swallowed.
                continue
        if additional_attrs:
            data.update(additional_attrs)
        return data

    def _get_form_action(self, form, prefix=None):
        """Return the URL a form should be submitted to.

        Falls back to the URL of the last response when the form has no
        ``action``. ``prefix`` is prepended only when the action does not
        already start with ``http``.
        """
        action = form.attrs.get("action")
        if not action:
            action = self.last_response.url
        if prefix and not action.startswith("http"):
            action = prefix + action
        return action

    def _is_field_found(self, field_value, field_type="form", field_key="name"):
        """Return whether the last parsed page has a ``field_type`` tag whose
        ``field_key`` attribute matches ``field_value``."""
        return self.last_response_parsed.find(field_type, {
            field_key: field_value}) is not None

    def _get_page_from_url(self, url):
        """Derive an unused ``<page>_<n>.html`` file name from ``url``.

        The page name is the last path segment of the URL with any query
        string and trailing ``.html`` removed. ``n`` is the smallest
        non-negative integer for which the file does not exist yet in the
        working directory.
        """
        # Drop the query first so a "/" inside it cannot be mistaken for a
        # path separator.
        page_name = url.split("?", 1)[0].rsplit("/", 1)[-1]
        # Remove the suffix explicitly: str.strip() works on a character set
        # and would also eat leading/trailing 'h', 't', 'm', 'l' and '.'.
        suffix = ".html"
        if page_name.endswith(suffix):
            page_name = page_name[:-len(suffix)]

        i = 0
        while os.path.isfile("{}_{}.html".format(page_name, i)):
            i += 1
        return "{}_{}.html".format(page_name, i)

    def _handle_errors(self, error_div="auth-error-message-box", attr_name="id",
                       critical=False):
        """Report an error box found on the last parsed page, if any.

        :param error_div: Attribute value identifying the error ``<div>``.
        :param attr_name: Attribute the value is matched against.
        :param critical: When true the error is raised; otherwise it is
            printed and the flow continues.
        :raises AmazonOrdersAuthError: If an error box is present and
            ``critical`` is true.
        """
        error_tag = self.last_response_parsed.find("div",
                                                   {attr_name: error_div})
        if error_tag:
            error_msg = "An error occurred: {}".format(error_tag.text.strip())

            if critical:
                raise AmazonOrdersAuthError(error_msg)
            else:
                print(error_msg)