seleniumuser.seleniumuser

  1import atexit
  2import os
  3import random
  4import sys
  5import time
  6from pathlib import Path
  7from types import LambdaType
  8from typing import Any
  9from warnings import warn
 10
 11from bs4 import BeautifulSoup
 12from noiftimer import Timer
 13from selenium import webdriver
 14from selenium.webdriver.chrome.options import Options as ChromeOptions
 15from selenium.webdriver.chrome.service import Service as ChromeService
 16from selenium.webdriver.common.by import By
 17from selenium.webdriver.common.keys import Keys
 18from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
 19from selenium.webdriver.firefox.options import Options as FirefoxOptions
 20from selenium.webdriver.firefox.service import Service as FirefoxService
 21from selenium.webdriver.remote.webelement import WebElement
 22from selenium.webdriver.support.ui import Select
 23from voxscribe import get_text_from_url
 24from whosyouragent import get_agent
 25
 26
 27class User:
 28    """Sits on top of selenium to streamline
 29    automation and scraping tasks."""
 30
 31    def __init__(
 32        self,
 33        headless: bool = False,
 34        browser_type: str = "firefox",
 35        implicit_wait: int = 10,
 36        page_load_timeout: int = 60,
 37        open_browser: bool = True,
 38        locator_method: str = "xpath",
 39        randomize_user_agent: bool = True,
 40        user_agent_rotation_period: int = None,
 41        move_window_by: tuple[int, int] = (0, -1000),
 42        download_dir: str | Path = None,
 43        driver_path: str | Path = None,
 44    ):
 45        """
 46        :param headless: If True, browser window will not be visible.
 47
 48        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'.
 49
 50        :param implicit_wait: Number of seconds to look for a specified element before
 51        selenium considers it missing and throws an exception.
 52
 53        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
 54        before throwing an exception.
 55
 56        :param open_browser: If True, opens a browser window when a User object is created.
 57        If False, a manual call to self.open_browser() must be made.
 58
 59        :param locator_method: The locator type User should expect to be given.
 60        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
 61        Every member function with a 'locator' argument refers to a string matching
 62        the current locator_method.
 63
 64        :param randomize_user_agent: If True, a random useragent will be used whenever
 65        the browser is opened. If False, the native useragent will be used.
 66
 67        :param user_agent_rotation_period: If not None, the browser window will be closed
 68        and reopened with a new useragent every user_agent_rotation_period number of minutes.
 69        Rotation occurs on the first call to self.get() after the time period has elapsed.
 70        Ignored if randomize_user_agent is False.
 71
 72        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.
 73
 74        :param download_dir: The download folder to use. If None, the default folder will be used.
 75
 76        :param driver_path: The path to the webdriver executable selenium should use.
 77        If None, the system PATH will be checked for the executable.
 78        If the executable isn't found, the parent directories and the immediate child directories
 79        of the current working directory will be searched.
 80        """
 81        self.headless = headless
 82        browser_type = browser_type.lower()
 83        if browser_type in ["firefox", "chrome"]:
 84            self.browser_type = browser_type
 85        else:
 86            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
 87        self.browser_open = False
 88        self.implicit_wait = implicit_wait
 89        self.page_load_timeout = page_load_timeout
 90        self.rotation_timer = Timer()
 91        self.timer = Timer()
 92        self.timer.start()
 93        self.randomize_user_agent = randomize_user_agent
 94        self.user_agent_rotation_period = user_agent_rotation_period
 95        self.locator_method = locator_method
 96        self.turbo()
 97        self.keys = Keys
 98        self.move_window_by = move_window_by
 99        self.download_dir = download_dir
100        self.driver_path = driver_path
101        if not self.driver_path:
102            self.search_for_driver()
103        if open_browser:
104            self.open_browser()
105        else:
106            self.browser = None
107        atexit.register(self.close_browser)
108
109    def __enter__(self):
110        return self
111
112    def __exit__(self, *args):
113        self.close_browser()
114
115    def configure_firefox(self) -> FirefoxService:
116        """Configure options and profile for firefox."""
117        self.options = FirefoxOptions()
118        self.options.headless = self.headless
119        self.options.set_preference(
120            "widget.windows.window_occlusion_tracking.enabled", False
121        )
122        self.options.set_preference("dom.webaudio.enabled", False)
123        if self.randomize_user_agent:
124            self.options.set_preference("general.useragent.override", get_agent())
125        if self.download_dir:
126            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
127            self.profile = FirefoxProfile()
128            self.profile.set_preference("browser.download.dir", str(self.download_dir))
129            self.profile.set_preference("browser.download.folderList", 2)
130        else:
131            self.profile = None
132        self.service = FirefoxService(
133            executable_path=str(self.driver_path), log_path=os.devnull
134        )
135
136    def configure_chrome(self) -> ChromeService:
137        """Configure options and profile for chrome."""
138        self.options = ChromeOptions()
139        self.options.headless = self.headless
140        self.options.add_argument("--disable-blink-features=AutomationControlled")
141        self.options.add_argument("--mute-audio")
142        self.options.add_argument("--disable-infobars")
143        self.options.add_argument("--disable-notifications")
144        self.options.add_argument("--log-level=3")
145        if self.randomize_user_agent:
146            self.options.add_argument(f"--user-agent={get_agent()}")
147        self.options.add_experimental_option("useAutomationExtension", False)
148        if self.download_dir:
149            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
150            self.options.add_experimental_option(
151                "prefs", {"download.default_directory": str(self.download_dir)}
152            )
153        self.service = ChromeService(
154            executable_path=str(self.driver_path), log_path=os.devnull
155        )
156
157    def search_for_driver(self):
158        """Searches for the webdriver executable."""
159        cwd = Path.cwd()
160        found = False
161        match self.browser_type:
162            case "firefox":
163                driver = "geckodriver.exe"
164            case "chrome":
165                driver = "chromedriver.exe"
166        # search PATH
167        env_path = os.environ["PATH"]
168        if sys.platform == "win32":
169            env_paths = env_path.split(";")
170        else:
171            env_paths = env_path.split(":")
172            driver = driver[: driver.find(".")]
173        for path in env_paths:
174            if (Path(path) / driver).exists():
175                self.driver_path = Path(path) / driver
176                found = True
177                break
178        # check current working directory and parent folders
179        if not found:
180            while cwd != cwd.parent:
181                if (cwd / driver).exists():
182                    self.driver_path = cwd / driver
183                    found = True
184                    break
185                cwd = cwd.parent
186            # check top most level
187            if not found and (cwd / driver).exists():
188                self.driver_path = cwd / driver
189                found = True
190        # check child folders (only 1 level down)
191        if not found:
192            for child in Path.cwd().iterdir():
193                if child.is_dir() and (child / driver).exists():
194                    self.driver_path = child / driver
195                    found = True
196        if not found:
197            warn(f"Could not find {driver}")
198
199    def set_implicit_wait(self, wait_time: int = None):
200        """Sets to default time if no arg given."""
201        if not wait_time:
202            self.browser.implicitly_wait(self.implicit_wait)
203        else:
204            self.browser.implicitly_wait(wait_time)
205
206    def open_browser(self):
207        """Configures and opens selenium browser."""
208        if not self.browser_open:
209            match self.browser_type:
210                case "firefox":
211                    self.configure_firefox()
212                    self.browser = webdriver.Firefox(
213                        options=self.options,
214                        service=self.service,
215                        firefox_profile=self.profile,
216                    )
217                case "chrome":
218                    self.configure_chrome()
219                    self.browser = webdriver.Chrome(
220                        options=self.options, service=self.service
221                    )
222            self.set_implicit_wait()
223            self.browser.maximize_window()
224            self.browser.set_window_position(
225                self.move_window_by[0], self.move_window_by[1]
226            )
227            self.browser.maximize_window()
228            self.browser.set_page_load_timeout(self.page_load_timeout)
229            self.browser_open = True
230            self.tab_index = 0
231            self.rotation_timer.start()
232        else:
233            warn("Browser already open.")
234
235    def close_browser(self):
236        """Close browser window."""
237        if self.browser_open:
238            self.browser_open = False
239            self.browser.quit()
240        (Path.cwd() / "atexit_worked.txt").touch()
241
242    def open_tab(self, url: str = "", switch_to_tab: bool = True):
243        """Opens new tab and, if provided, goes to url.
244
245        New tab is inserted after currently active tab."""
246        self.script("window.open(arguments[0]);", url)
247        if switch_to_tab:
248            self.switch_to_tab(self.tab_index + 1)
249
250    def switch_to_tab(self, tab_index: int):
251        """Switch to a tab in browser, zero indexed."""
252        self.browser.switch_to.window(self.browser.window_handles[tab_index])
253        self.tab_index = tab_index
254
255    def get_num_tabs(self) -> int:
256        """Returns number of tabs open."""
257        return len(self.browser.window_handles)
258
259    def close_tab(self, tab_index: int = 1):
260        """Close specified tab and
261        switches to tab index 0."""
262        self.switch_to_tab(tab_index)
263        self.browser.close()
264        self.switch_to_tab(0)
265
266    def get(self, url: str):
267        """Requests webpage at given url and rotates userAgent if necessary."""
268        if not self.browser_open:
269            self.open_browser()
270        if (
271            self.randomize_user_agent
272            and self.user_agent_rotation_period is not None
273            and self.rotation_timer.check(format=False)
274            > (60 * self.user_agent_rotation_period)
275        ):
276            self.rotation_timer.stop()
277            self.close_browser()
278            self.open_browser()
279        self.browser.get(url)
280        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
281        self.chill(self.arrival_wait)
282
283    def get_soup(self) -> BeautifulSoup:
284        """Returns a BeautifulSoup object
285        of the current page source."""
286        return BeautifulSoup(self.browser.page_source, "html.parser")
287
288    def current_url(self) -> str:
289        """Returns current url of active tab."""
290        return self.browser.current_url
291
292    def delete_cookies(self):
293        """Delete all cookies for
294        this browser instance."""
295        self.browser.delete_all_cookies()
296
297    def turbo(self, engage: bool = True):
298        """When engaged, strings will be sent
299        to elements all at once and there will be
300        no waiting after actions.
301
302        When disengaged, strings will be sent to elements
303        'one key at a time' with randomized amounts of
304        time between successive keys and after actions."""
305        if engage:
306            self.after_key_wait = (0, 0)
307            self.after_field_wait = (0, 0)
308            self.after_click_wait = (0, 0)
309            self.arrival_wait = (1, 1)
310            self.one_key_at_a_time = False
311            self.turbo_engaged = True
312        else:
313            self.after_key_wait = (0.1, 0.5)
314            self.after_field_wait = (1, 2)
315            self.after_click_wait = (0.25, 1.5)
316            self.arrival_wait = (4, 10)
317            self.one_key_at_a_time = True
318            self.turbo_engaged = False
319
320    def chill(self, min_max: tuple[float, float]):
321        """Sleeps a random amount
322        between min_max[0] and min_max[1]."""
323        time.sleep(random.uniform(min_max[0], min_max[1]))
324
325    def script(self, script: str, args: Any = None) -> Any:
326        """Execute javascript code and returns result."""
327        return self.browser.execute_script(script, args)
328
329    def remove(self, locator: str):
330        """Removes element from DOM."""
331        self.script("arguments[0].remove();", self.find(locator))
332
333    def get_length(self, locator: str) -> int:
334        """Returns number of child elements for a given element."""
335        return int(self.script("return arguments[0].length;", self.find(locator)))
336
337    def find(self, locator: str) -> WebElement:
338        """Finds and returns a WebElement."""
339        match self.locator_method:
340            case "xpath":
341                return self.browser.find_element(By.XPATH, locator)
342            case "id":
343                return self.browser.find_element(By.ID, locator)
344            case "className":
345                return self.browser.find_element(By.CLASS_NAME, locator)
346            case "name":
347                return self.browser.find_element(By.NAME, locator)
348            case "cssSelector":
349                return self.browser.find_element(By.CSS_SELECTOR, locator)
350
351    def find_children(self, locator: str) -> list[WebElement]:
352        """Returns a list of child WebElements
353        for given locator arg."""
354        element = self.find(locator)
355        return element.find_elements("xpath", "./*")
356
357    def scroll(self, amount: int = None, fraction: float = None):
358        """Scroll web page.
359        :param amount: The number of lines to scroll if not None.
360
361        :param fraction: The amount between 0.0 and 1.0
362        of the page height to scroll.
363
364        If values are provided for both arguments,
365        amount will be used.
366
367        If values are provided for neither argument,
368        the entire page length will be scrolled.
369
370        Scrolls one line at a time if self.turbo is False."""
371        if amount:
372            amount_to_scroll = amount
373        elif fraction:
374            amount_to_scroll = int(
375                fraction
376                * (
377                    int(self.script("return document.body.scrollHeight;"))
378                    - int(self.script("return window.pageYOffset;"))
379                )
380            )
381        else:
382            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
383        if self.turbo_engaged:
384            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
385        else:
386            for _ in range(abs(amount_to_scroll)):
387                if amount_to_scroll >= 0:
388                    self.script("window.scrollBy(0,1);")
389                else:
390                    self.script("window.scrollBy(0,-1);")
391        self.chill(self.after_click_wait)
392
393    def scroll_into_view(self, locator: str) -> WebElement:
394        """Scrolls to a given element and returns the element."""
395        element = self.find(locator)
396        self.script("arguments[0].scroll_into_view();", element)
397        self.chill(self.after_click_wait)
398        return element
399
400    def text(self, locator: str) -> str:
401        """Returns text of WebElement."""
402        return self.find(locator).text
403
404    def click(self, locator: str) -> WebElement:
405        """Clicks on and returns WebElement."""
406        element = self.find(locator)
407        element.click()
408        self.chill(self.after_click_wait)
409        return element
410
411    def clear(self, locator: str) -> WebElement:
412        """Clears content of WebElement if able
413        and then returns WebElement."""
414        element = self.find(locator)
415        element.clear()
416        self.chill(self.after_click_wait)
417        return element
418
419    def switch_to_iframe(self, locator: str):
420        """Switch to an iframe from given locator."""
421        self.browser.switch_to.frame(self.find(locator))
422
423    def switch_to_parent_frame(self):
424        """Move up a frame level from current frame."""
425        self.browser.switch_to.parent_frame()
426
427    def select(
428        self, locator: str, method: str, choice: str | int | tuple
429    ) -> WebElement:
430        """Select a choice from Select element.
431        Returns the Select element from the locator string,
432        not the option element that is selected.
433
434        :param method: Can be 'value' or 'index'
435
436        :param choice: The option to select.
437
438        If method is 'value', then choice should be
439        the html 'value' attribute of the desired option.
440
441        If method is 'index', choice can either be a single
442        int for the desired option or it can be a two-tuple.
443        If the tuple is provided, a random option between the
444        two indicies (inclusive) will be selected."""
445        element = self.click(locator)
446        match method:
447            case "value":
448                Select(element).select_by_value(choice)
449            case "index":
450                if type(choice) == tuple:
451                    choice = random.randint(choice[0], choice[1])
452                Select(element).select_by_index(choice)
453        self.chill(self.after_field_wait)
454        return element
455
456    def click_elements(
457        self, locators: list[str], max_selections: int = None, min_selections: int = 1
458    ) -> WebElement:
459        """Click a random number of WebElements
460        and return the last WebElement clicked.
461
462        :param locators: A list of element locators to choose from.
463
464        :param max_selections: The maximum number of elements to click.
465        If None, the maximum will be the length of the locators list.
466
467        :param min_selections: The minimum number of elements to click.
468
469        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
470        will click between 1 and 3 random elements from the list.
471        """
472        if not max_selections:
473            max_selections = len(locators)
474        for option in random.sample(
475            locators, k=random.randint(min_selections, max_selections)
476        ):
477            element = self.click(option)
478        return element
479
480    def get_click_list(
481        self, num_options: int, max_choices: int = 1, min_choices: int = 1
482    ) -> list[str]:
483        """Similar to self.click_elements(), but for use with the self.fill_next() method.
484
485        Creates a list of length 'num_options' where every element is 'skip'.
486
487        A random number of elements in the list between 'min_choices' and 'max_choices' are
488        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
489        click_list = ["skip"] * num_options
490        selected_indexes = []
491        for i in range(random.randint(min_choices, max_choices)):
492            index = random.randint(0, num_options - 1)
493            while index in selected_indexes:
494                index = random.randint(0, num_options - 1)
495            selected_indexes.append(index)
496            click_list[index] = self.keys.SPACE
497        return click_list
498
499    def send_keys(
500        self,
501        locator: str,
502        data: str,
503        click_first: bool = True,
504        clear_first: bool = False,
505    ) -> WebElement:
506        """Types data into element and returns the element.
507
508        :param data: The string to send to the element.
509
510        :param click_first: If True, the element is clicked on
511        before the data is sent.
512
513        :param clear_first: If True, the current text of the element
514        is cleared before the data is sent."""
515        element = self.click(locator) if click_first else self.find(locator)
516        if clear_first:
517            element.clear()
518            self.chill(self.after_click_wait)
519        if self.one_key_at_a_time:
520            for ch in str(data):
521                element.send_keys(ch)
522                self.chill(self.after_key_wait)
523        else:
524            element.send_keys(str(data))
525        self.chill(self.after_field_wait)
526        return element
527
528    def fill_next(
529        self, data: list[str | tuple], start_element: WebElement = None
530    ) -> WebElement:
531        """Fills a form by tabbing from the current WebElement
532        to the next one and using the corresponding item in data.
533        Returns the last WebElement.
534
535        :param data: A list of form data. If an item is a string (except for 'skip')
536        it will be typed into the current WebElement.
537
538        An item in data can be a two-tuple of the form
539        ('downArrow', numberOfPresses:int|tuple[int, int]).
540
541        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
542        that many times to the WebElement.
543
544        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
545        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
546        This is typically for use with Select elements.
547
548        An item in data can also be 'skip', which will perform no action on the current
549        WebElement and will continue to the next one.
550
551        :param start_element: The WebElement to start tabbing from.
552        The currently active element will be used if start_element is None.
553
554        Note: The function tabs to the next element before sending data,
555        so the start_element should the WebElement before the one
556        that should receive data[0].
557        """
558        element = (
559            self.browser.switch_to.active_element
560            if not start_element
561            else start_element
562        )
563        for datum in data:
564            element.send_keys(Keys.TAB)
565            element = self.browser.switch_to.active_element
566            self.chill(self.after_key_wait)
567            if datum[0] == "downArrow":
568                if type(datum[1]) == tuple:
569                    times = random.randint(datum[1][0], datum[1][1])
570                else:
571                    times = datum[1]
572                for _ in range(times):
573                    element.send_keys(Keys.ARROW_DOWN)
574                    self.chill(self.after_key_wait)
575            elif datum == "skip":
576                self.chill(self.after_key_wait)
577            else:
578                if self.turbo_engaged:
579                    element.send_keys(str(datum))
580                else:
581                    for ch in str(datum):
582                        element.send_keys(ch)
583                        self.chill(self.after_key_wait)
584            self.chill(self.after_field_wait)
585        return element
586
587    def wait_until(
588        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
589    ):
590        """Checks condition repeatedly until either it is true,
591        or the max_wait is exceeded.
592
593        Raises a TimeoutError if the condition doesn't success within max_wait.
594
595        Useful for determing whether a form has been successfully submitted.
596
597        :param condition: The condition function to check.
598
599        :param max_wait: Number of seconds to continue checking condition
600        before throwing a TimeoutError.
601
602        :param polling_interval: The number of seconds to sleep before
603        checking the condition function again after it fails.
604
605        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
606        start_time = time.time()
607        while True:
608            try:
609                if condition():
610                    time.sleep(1)
611                    break
612                elif (time.time() - start_time) > max_wait:
613                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
614                else:
615                    time.sleep(polling_interval)
616            except:
617                if (time.time() - start_time) > max_wait:
618                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
619                else:
620                    time.sleep(polling_interval)
621
622    def dismiss_alert(self):
623        """Dismiss alert dialog."""
624        self.browser.switch_to.alert.dismiss()
625
626    def solve_recaptcha_v3(
627        self,
628        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
629        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
630    ):
631        """Pass google recaptcha v3 by solving an audio puzzle.
632
633        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
634        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
635        pass None to this argument.
636
637        """
638        locator_method = self.locator_method
639        self.locator_method = "xpath"
640        try:
641            if outer_iframe_xpath:
642                self.switch_to_iframe(outer_iframe_xpath)
643                self.click('//*[@id="recaptcha-anchor"]')
644                self.switch_to_parent_frame()
645            self.switch_to_iframe(inner_iframe_xpath)
646            self.click('//*[@id="recaptcha-audio-button"]')
647            mp3_url = self.find(
648                '//a[@class="rc-audiochallenge-tdownload-link"]'
649            ).get_attribute("href")
650            text = get_text_from_url(mp3_url, ".mp3")
651            self.send_keys('//*[@id="audio-response"]', text)
652            self.click('//*[@id="recaptcha-verify-button"]')
653        except Exception as e:
654            print(e)
655            raise Exception("Could not solve captcha")
656        finally:
657            self.switch_to_parent_frame()
658            self.locator_method = locator_method
class User:
 28class User:
 29    """Sits on top of selenium to streamline
 30    automation and scraping tasks."""
 31
 32    def __init__(
 33        self,
 34        headless: bool = False,
 35        browser_type: str = "firefox",
 36        implicit_wait: int = 10,
 37        page_load_timeout: int = 60,
 38        open_browser: bool = True,
 39        locator_method: str = "xpath",
 40        randomize_user_agent: bool = True,
 41        user_agent_rotation_period: int = None,
 42        move_window_by: tuple[int, int] = (0, -1000),
 43        download_dir: str | Path = None,
 44        driver_path: str | Path = None,
 45    ):
 46        """
 47        :param headless: If True, browser window will not be visible.
 48
 49        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'.
 50
 51        :param implicit_wait: Number of seconds to look for a specified element before
 52        selenium considers it missing and throws an exception.
 53
 54        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
 55        before throwing an exception.
 56
 57        :param open_browser: If True, opens a browser window when a User object is created.
 58        If False, a manual call to self.open_browser() must be made.
 59
 60        :param locator_method: The locator type User should expect to be given.
 61        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
 62        Every member function with a 'locator' argument refers to a string matching
 63        the current locator_method.
 64
 65        :param randomize_user_agent: If True, a random useragent will be used whenever
 66        the browser is opened. If False, the native useragent will be used.
 67
 68        :param user_agent_rotation_period: If not None, the browser window will be closed
 69        and reopened with a new useragent every user_agent_rotation_period number of minutes.
 70        Rotation occurs on the first call to self.get() after the time period has elapsed.
 71        Ignored if randomize_user_agent is False.
 72
 73        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.
 74
 75        :param download_dir: The download folder to use. If None, the default folder will be used.
 76
 77        :param driver_path: The path to the webdriver executable selenium should use.
 78        If None, the system PATH will be checked for the executable.
 79        If the executable isn't found, the parent directories and the immediate child directories
 80        of the current working directory will be searched.
 81        """
 82        self.headless = headless
 83        browser_type = browser_type.lower()
 84        if browser_type in ["firefox", "chrome"]:
 85            self.browser_type = browser_type
 86        else:
 87            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
 88        self.browser_open = False
 89        self.implicit_wait = implicit_wait
 90        self.page_load_timeout = page_load_timeout
 91        self.rotation_timer = Timer()
 92        self.timer = Timer()
 93        self.timer.start()
 94        self.randomize_user_agent = randomize_user_agent
 95        self.user_agent_rotation_period = user_agent_rotation_period
 96        self.locator_method = locator_method
 97        self.turbo()
 98        self.keys = Keys
 99        self.move_window_by = move_window_by
100        self.download_dir = download_dir
101        self.driver_path = driver_path
102        if not self.driver_path:
103            self.search_for_driver()
104        if open_browser:
105            self.open_browser()
106        else:
107            self.browser = None
108        atexit.register(self.close_browser)
109
110    def __enter__(self):
111        return self
112
113    def __exit__(self, *args):
114        self.close_browser()
115
116    def configure_firefox(self) -> FirefoxService:
117        """Configure options and profile for firefox."""
118        self.options = FirefoxOptions()
119        self.options.headless = self.headless
120        self.options.set_preference(
121            "widget.windows.window_occlusion_tracking.enabled", False
122        )
123        self.options.set_preference("dom.webaudio.enabled", False)
124        if self.randomize_user_agent:
125            self.options.set_preference("general.useragent.override", get_agent())
126        if self.download_dir:
127            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
128            self.profile = FirefoxProfile()
129            self.profile.set_preference("browser.download.dir", str(self.download_dir))
130            self.profile.set_preference("browser.download.folderList", 2)
131        else:
132            self.profile = None
133        self.service = FirefoxService(
134            executable_path=str(self.driver_path), log_path=os.devnull
135        )
136
137    def configure_chrome(self) -> ChromeService:
138        """Configure options and profile for chrome."""
139        self.options = ChromeOptions()
140        self.options.headless = self.headless
141        self.options.add_argument("--disable-blink-features=AutomationControlled")
142        self.options.add_argument("--mute-audio")
143        self.options.add_argument("--disable-infobars")
144        self.options.add_argument("--disable-notifications")
145        self.options.add_argument("--log-level=3")
146        if self.randomize_user_agent:
147            self.options.add_argument(f"--user-agent={get_agent()}")
148        self.options.add_experimental_option("useAutomationExtension", False)
149        if self.download_dir:
150            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
151            self.options.add_experimental_option(
152                "prefs", {"download.default_directory": str(self.download_dir)}
153            )
154        self.service = ChromeService(
155            executable_path=str(self.driver_path), log_path=os.devnull
156        )
157
158    def search_for_driver(self):
159        """Searches for the webdriver executable."""
160        cwd = Path.cwd()
161        found = False
162        match self.browser_type:
163            case "firefox":
164                driver = "geckodriver.exe"
165            case "chrome":
166                driver = "chromedriver.exe"
167        # search PATH
168        env_path = os.environ["PATH"]
169        if sys.platform == "win32":
170            env_paths = env_path.split(";")
171        else:
172            env_paths = env_path.split(":")
173            driver = driver[: driver.find(".")]
174        for path in env_paths:
175            if (Path(path) / driver).exists():
176                self.driver_path = Path(path) / driver
177                found = True
178                break
179        # check current working directory and parent folders
180        if not found:
181            while cwd != cwd.parent:
182                if (cwd / driver).exists():
183                    self.driver_path = cwd / driver
184                    found = True
185                    break
186                cwd = cwd.parent
187            # check top most level
188            if not found and (cwd / driver).exists():
189                self.driver_path = cwd / driver
190                found = True
191        # check child folders (only 1 level down)
192        if not found:
193            for child in Path.cwd().iterdir():
194                if child.is_dir() and (child / driver).exists():
195                    self.driver_path = child / driver
196                    found = True
197        if not found:
198            warn(f"Could not find {driver}")
199
200    def set_implicit_wait(self, wait_time: int = None):
201        """Sets to default time if no arg given."""
202        if not wait_time:
203            self.browser.implicitly_wait(self.implicit_wait)
204        else:
205            self.browser.implicitly_wait(wait_time)
206
207    def open_browser(self):
208        """Configures and opens selenium browser."""
209        if not self.browser_open:
210            match self.browser_type:
211                case "firefox":
212                    self.configure_firefox()
213                    self.browser = webdriver.Firefox(
214                        options=self.options,
215                        service=self.service,
216                        firefox_profile=self.profile,
217                    )
218                case "chrome":
219                    self.configure_chrome()
220                    self.browser = webdriver.Chrome(
221                        options=self.options, service=self.service
222                    )
223            self.set_implicit_wait()
224            self.browser.maximize_window()
225            self.browser.set_window_position(
226                self.move_window_by[0], self.move_window_by[1]
227            )
228            self.browser.maximize_window()
229            self.browser.set_page_load_timeout(self.page_load_timeout)
230            self.browser_open = True
231            self.tab_index = 0
232            self.rotation_timer.start()
233        else:
234            warn("Browser already open.")
235
236    def close_browser(self):
237        """Close browser window."""
238        if self.browser_open:
239            self.browser_open = False
240            self.browser.quit()
241        (Path.cwd() / "atexit_worked.txt").touch()
242
243    def open_tab(self, url: str = "", switch_to_tab: bool = True):
244        """Opens new tab and, if provided, goes to url.
245
246        New tab is inserted after currently active tab."""
247        self.script("window.open(arguments[0]);", url)
248        if switch_to_tab:
249            self.switch_to_tab(self.tab_index + 1)
250
251    def switch_to_tab(self, tab_index: int):
252        """Switch to a tab in browser, zero indexed."""
253        self.browser.switch_to.window(self.browser.window_handles[tab_index])
254        self.tab_index = tab_index
255
256    def get_num_tabs(self) -> int:
257        """Returns number of tabs open."""
258        return len(self.browser.window_handles)
259
260    def close_tab(self, tab_index: int = 1):
261        """Close specified tab and
262        switches to tab index 0."""
263        self.switch_to_tab(tab_index)
264        self.browser.close()
265        self.switch_to_tab(0)
266
267    def get(self, url: str):
268        """Requests webpage at given url and rotates userAgent if necessary."""
269        if not self.browser_open:
270            self.open_browser()
271        if (
272            self.randomize_user_agent
273            and self.user_agent_rotation_period is not None
274            and self.rotation_timer.check(format=False)
275            > (60 * self.user_agent_rotation_period)
276        ):
277            self.rotation_timer.stop()
278            self.close_browser()
279            self.open_browser()
280        self.browser.get(url)
281        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
282        self.chill(self.arrival_wait)
283
284    def get_soup(self) -> BeautifulSoup:
285        """Returns a BeautifulSoup object
286        of the current page source."""
287        return BeautifulSoup(self.browser.page_source, "html.parser")
288
289    def current_url(self) -> str:
290        """Returns current url of active tab."""
291        return self.browser.current_url
292
293    def delete_cookies(self):
294        """Delete all cookies for
295        this browser instance."""
296        self.browser.delete_all_cookies()
297
298    def turbo(self, engage: bool = True):
299        """When engaged, strings will be sent
300        to elements all at once and there will be
301        no waiting after actions.
302
303        When disengaged, strings will be sent to elements
304        'one key at a time' with randomized amounts of
305        time between successive keys and after actions."""
306        if engage:
307            self.after_key_wait = (0, 0)
308            self.after_field_wait = (0, 0)
309            self.after_click_wait = (0, 0)
310            self.arrival_wait = (1, 1)
311            self.one_key_at_a_time = False
312            self.turbo_engaged = True
313        else:
314            self.after_key_wait = (0.1, 0.5)
315            self.after_field_wait = (1, 2)
316            self.after_click_wait = (0.25, 1.5)
317            self.arrival_wait = (4, 10)
318            self.one_key_at_a_time = True
319            self.turbo_engaged = False
320
321    def chill(self, min_max: tuple[float, float]):
322        """Sleeps a random amount
323        between min_max[0] and min_max[1]."""
324        time.sleep(random.uniform(min_max[0], min_max[1]))
325
326    def script(self, script: str, args: Any = None) -> Any:
327        """Execute javascript code and returns result."""
328        return self.browser.execute_script(script, args)
329
330    def remove(self, locator: str):
331        """Removes element from DOM."""
332        self.script("arguments[0].remove();", self.find(locator))
333
334    def get_length(self, locator: str) -> int:
335        """Returns number of child elements for a given element."""
336        return int(self.script("return arguments[0].length;", self.find(locator)))
337
338    def find(self, locator: str) -> WebElement:
339        """Finds and returns a WebElement."""
340        match self.locator_method:
341            case "xpath":
342                return self.browser.find_element(By.XPATH, locator)
343            case "id":
344                return self.browser.find_element(By.ID, locator)
345            case "className":
346                return self.browser.find_element(By.CLASS_NAME, locator)
347            case "name":
348                return self.browser.find_element(By.NAME, locator)
349            case "cssSelector":
350                return self.browser.find_element(By.CSS_SELECTOR, locator)
351
352    def find_children(self, locator: str) -> list[WebElement]:
353        """Returns a list of child WebElements
354        for given locator arg."""
355        element = self.find(locator)
356        return element.find_elements("xpath", "./*")
357
358    def scroll(self, amount: int = None, fraction: float = None):
359        """Scroll web page.
360        :param amount: The number of lines to scroll if not None.
361
362        :param fraction: The amount between 0.0 and 1.0
363        of the page height to scroll.
364
365        If values are provided for both arguments,
366        amount will be used.
367
368        If values are provided for neither argument,
369        the entire page length will be scrolled.
370
371        Scrolls one line at a time if self.turbo is False."""
372        if amount:
373            amount_to_scroll = amount
374        elif fraction:
375            amount_to_scroll = int(
376                fraction
377                * (
378                    int(self.script("return document.body.scrollHeight;"))
379                    - int(self.script("return window.pageYOffset;"))
380                )
381            )
382        else:
383            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
384        if self.turbo_engaged:
385            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
386        else:
387            for _ in range(abs(amount_to_scroll)):
388                if amount_to_scroll >= 0:
389                    self.script("window.scrollBy(0,1);")
390                else:
391                    self.script("window.scrollBy(0,-1);")
392        self.chill(self.after_click_wait)
393
394    def scroll_into_view(self, locator: str) -> WebElement:
395        """Scrolls to a given element and returns the element."""
396        element = self.find(locator)
397        self.script("arguments[0].scroll_into_view();", element)
398        self.chill(self.after_click_wait)
399        return element
400
401    def text(self, locator: str) -> str:
402        """Returns text of WebElement."""
403        return self.find(locator).text
404
405    def click(self, locator: str) -> WebElement:
406        """Clicks on and returns WebElement."""
407        element = self.find(locator)
408        element.click()
409        self.chill(self.after_click_wait)
410        return element
411
412    def clear(self, locator: str) -> WebElement:
413        """Clears content of WebElement if able
414        and then returns WebElement."""
415        element = self.find(locator)
416        element.clear()
417        self.chill(self.after_click_wait)
418        return element
419
420    def switch_to_iframe(self, locator: str):
421        """Switch to an iframe from given locator."""
422        self.browser.switch_to.frame(self.find(locator))
423
424    def switch_to_parent_frame(self):
425        """Move up a frame level from current frame."""
426        self.browser.switch_to.parent_frame()
427
428    def select(
429        self, locator: str, method: str, choice: str | int | tuple
430    ) -> WebElement:
431        """Select a choice from Select element.
432        Returns the Select element from the locator string,
433        not the option element that is selected.
434
435        :param method: Can be 'value' or 'index'
436
437        :param choice: The option to select.
438
439        If method is 'value', then choice should be
440        the html 'value' attribute of the desired option.
441
442        If method is 'index', choice can either be a single
443        int for the desired option or it can be a two-tuple.
444        If the tuple is provided, a random option between the
445        two indicies (inclusive) will be selected."""
446        element = self.click(locator)
447        match method:
448            case "value":
449                Select(element).select_by_value(choice)
450            case "index":
451                if type(choice) == tuple:
452                    choice = random.randint(choice[0], choice[1])
453                Select(element).select_by_index(choice)
454        self.chill(self.after_field_wait)
455        return element
456
457    def click_elements(
458        self, locators: list[str], max_selections: int = None, min_selections: int = 1
459    ) -> WebElement:
460        """Click a random number of WebElements
461        and return the last WebElement clicked.
462
463        :param locators: A list of element locators to choose from.
464
465        :param max_selections: The maximum number of elements to click.
466        If None, the maximum will be the length of the locators list.
467
468        :param min_selections: The minimum number of elements to click.
469
470        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
471        will click between 1 and 3 random elements from the list.
472        """
473        if not max_selections:
474            max_selections = len(locators)
475        for option in random.sample(
476            locators, k=random.randint(min_selections, max_selections)
477        ):
478            element = self.click(option)
479        return element
480
481    def get_click_list(
482        self, num_options: int, max_choices: int = 1, min_choices: int = 1
483    ) -> list[str]:
484        """Similar to self.click_elements(), but for use with the self.fill_next() method.
485
486        Creates a list of length 'num_options' where every element is 'skip'.
487
488        A random number of elements in the list between 'min_choices' and 'max_choices' are
489        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
490        click_list = ["skip"] * num_options
491        selected_indexes = []
492        for i in range(random.randint(min_choices, max_choices)):
493            index = random.randint(0, num_options - 1)
494            while index in selected_indexes:
495                index = random.randint(0, num_options - 1)
496            selected_indexes.append(index)
497            click_list[index] = self.keys.SPACE
498        return click_list
499
500    def send_keys(
501        self,
502        locator: str,
503        data: str,
504        click_first: bool = True,
505        clear_first: bool = False,
506    ) -> WebElement:
507        """Types data into element and returns the element.
508
509        :param data: The string to send to the element.
510
511        :param click_first: If True, the element is clicked on
512        before the data is sent.
513
514        :param clear_first: If True, the current text of the element
515        is cleared before the data is sent."""
516        element = self.click(locator) if click_first else self.find(locator)
517        if clear_first:
518            element.clear()
519            self.chill(self.after_click_wait)
520        if self.one_key_at_a_time:
521            for ch in str(data):
522                element.send_keys(ch)
523                self.chill(self.after_key_wait)
524        else:
525            element.send_keys(str(data))
526        self.chill(self.after_field_wait)
527        return element
528
529    def fill_next(
530        self, data: list[str | tuple], start_element: WebElement = None
531    ) -> WebElement:
532        """Fills a form by tabbing from the current WebElement
533        to the next one and using the corresponding item in data.
534        Returns the last WebElement.
535
536        :param data: A list of form data. If an item is a string (except for 'skip')
537        it will be typed into the current WebElement.
538
539        An item in data can be a two-tuple of the form
540        ('downArrow', numberOfPresses:int|tuple[int, int]).
541
542        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
543        that many times to the WebElement.
544
545        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
546        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
547        This is typically for use with Select elements.
548
549        An item in data can also be 'skip', which will perform no action on the current
550        WebElement and will continue to the next one.
551
552        :param start_element: The WebElement to start tabbing from.
553        The currently active element will be used if start_element is None.
554
555        Note: The function tabs to the next element before sending data,
556        so the start_element should the WebElement before the one
557        that should receive data[0].
558        """
559        element = (
560            self.browser.switch_to.active_element
561            if not start_element
562            else start_element
563        )
564        for datum in data:
565            element.send_keys(Keys.TAB)
566            element = self.browser.switch_to.active_element
567            self.chill(self.after_key_wait)
568            if datum[0] == "downArrow":
569                if type(datum[1]) == tuple:
570                    times = random.randint(datum[1][0], datum[1][1])
571                else:
572                    times = datum[1]
573                for _ in range(times):
574                    element.send_keys(Keys.ARROW_DOWN)
575                    self.chill(self.after_key_wait)
576            elif datum == "skip":
577                self.chill(self.after_key_wait)
578            else:
579                if self.turbo_engaged:
580                    element.send_keys(str(datum))
581                else:
582                    for ch in str(datum):
583                        element.send_keys(ch)
584                        self.chill(self.after_key_wait)
585            self.chill(self.after_field_wait)
586        return element
587
588    def wait_until(
589        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
590    ):
591        """Checks condition repeatedly until either it is true,
592        or the max_wait is exceeded.
593
594        Raises a TimeoutError if the condition doesn't success within max_wait.
595
596        Useful for determing whether a form has been successfully submitted.
597
598        :param condition: The condition function to check.
599
600        :param max_wait: Number of seconds to continue checking condition
601        before throwing a TimeoutError.
602
603        :param polling_interval: The number of seconds to sleep before
604        checking the condition function again after it fails.
605
606        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
607        start_time = time.time()
608        while True:
609            try:
610                if condition():
611                    time.sleep(1)
612                    break
613                elif (time.time() - start_time) > max_wait:
614                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
615                else:
616                    time.sleep(polling_interval)
617            except:
618                if (time.time() - start_time) > max_wait:
619                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
620                else:
621                    time.sleep(polling_interval)
622
623    def dismiss_alert(self):
624        """Dismiss alert dialog."""
625        self.browser.switch_to.alert.dismiss()
626
627    def solve_recaptcha_v3(
628        self,
629        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
630        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
631    ):
632        """Pass google recaptcha v3 by solving an audio puzzle.
633
634        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
635        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
636        pass None to this argument.
637
638        """
639        locator_method = self.locator_method
640        self.locator_method = "xpath"
641        try:
642            if outer_iframe_xpath:
643                self.switch_to_iframe(outer_iframe_xpath)
644                self.click('//*[@id="recaptcha-anchor"]')
645                self.switch_to_parent_frame()
646            self.switch_to_iframe(inner_iframe_xpath)
647            self.click('//*[@id="recaptcha-audio-button"]')
648            mp3_url = self.find(
649                '//a[@class="rc-audiochallenge-tdownload-link"]'
650            ).get_attribute("href")
651            text = get_text_from_url(mp3_url, ".mp3")
652            self.send_keys('//*[@id="audio-response"]', text)
653            self.click('//*[@id="recaptcha-verify-button"]')
654        except Exception as e:
655            print(e)
656            raise Exception("Could not solve captcha")
657        finally:
658            self.switch_to_parent_frame()
659            self.locator_method = locator_method

Sits on top of selenium to streamline automation and scraping tasks.

User( headless: bool = False, browser_type: str = 'firefox', implicit_wait: int = 10, page_load_timeout: int = 60, open_browser: bool = True, locator_method: str = 'xpath', randomize_user_agent: bool = True, user_agent_rotation_period: int = None, move_window_by: tuple[int, int] = (0, -1000), download_dir: str | pathlib.Path = None, driver_path: str | pathlib.Path = None)
 32    def __init__(
 33        self,
 34        headless: bool = False,
 35        browser_type: str = "firefox",
 36        implicit_wait: int = 10,
 37        page_load_timeout: int = 60,
 38        open_browser: bool = True,
 39        locator_method: str = "xpath",
 40        randomize_user_agent: bool = True,
 41        user_agent_rotation_period: int = None,
 42        move_window_by: tuple[int, int] = (0, -1000),
 43        download_dir: str | Path = None,
 44        driver_path: str | Path = None,
 45    ):
 46        """
 47        :param headless: If True, browser window will not be visible.
 48
 49        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'.
 50
 51        :param implicit_wait: Number of seconds to look for a specified element before
 52        selenium considers it missing and throws an exception.
 53
 54        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
 55        before throwing an exception.
 56
 57        :param open_browser: If True, opens a browser window when a User object is created.
 58        If False, a manual call to self.open_browser() must be made.
 59
 60        :param locator_method: The locator type User should expect to be given.
 61        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
 62        Every member function with a 'locator' argument refers to a string matching
 63        the current locator_method.
 64
 65        :param randomize_user_agent: If True, a random useragent will be used whenever
 66        the browser is opened. If False, the native useragent will be used.
 67
 68        :param user_agent_rotation_period: If not None, the browser window will be closed
 69        and reopened with a new useragent every user_agent_rotation_period number of minutes.
 70        Rotation occurs on the first call to self.get() after the time period has elapsed.
 71        Ignored if randomize_user_agent is False.
 72
 73        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.
 74
 75        :param download_dir: The download folder to use. If None, the default folder will be used.
 76
 77        :param driver_path: The path to the webdriver executable selenium should use.
 78        If None, the system PATH will be checked for the executable.
 79        If the executable isn't found, the parent directories and the immediate child directories
 80        of the current working directory will be searched.
 81        """
 82        self.headless = headless
 83        browser_type = browser_type.lower()
 84        if browser_type in ["firefox", "chrome"]:
 85            self.browser_type = browser_type
 86        else:
 87            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
 88        self.browser_open = False
 89        self.implicit_wait = implicit_wait
 90        self.page_load_timeout = page_load_timeout
 91        self.rotation_timer = Timer()
 92        self.timer = Timer()
 93        self.timer.start()
 94        self.randomize_user_agent = randomize_user_agent
 95        self.user_agent_rotation_period = user_agent_rotation_period
 96        self.locator_method = locator_method
 97        self.turbo()
 98        self.keys = Keys
 99        self.move_window_by = move_window_by
100        self.download_dir = download_dir
101        self.driver_path = driver_path
102        if not self.driver_path:
103            self.search_for_driver()
104        if open_browser:
105            self.open_browser()
106        else:
107            self.browser = None
108        atexit.register(self.close_browser)
Parameters
  • headless: If True, browser window will not be visible.

  • browser_type: Which browser to use. Can be 'firefox' or 'chrome'.

  • implicit_wait: Number of seconds to look for a specified element before selenium considers it missing and throws an exception.

  • page_load_timeout: Time in seconds for selenium to wait for a page to load before throwing an exception.

  • open_browser: If True, opens a browser window when a User object is created. If False, a manual call to self.open_browser() must be made.

  • locator_method: The locator type User should expect to be given. Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'. Every member function with a 'locator' argument refers to a string matching the current locator_method.

  • randomize_user_agent: If True, a random useragent will be used whenever the browser is opened. If False, the native useragent will be used.

  • user_agent_rotation_period: If not None, the browser window will be closed and reopened with a new useragent every user_agent_rotation_period number of minutes. Rotation occurs on the first call to self.get() after the time period has elapsed. Ignored if randomize_user_agent is False.

  • move_window_by: The x and y amount of pixels to move the browser window by after opening.

  • download_dir: The download folder to use. If None, the default folder will be used.

  • driver_path: The path to the webdriver executable selenium should use. If None, the system PATH will be checked for the executable. If the executable isn't found, the parent directories and the immediate child directories of the current working directory will be searched.

def configure_firefox(self) -> selenium.webdriver.firefox.service.Service:
116    def configure_firefox(self) -> FirefoxService:
117        """Configure options and profile for firefox."""
118        self.options = FirefoxOptions()
119        self.options.headless = self.headless
120        self.options.set_preference(
121            "widget.windows.window_occlusion_tracking.enabled", False
122        )
123        self.options.set_preference("dom.webaudio.enabled", False)
124        if self.randomize_user_agent:
125            self.options.set_preference("general.useragent.override", get_agent())
126        if self.download_dir:
127            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
128            self.profile = FirefoxProfile()
129            self.profile.set_preference("browser.download.dir", str(self.download_dir))
130            self.profile.set_preference("browser.download.folderList", 2)
131        else:
132            self.profile = None
133        self.service = FirefoxService(
134            executable_path=str(self.driver_path), log_path=os.devnull
135        )

Configure options and profile for firefox.

def configure_chrome(self) -> selenium.webdriver.chrome.service.Service:
137    def configure_chrome(self) -> ChromeService:
138        """Configure options and profile for chrome."""
139        self.options = ChromeOptions()
140        self.options.headless = self.headless
141        self.options.add_argument("--disable-blink-features=AutomationControlled")
142        self.options.add_argument("--mute-audio")
143        self.options.add_argument("--disable-infobars")
144        self.options.add_argument("--disable-notifications")
145        self.options.add_argument("--log-level=3")
146        if self.randomize_user_agent:
147            self.options.add_argument(f"--user-agent={get_agent()}")
148        self.options.add_experimental_option("useAutomationExtension", False)
149        if self.download_dir:
150            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
151            self.options.add_experimental_option(
152                "prefs", {"download.default_directory": str(self.download_dir)}
153            )
154        self.service = ChromeService(
155            executable_path=str(self.driver_path), log_path=os.devnull
156        )

Configure options and profile for chrome.

def search_for_driver(self):
158    def search_for_driver(self):
159        """Searches for the webdriver executable."""
160        cwd = Path.cwd()
161        found = False
162        match self.browser_type:
163            case "firefox":
164                driver = "geckodriver.exe"
165            case "chrome":
166                driver = "chromedriver.exe"
167        # search PATH
168        env_path = os.environ["PATH"]
169        if sys.platform == "win32":
170            env_paths = env_path.split(";")
171        else:
172            env_paths = env_path.split(":")
173            driver = driver[: driver.find(".")]
174        for path in env_paths:
175            if (Path(path) / driver).exists():
176                self.driver_path = Path(path) / driver
177                found = True
178                break
179        # check current working directory and parent folders
180        if not found:
181            while cwd != cwd.parent:
182                if (cwd / driver).exists():
183                    self.driver_path = cwd / driver
184                    found = True
185                    break
186                cwd = cwd.parent
187            # check top most level
188            if not found and (cwd / driver).exists():
189                self.driver_path = cwd / driver
190                found = True
191        # check child folders (only 1 level down)
192        if not found:
193            for child in Path.cwd().iterdir():
194                if child.is_dir() and (child / driver).exists():
195                    self.driver_path = child / driver
196                    found = True
197        if not found:
198            warn(f"Could not find {driver}")

Searches for the webdriver executable.

def set_implicit_wait(self, wait_time: int = None):
200    def set_implicit_wait(self, wait_time: int = None):
201        """Sets to default time if no arg given."""
202        if not wait_time:
203            self.browser.implicitly_wait(self.implicit_wait)
204        else:
205            self.browser.implicitly_wait(wait_time)

Sets to default time if no arg given.

def open_browser(self):
207    def open_browser(self):
208        """Configures and opens selenium browser."""
209        if not self.browser_open:
210            match self.browser_type:
211                case "firefox":
212                    self.configure_firefox()
213                    self.browser = webdriver.Firefox(
214                        options=self.options,
215                        service=self.service,
216                        firefox_profile=self.profile,
217                    )
218                case "chrome":
219                    self.configure_chrome()
220                    self.browser = webdriver.Chrome(
221                        options=self.options, service=self.service
222                    )
223            self.set_implicit_wait()
224            self.browser.maximize_window()
225            self.browser.set_window_position(
226                self.move_window_by[0], self.move_window_by[1]
227            )
228            self.browser.maximize_window()
229            self.browser.set_page_load_timeout(self.page_load_timeout)
230            self.browser_open = True
231            self.tab_index = 0
232            self.rotation_timer.start()
233        else:
234            warn("Browser already open.")

Configures and opens selenium browser.

def close_browser(self):
236    def close_browser(self):
237        """Close browser window."""
238        if self.browser_open:
239            self.browser_open = False
240            self.browser.quit()
241        (Path.cwd() / "atexit_worked.txt").touch()

Close browser window.

def open_tab(self, url: str = '', switch_to_tab: bool = True):
243    def open_tab(self, url: str = "", switch_to_tab: bool = True):
244        """Opens new tab and, if provided, goes to url.
245
246        New tab is inserted after currently active tab."""
247        self.script("window.open(arguments[0]);", url)
248        if switch_to_tab:
249            self.switch_to_tab(self.tab_index + 1)

Opens new tab and, if provided, goes to url.

New tab is inserted after currently active tab.

def switch_to_tab(self, tab_index: int):
251    def switch_to_tab(self, tab_index: int):
252        """Switch to a tab in browser, zero indexed."""
253        self.browser.switch_to.window(self.browser.window_handles[tab_index])
254        self.tab_index = tab_index

Switch to a tab in browser, zero indexed.

def get_num_tabs(self) -> int:
256    def get_num_tabs(self) -> int:
257        """Returns number of tabs open."""
258        return len(self.browser.window_handles)

Returns number of tabs open.

def close_tab(self, tab_index: int = 1):
260    def close_tab(self, tab_index: int = 1):
261        """Close specified tab and
262        switches to tab index 0."""
263        self.switch_to_tab(tab_index)
264        self.browser.close()
265        self.switch_to_tab(0)

Close specified tab and switches to tab index 0.

def get(self, url: str):
267    def get(self, url: str):
268        """Requests webpage at given url and rotates userAgent if necessary."""
269        if not self.browser_open:
270            self.open_browser()
271        if (
272            self.randomize_user_agent
273            and self.user_agent_rotation_period is not None
274            and self.rotation_timer.check(format=False)
275            > (60 * self.user_agent_rotation_period)
276        ):
277            self.rotation_timer.stop()
278            self.close_browser()
279            self.open_browser()
280        self.browser.get(url)
281        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
282        self.chill(self.arrival_wait)

Requests webpage at given url and rotates userAgent if necessary.

def get_soup(self) -> bs4.BeautifulSoup:
284    def get_soup(self) -> BeautifulSoup:
285        """Returns a BeautifulSoup object
286        of the current page source."""
287        return BeautifulSoup(self.browser.page_source, "html.parser")

Returns a BeautifulSoup object of the current page source.

def current_url(self) -> str:
289    def current_url(self) -> str:
290        """Returns current url of active tab."""
291        return self.browser.current_url

Returns current url of active tab.

def delete_cookies(self):
293    def delete_cookies(self):
294        """Delete all cookies for
295        this browser instance."""
296        self.browser.delete_all_cookies()

Delete all cookies for this browser instance.

def turbo(self, engage: bool = True):
298    def turbo(self, engage: bool = True):
299        """When engaged, strings will be sent
300        to elements all at once and there will be
301        no waiting after actions.
302
303        When disengaged, strings will be sent to elements
304        'one key at a time' with randomized amounts of
305        time between successive keys and after actions."""
306        if engage:
307            self.after_key_wait = (0, 0)
308            self.after_field_wait = (0, 0)
309            self.after_click_wait = (0, 0)
310            self.arrival_wait = (1, 1)
311            self.one_key_at_a_time = False
312            self.turbo_engaged = True
313        else:
314            self.after_key_wait = (0.1, 0.5)
315            self.after_field_wait = (1, 2)
316            self.after_click_wait = (0.25, 1.5)
317            self.arrival_wait = (4, 10)
318            self.one_key_at_a_time = True
319            self.turbo_engaged = False

When engaged, strings will be sent to elements all at once and there will be no waiting after actions.

When disengaged, strings will be sent to elements 'one key at a time' with randomized amounts of time between successive keys and after actions.

def chill(self, min_max: tuple[float, float]):
321    def chill(self, min_max: tuple[float, float]):
322        """Sleeps a random amount
323        between min_max[0] and min_max[1]."""
324        time.sleep(random.uniform(min_max[0], min_max[1]))

Sleeps a random amount between min_max[0] and min_max[1].

def script(self, script: str, args: Any = None) -> Any:
326    def script(self, script: str, args: Any = None) -> Any:
327        """Execute javascript code and returns result."""
328        return self.browser.execute_script(script, args)

Execute javascript code and returns result.

def remove(self, locator: str):
330    def remove(self, locator: str):
331        """Removes element from DOM."""
332        self.script("arguments[0].remove();", self.find(locator))

Removes element from DOM.

def get_length(self, locator: str) -> int:
334    def get_length(self, locator: str) -> int:
335        """Returns number of child elements for a given element."""
336        return int(self.script("return arguments[0].length;", self.find(locator)))

Returns number of child elements for a given element.

def find(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
338    def find(self, locator: str) -> WebElement:
339        """Finds and returns a WebElement."""
340        match self.locator_method:
341            case "xpath":
342                return self.browser.find_element(By.XPATH, locator)
343            case "id":
344                return self.browser.find_element(By.ID, locator)
345            case "className":
346                return self.browser.find_element(By.CLASS_NAME, locator)
347            case "name":
348                return self.browser.find_element(By.NAME, locator)
349            case "cssSelector":
350                return self.browser.find_element(By.CSS_SELECTOR, locator)

Finds and returns a WebElement.

def find_children( self, locator: str) -> list[selenium.webdriver.remote.webelement.WebElement]:
352    def find_children(self, locator: str) -> list[WebElement]:
353        """Returns a list of child WebElements
354        for given locator arg."""
355        element = self.find(locator)
356        return element.find_elements("xpath", "./*")

Returns a list of child WebElements for given locator arg.

def scroll(self, amount: int = None, fraction: float = None):
358    def scroll(self, amount: int = None, fraction: float = None):
359        """Scroll web page.
360        :param amount: The number of lines to scroll if not None.
361
362        :param fraction: The amount between 0.0 and 1.0
363        of the page height to scroll.
364
365        If values are provided for both arguments,
366        amount will be used.
367
368        If values are provided for neither argument,
369        the entire page length will be scrolled.
370
371        Scrolls one line at a time if self.turbo is False."""
372        if amount:
373            amount_to_scroll = amount
374        elif fraction:
375            amount_to_scroll = int(
376                fraction
377                * (
378                    int(self.script("return document.body.scrollHeight;"))
379                    - int(self.script("return window.pageYOffset;"))
380                )
381            )
382        else:
383            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
384        if self.turbo_engaged:
385            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
386        else:
387            for _ in range(abs(amount_to_scroll)):
388                if amount_to_scroll >= 0:
389                    self.script("window.scrollBy(0,1);")
390                else:
391                    self.script("window.scrollBy(0,-1);")
392        self.chill(self.after_click_wait)

Scroll web page.

Parameters
  • amount: The number of pixels to scroll if not None.

  • fraction: The amount between 0.0 and 1.0 of the page height to scroll.

If values are provided for both arguments, amount will be used.

If values are provided for neither argument, the entire page length will be scrolled.

Scrolls one pixel at a time if turbo mode is not engaged (self.turbo_engaged is False).

def scroll_into_view(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
394    def scroll_into_view(self, locator: str) -> WebElement:
395        """Scrolls to a given element and returns the element."""
396        element = self.find(locator)
397        self.script("arguments[0].scroll_into_view();", element)
398        self.chill(self.after_click_wait)
399        return element

Scrolls to a given element and returns the element.

def text(self, locator: str) -> str:
401    def text(self, locator: str) -> str:
402        """Returns text of WebElement."""
403        return self.find(locator).text

Returns text of WebElement.

def click(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
405    def click(self, locator: str) -> WebElement:
406        """Clicks on and returns WebElement."""
407        element = self.find(locator)
408        element.click()
409        self.chill(self.after_click_wait)
410        return element

Clicks on and returns WebElement.

def clear(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
412    def clear(self, locator: str) -> WebElement:
413        """Clears content of WebElement if able
414        and then returns WebElement."""
415        element = self.find(locator)
416        element.clear()
417        self.chill(self.after_click_wait)
418        return element

Clears content of WebElement if able and then returns WebElement.

def switch_to_iframe(self, locator: str):
420    def switch_to_iframe(self, locator: str):
421        """Switch to an iframe from given locator."""
422        self.browser.switch_to.frame(self.find(locator))

Switch to an iframe from given locator.

def switch_to_parent_frame(self):
424    def switch_to_parent_frame(self):
425        """Move up a frame level from current frame."""
426        self.browser.switch_to.parent_frame()

Move up a frame level from current frame.

def select( self, locator: str, method: str, choice: str | int | tuple) -> selenium.webdriver.remote.webelement.WebElement:
428    def select(
429        self, locator: str, method: str, choice: str | int | tuple
430    ) -> WebElement:
431        """Select a choice from Select element.
432        Returns the Select element from the locator string,
433        not the option element that is selected.
434
435        :param method: Can be 'value' or 'index'
436
437        :param choice: The option to select.
438
439        If method is 'value', then choice should be
440        the html 'value' attribute of the desired option.
441
442        If method is 'index', choice can either be a single
443        int for the desired option or it can be a two-tuple.
444        If the tuple is provided, a random option between the
445        two indicies (inclusive) will be selected."""
446        element = self.click(locator)
447        match method:
448            case "value":
449                Select(element).select_by_value(choice)
450            case "index":
451                if type(choice) == tuple:
452                    choice = random.randint(choice[0], choice[1])
453                Select(element).select_by_index(choice)
454        self.chill(self.after_field_wait)
455        return element

Select a choice from Select element. Returns the Select element from the locator string, not the option element that is selected.

Parameters
  • method: Can be 'value' or 'index'

  • choice: The option to select.

If method is 'value', then choice should be the html 'value' attribute of the desired option.

If method is 'index', choice can either be a single int for the desired option or it can be a two-tuple. If the tuple is provided, a random option between the two indices (inclusive) will be selected.

def click_elements( self, locators: list[str], max_selections: int = None, min_selections: int = 1) -> selenium.webdriver.remote.webelement.WebElement:
457    def click_elements(
458        self, locators: list[str], max_selections: int = None, min_selections: int = 1
459    ) -> WebElement:
460        """Click a random number of WebElements
461        and return the last WebElement clicked.
462
463        :param locators: A list of element locators to choose from.
464
465        :param max_selections: The maximum number of elements to click.
466        If None, the maximum will be the length of the locators list.
467
468        :param min_selections: The minimum number of elements to click.
469
470        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
471        will click between 1 and 3 random elements from the list.
472        """
473        if not max_selections:
474            max_selections = len(locators)
475        for option in random.sample(
476            locators, k=random.randint(min_selections, max_selections)
477        ):
478            element = self.click(option)
479        return element

Click a random number of WebElements and return the last WebElement clicked.

Parameters
  • locators: A list of element locators to choose from.

  • max_selections: The maximum number of elements to click. If None, the maximum will be the length of the locators list.

  • min_selections: The minimum number of elements to click.

e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3) will click between 1 and 3 random elements from the list.

def get_click_list( self, num_options: int, max_choices: int = 1, min_choices: int = 1) -> list[str]:
481    def get_click_list(
482        self, num_options: int, max_choices: int = 1, min_choices: int = 1
483    ) -> list[str]:
484        """Similar to self.click_elements(), but for use with the self.fill_next() method.
485
486        Creates a list of length 'num_options' where every element is 'skip'.
487
488        A random number of elements in the list between 'min_choices' and 'max_choices' are
489        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
490        click_list = ["skip"] * num_options
491        selected_indexes = []
492        for i in range(random.randint(min_choices, max_choices)):
493            index = random.randint(0, num_options - 1)
494            while index in selected_indexes:
495                index = random.randint(0, num_options - 1)
496            selected_indexes.append(index)
497            click_list[index] = self.keys.SPACE
498        return click_list

Similar to self.click_elements(), but for use with the self.fill_next() method.

Creates a list of length 'num_options' where every element is 'skip'.

A random number of elements in the list between 'min_choices' and 'max_choices' are replaced with 'keys.SPACE' (interpreted as a click by almost all web forms).

def send_keys( self, locator: str, data: str, click_first: bool = True, clear_first: bool = False) -> selenium.webdriver.remote.webelement.WebElement:
500    def send_keys(
501        self,
502        locator: str,
503        data: str,
504        click_first: bool = True,
505        clear_first: bool = False,
506    ) -> WebElement:
507        """Types data into element and returns the element.
508
509        :param data: The string to send to the element.
510
511        :param click_first: If True, the element is clicked on
512        before the data is sent.
513
514        :param clear_first: If True, the current text of the element
515        is cleared before the data is sent."""
516        element = self.click(locator) if click_first else self.find(locator)
517        if clear_first:
518            element.clear()
519            self.chill(self.after_click_wait)
520        if self.one_key_at_a_time:
521            for ch in str(data):
522                element.send_keys(ch)
523                self.chill(self.after_key_wait)
524        else:
525            element.send_keys(str(data))
526        self.chill(self.after_field_wait)
527        return element

Types data into element and returns the element.

Parameters
  • data: The string to send to the element.

  • click_first: If True, the element is clicked on before the data is sent.

  • clear_first: If True, the current text of the element is cleared before the data is sent.

def fill_next( self, data: list[str | tuple], start_element: selenium.webdriver.remote.webelement.WebElement = None) -> selenium.webdriver.remote.webelement.WebElement:
529    def fill_next(
530        self, data: list[str | tuple], start_element: WebElement = None
531    ) -> WebElement:
532        """Fills a form by tabbing from the current WebElement
533        to the next one and using the corresponding item in data.
534        Returns the last WebElement.
535
536        :param data: A list of form data. If an item is a string (except for 'skip')
537        it will be typed into the current WebElement.
538
539        An item in data can be a two-tuple of the form
540        ('downArrow', numberOfPresses:int|tuple[int, int]).
541
542        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
543        that many times to the WebElement.
544
545        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
546        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
547        This is typically for use with Select elements.
548
549        An item in data can also be 'skip', which will perform no action on the current
550        WebElement and will continue to the next one.
551
552        :param start_element: The WebElement to start tabbing from.
553        The currently active element will be used if start_element is None.
554
555        Note: The function tabs to the next element before sending data,
556        so the start_element should the WebElement before the one
557        that should receive data[0].
558        """
559        element = (
560            self.browser.switch_to.active_element
561            if not start_element
562            else start_element
563        )
564        for datum in data:
565            element.send_keys(Keys.TAB)
566            element = self.browser.switch_to.active_element
567            self.chill(self.after_key_wait)
568            if datum[0] == "downArrow":
569                if type(datum[1]) == tuple:
570                    times = random.randint(datum[1][0], datum[1][1])
571                else:
572                    times = datum[1]
573                for _ in range(times):
574                    element.send_keys(Keys.ARROW_DOWN)
575                    self.chill(self.after_key_wait)
576            elif datum == "skip":
577                self.chill(self.after_key_wait)
578            else:
579                if self.turbo_engaged:
580                    element.send_keys(str(datum))
581                else:
582                    for ch in str(datum):
583                        element.send_keys(ch)
584                        self.chill(self.after_key_wait)
585            self.chill(self.after_field_wait)
586        return element

Fills a form by tabbing from the current WebElement to the next one and using the corresponding item in data. Returns the last WebElement.

Parameters
  • data: A list of form data. If an item is a string (except for 'skip') it will be typed into the current WebElement.

An item in data can be a two-tuple of the form ('downArrow', numberOfPresses:int|tuple[int, int]).

If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent that many times to the WebElement.

If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random number of times between numberOfPresses[0] and numberOfPresses[1] inclusive. This is typically for use with Select elements.

An item in data can also be 'skip', which will perform no action on the current WebElement and will continue to the next one.

  • start_element: The WebElement to start tabbing from. The currently active element will be used if start_element is None.

Note: The function tabs to the next element before sending data, so the start_element should be the WebElement before the one that should receive data[0].

def wait_until( self, condition: function, max_wait: float = 10, polling_interval: float = 0.1):
588    def wait_until(
589        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
590    ):
591        """Checks condition repeatedly until either it is true,
592        or the max_wait is exceeded.
593
594        Raises a TimeoutError if the condition doesn't success within max_wait.
595
596        Useful for determing whether a form has been successfully submitted.
597
598        :param condition: The condition function to check.
599
600        :param max_wait: Number of seconds to continue checking condition
601        before throwing a TimeoutError.
602
603        :param polling_interval: The number of seconds to sleep before
604        checking the condition function again after it fails.
605
606        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
607        start_time = time.time()
608        while True:
609            try:
610                if condition():
611                    time.sleep(1)
612                    break
613                elif (time.time() - start_time) > max_wait:
614                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
615                else:
616                    time.sleep(polling_interval)
617            except:
618                if (time.time() - start_time) > max_wait:
619                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
620                else:
621                    time.sleep(polling_interval)

Checks condition repeatedly until either it is true, or the max_wait is exceeded.

Raises a TimeoutError if the condition doesn't succeed within max_wait.

Useful for determining whether a form has been successfully submitted.

Parameters
  • condition: The condition function to check.

  • max_wait: Number of seconds to continue checking condition before throwing a TimeoutError.

  • polling_interval: The number of seconds to sleep before checking the condition function again after it fails.

e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]'))

def dismiss_alert(self):
623    def dismiss_alert(self):
624        """Dismiss alert dialog."""
625        self.browser.switch_to.alert.dismiss()

Dismiss alert dialog.

def solve_recaptcha_v3( self, outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]', inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]'):
627    def solve_recaptcha_v3(
628        self,
629        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
630        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
631    ):
632        """Pass google recaptcha v3 by solving an audio puzzle.
633
634        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
635        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
636        pass None to this argument.
637
638        """
639        locator_method = self.locator_method
640        self.locator_method = "xpath"
641        try:
642            if outer_iframe_xpath:
643                self.switch_to_iframe(outer_iframe_xpath)
644                self.click('//*[@id="recaptcha-anchor"]')
645                self.switch_to_parent_frame()
646            self.switch_to_iframe(inner_iframe_xpath)
647            self.click('//*[@id="recaptcha-audio-button"]')
648            mp3_url = self.find(
649                '//a[@class="rc-audiochallenge-tdownload-link"]'
650            ).get_attribute("href")
651            text = get_text_from_url(mp3_url, ".mp3")
652            self.send_keys('//*[@id="audio-response"]', text)
653            self.click('//*[@id="recaptcha-verify-button"]')
654        except Exception as e:
655            print(e)
656            raise Exception("Could not solve captcha")
657        finally:
658            self.switch_to_parent_frame()
659            self.locator_method = locator_method

Pass google recaptcha v3 by solving an audio puzzle.

Parameters
  • outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox. If it's the recaptcha without the initial checkbox that just shows the image puzzle, pass None to this argument.