seleniumuser.seleniumuser

  1import atexit
  2import os
  3import random
  4import sys
  5import time
  6from pathlib import Path
  7from types import LambdaType
  8from typing import Any
  9from warnings import warn
 10
 11from bs4 import BeautifulSoup
 12from selenium import webdriver
 13from selenium.webdriver.chrome.options import Options as ChromeOptions
 14from selenium.webdriver.chrome.service import Service as ChromeService
 15from selenium.webdriver.common.by import By
 16from selenium.webdriver.common.keys import Keys
 17from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
 18from selenium.webdriver.firefox.options import Options as FirefoxOptions
 19from selenium.webdriver.firefox.service import Service as FirefoxService
 20from selenium.webdriver.remote.webelement import WebElement
 21from selenium.webdriver.support.ui import Select
 22
 23from noiftimer import Timer
 24from voxscribe import get_text_from_url
 25from whosyouragent import get_agent
 26
 27
 28class User:
 29    """Sits on top of selenium to streamline
 30    automation and scraping tasks."""
 31
 32    def __init__(
 33        self,
 34        headless: bool = False,
 35        browser_type: str = "firefox",
 36        implicit_wait: int = 10,
 37        page_load_timeout: int = 60,
 38        open_browser: bool = True,
 39        locator_method: str = "xpath",
 40        randomize_user_agent: bool = True,
 41        user_agent_rotation_period: int = None,
 42        move_window_by: tuple[int, int] = (0, -1000),
 43        download_dir: str | Path = None,
 44        driver_path: str | Path = None,
 45    ):
 46        """
 47        :param headless: If True, browser window will not be visible.
 48
 49        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'.
 50
 51        :param implicit_wait: Number of seconds to look for a specified element before
 52        selenium considers it missing and throws an exception.
 53
 54        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
 55        before throwing an exception.
 56
 57        :param open_browser: If True, opens a browser window when a User object is created.
 58        If False, a manual call to self.open_browser() must be made.
 59
 60        :param locator_method: The locator type User should expect to be given.
 61        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
 62        Every member function with a 'locator' argument refers to a string matching
 63        the current locator_method.
 64
 65        :param randomize_user_agent: If True, a random useragent will be used whenever
 66        the browser is opened. If False, the native useragent will be used.
 67
 68        :param user_agent_rotation_period: If not None, the browser window will be closed
 69        and reopened with a new useragent every user_agent_rotation_period number of minutes.
 70        Rotation occurs on the first call to self.get() after the time period has elapsed.
 71        Ignored if randomize_user_agent is False.
 72
 73        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.
 74
 75        :param download_dir: The download folder to use. If None, the default folder will be used.
 76
 77        :param driver_path: The path to the webdriver executable selenium should use.
 78        If None, the system PATH will be checked for the executable.
 79        If the executable isn't found, the parent directories and the immediate child directories
 80        of the current working directory will be searched.
 81        """
 82        self.headless = headless
 83        browser_type = browser_type.lower()
 84        if browser_type in ["firefox", "chrome"]:
 85            self.browser_type = browser_type
 86        else:
 87            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
 88        self.browser_open = False
 89        self.implicit_wait = implicit_wait
 90        self.page_load_timeout = page_load_timeout
 91        self.rotation_timer = Timer()
 92        self.randomize_user_agent = randomize_user_agent
 93        self.user_agent_rotation_period = user_agent_rotation_period
 94        self.locator_method = locator_method
 95        self.turbo()
 96        self.keys = Keys
 97        self.move_window_by = move_window_by
 98        self.download_dir = download_dir
 99        self.driver_path = driver_path
100        if not self.driver_path:
101            self.search_for_driver()
102        if open_browser:
103            self.open_browser()
104        else:
105            self.browser = None
106        atexit.register(self.close_browser)
107
108    def __enter__(self):
109        return self
110
111    def __exit__(self, *args):
112        self.close_browser()
113
114    def configure_firefox(self) -> FirefoxService:
115        """Configure options and profile for firefox."""
116        self.options = FirefoxOptions()
117        self.options.headless = self.headless
118        self.options.set_preference(
119            "widget.windows.window_occlusion_tracking.enabled", False
120        )
121        self.options.set_preference("dom.webaudio.enabled", False)
122        if self.randomize_user_agent:
123            self.options.set_preference("general.useragent.override", get_agent())
124        if self.download_dir:
125            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
126            self.profile = FirefoxProfile()
127            self.profile.set_preference("browser.download.dir", str(self.download_dir))
128            self.profile.set_preference("browser.download.folderList", 2)
129        else:
130            self.profile = None
131        self.service = FirefoxService(
132            executable_path=str(self.driver_path), log_path=os.devnull
133        )
134
135    def configure_chrome(self) -> ChromeService:
136        """Configure options and profile for chrome."""
137        self.options = ChromeOptions()
138        self.options.headless = self.headless
139        self.options.add_argument("--disable-blink-features=AutomationControlled")
140        self.options.add_argument("--mute-audio")
141        self.options.add_argument("--disable-infobars")
142        self.options.add_argument("--disable-notifications")
143        self.options.add_argument("--log-level=3")
144        if self.randomize_user_agent:
145            self.options.add_argument(f"--user-agent={get_agent()}")
146        self.options.add_experimental_option("useAutomationExtension", False)
147        if self.download_dir:
148            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
149            self.options.add_experimental_option(
150                "prefs", {"download.default_directory": str(self.download_dir)}
151            )
152        self.service = ChromeService(
153            executable_path=str(self.driver_path), log_path=os.devnull
154        )
155
156    def search_for_driver(self):
157        """Searches for the webdriver executable."""
158        cwd = Path.cwd()
159        found = False
160        match self.browser_type:
161            case "firefox":
162                driver = "geckodriver.exe"
163            case "chrome":
164                driver = "chromedriver.exe"
165        # search PATH
166        env_path = os.environ["PATH"]
167        if sys.platform == "win32":
168            env_paths = env_path.split(";")
169        else:
170            env_paths = env_path.split(":")
171            driver = driver[: driver.find(".")]
172        for path in env_paths:
173            if (Path(path) / driver).exists():
174                self.driver_path = Path(path) / driver
175                found = True
176                break
177        # check current working directory and parent folders
178        if not found:
179            while cwd != cwd.parent:
180                if (cwd / driver).exists():
181                    self.driver_path = cwd / driver
182                    found = True
183                    break
184                cwd = cwd.parent
185            # check top most level
186            if not found and (cwd / driver).exists():
187                self.driver_path = cwd / driver
188                found = True
189        # check child folders (only 1 level down)
190        if not found:
191            for child in Path.cwd().iterdir():
192                if child.is_dir() and (child / driver).exists():
193                    self.driver_path = child / driver
194                    found = True
195        if not found:
196            warn(f"Could not find {driver}")
197
198    def set_implicit_wait(self, wait_time: int = None):
199        """Sets to default time if no arg given."""
200        if not wait_time:
201            self.browser.implicitly_wait(self.implicit_wait)
202        else:
203            self.browser.implicitly_wait(wait_time)
204
205    def open_browser(self):
206        """Configures and opens selenium browser."""
207        if not self.browser_open:
208            match self.browser_type:
209                case "firefox":
210                    self.configure_firefox()
211                    self.browser = webdriver.Firefox(
212                        options=self.options,
213                        service=self.service,
214                        firefox_profile=self.profile,
215                    )
216                case "chrome":
217                    self.configure_chrome()
218                    self.browser = webdriver.Chrome(
219                        options=self.options, service=self.service
220                    )
221            self.set_implicit_wait()
222            self.browser.maximize_window()
223            self.browser.set_window_position(
224                self.move_window_by[0], self.move_window_by[1]
225            )
226            self.browser.maximize_window()
227            self.browser.set_page_load_timeout(self.page_load_timeout)
228            self.browser_open = True
229            self.tab_index = 0
230            self.rotation_timer.start()
231        else:
232            warn("Browser already open.")
233
234    def close_browser(self):
235        """Close browser window."""
236        if self.browser_open:
237            self.browser_open = False
238            self.browser.quit()
239
240    def open_tab(self, url: str = "", switch_to_tab: bool = True):
241        """Opens new tab and, if provided, goes to url.
242
243        New tab is inserted after currently active tab."""
244        self.script("window.open(arguments[0]);", url)
245        if switch_to_tab:
246            self.switch_to_tab(self.tab_index + 1)
247
248    def switch_to_tab(self, tab_index: int):
249        """Switch to a tab in browser, zero indexed."""
250        self.browser.switch_to.window(self.browser.window_handles[tab_index])
251        self.tab_index = tab_index
252
253    def get_num_tabs(self) -> int:
254        """Returns number of tabs open."""
255        return len(self.browser.window_handles)
256
257    def close_tab(self, tab_index: int = 1):
258        """Close specified tab and
259        switches to tab index 0."""
260        self.switch_to_tab(tab_index)
261        self.browser.close()
262        self.switch_to_tab(0)
263
264    def get(self, url: str):
265        """Requests webpage at given url and rotates userAgent if necessary."""
266        if not self.browser_open:
267            self.open_browser()
268        if (
269            self.randomize_user_agent
270            and self.user_agent_rotation_period is not None
271            and self.rotation_timer.elapsed > (60 * self.user_agent_rotation_period)
272        ):
273            self.rotation_timer.stop()
274            self.close_browser()
275            self.open_browser()
276        self.browser.get(url)
277        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
278        self.chill(self.arrival_wait)
279
280    def get_soup(self) -> BeautifulSoup:
281        """Returns a BeautifulSoup object
282        of the current page source."""
283        return BeautifulSoup(self.browser.page_source, "html.parser")
284
285    def current_url(self) -> str:
286        """Returns current url of active tab."""
287        return self.browser.current_url
288
289    def delete_cookies(self):
290        """Delete all cookies for
291        this browser instance."""
292        self.browser.delete_all_cookies()
293
294    def turbo(self, engage: bool = True):
295        """When engaged, strings will be sent
296        to elements all at once and there will be
297        no waiting after actions.
298
299        When disengaged, strings will be sent to elements
300        'one key at a time' with randomized amounts of
301        time between successive keys and after actions."""
302        if engage:
303            self.after_key_wait = (0, 0)
304            self.after_field_wait = (0, 0)
305            self.after_click_wait = (0, 0)
306            self.arrival_wait = (1, 1)
307            self.one_key_at_a_time = False
308            self.turbo_engaged = True
309        else:
310            self.after_key_wait = (0.1, 0.5)
311            self.after_field_wait = (1, 2)
312            self.after_click_wait = (0.25, 1.5)
313            self.arrival_wait = (4, 10)
314            self.one_key_at_a_time = True
315            self.turbo_engaged = False
316
317    def chill(self, min_max: tuple[float, float]):
318        """Sleeps a random amount
319        between min_max[0] and min_max[1]."""
320        time.sleep(random.uniform(min_max[0], min_max[1]))
321
322    def script(self, script: str, args: Any = None) -> Any:
323        """Execute javascript code and returns result."""
324        return self.browser.execute_script(script, args)
325
326    def remove(self, locator: str):
327        """Removes element from DOM."""
328        self.script("arguments[0].remove();", self.find(locator))
329
330    def get_length(self, locator: str) -> int:
331        """Returns number of child elements for a given element."""
332        return int(self.script("return arguments[0].length;", self.find(locator)))
333
334    def find(self, locator: str) -> WebElement:
335        """Finds and returns a WebElement."""
336        match self.locator_method:
337            case "xpath":
338                return self.browser.find_element(By.XPATH, locator)
339            case "id":
340                return self.browser.find_element(By.ID, locator)
341            case "className":
342                return self.browser.find_element(By.CLASS_NAME, locator)
343            case "name":
344                return self.browser.find_element(By.NAME, locator)
345            case "cssSelector":
346                return self.browser.find_element(By.CSS_SELECTOR, locator)
347
348    def find_children(self, locator: str) -> list[WebElement]:
349        """Returns a list of child WebElements
350        for given locator arg."""
351        element = self.find(locator)
352        return element.find_elements("xpath", "./*")
353
354    def scroll(self, amount: int = None, fraction: float = None):
355        """Scroll web page.
356        :param amount: The number of lines to scroll if not None.
357
358        :param fraction: The amount between 0.0 and 1.0
359        of the page height to scroll.
360
361        If values are provided for both arguments,
362        amount will be used.
363
364        If values are provided for neither argument,
365        the entire page length will be scrolled.
366
367        Scrolls one line at a time if self.turbo is False."""
368        if amount:
369            amount_to_scroll = amount
370        elif fraction:
371            amount_to_scroll = int(
372                fraction
373                * (
374                    int(self.script("return document.body.scrollHeight;"))
375                    - int(self.script("return window.pageYOffset;"))
376                )
377            )
378        else:
379            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
380        if self.turbo_engaged:
381            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
382        else:
383            for _ in range(abs(amount_to_scroll)):
384                if amount_to_scroll >= 0:
385                    self.script("window.scrollBy(0,1);")
386                else:
387                    self.script("window.scrollBy(0,-1);")
388        self.chill(self.after_click_wait)
389
390    def scroll_into_view(self, locator: str) -> WebElement:
391        """Scrolls to a given element and returns the element."""
392        element = self.find(locator)
393        self.script("arguments[0].scroll_into_view();", element)
394        self.chill(self.after_click_wait)
395        return element
396
397    def text(self, locator: str) -> str:
398        """Returns text of WebElement."""
399        return self.find(locator).text
400
401    def click(self, locator: str) -> WebElement:
402        """Clicks on and returns WebElement."""
403        element = self.find(locator)
404        element.click()
405        self.chill(self.after_click_wait)
406        return element
407
408    def clear(self, locator: str) -> WebElement:
409        """Clears content of WebElement if able
410        and then returns WebElement."""
411        element = self.find(locator)
412        element.clear()
413        self.chill(self.after_click_wait)
414        return element
415
416    def switch_to_iframe(self, locator: str):
417        """Switch to an iframe from given locator."""
418        self.browser.switch_to.frame(self.find(locator))
419
420    def switch_to_parent_frame(self):
421        """Move up a frame level from current frame."""
422        self.browser.switch_to.parent_frame()
423
424    def select(
425        self, locator: str, method: str, choice: str | int | tuple
426    ) -> WebElement:
427        """Select a choice from Select element.
428        Returns the Select element from the locator string,
429        not the option element that is selected.
430
431        :param method: Can be 'value' or 'index'
432
433        :param choice: The option to select.
434
435        If method is 'value', then choice should be
436        the html 'value' attribute of the desired option.
437
438        If method is 'index', choice can either be a single
439        int for the desired option or it can be a two-tuple.
440        If the tuple is provided, a random option between the
441        two indicies (inclusive) will be selected."""
442        element = self.click(locator)
443        match method:
444            case "value":
445                Select(element).select_by_value(choice)
446            case "index":
447                if type(choice) == tuple:
448                    choice = random.randint(choice[0], choice[1])
449                Select(element).select_by_index(choice)
450        self.chill(self.after_field_wait)
451        return element
452
453    def click_elements(
454        self, locators: list[str], max_selections: int = None, min_selections: int = 1
455    ) -> WebElement:
456        """Click a random number of WebElements
457        and return the last WebElement clicked.
458
459        :param locators: A list of element locators to choose from.
460
461        :param max_selections: The maximum number of elements to click.
462        If None, the maximum will be the length of the locators list.
463
464        :param min_selections: The minimum number of elements to click.
465
466        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
467        will click between 1 and 3 random elements from the list.
468        """
469        if not max_selections:
470            max_selections = len(locators)
471        for option in random.sample(
472            locators, k=random.randint(min_selections, max_selections)
473        ):
474            element = self.click(option)
475        return element
476
477    def get_click_list(
478        self, num_options: int, max_choices: int = 1, min_choices: int = 1
479    ) -> list[str]:
480        """Similar to self.click_elements(), but for use with the self.fill_next() method.
481
482        Creates a list of length 'num_options' where every element is 'skip'.
483
484        A random number of elements in the list between 'min_choices' and 'max_choices' are
485        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
486        click_list = ["skip"] * num_options
487        selected_indexes = []
488        for i in range(random.randint(min_choices, max_choices)):
489            index = random.randint(0, num_options - 1)
490            while index in selected_indexes:
491                index = random.randint(0, num_options - 1)
492            selected_indexes.append(index)
493            click_list[index] = self.keys.SPACE
494        return click_list
495
496    def send_keys(
497        self,
498        locator: str,
499        data: str,
500        click_first: bool = True,
501        clear_first: bool = False,
502    ) -> WebElement:
503        """Types data into element and returns the element.
504
505        :param data: The string to send to the element.
506
507        :param click_first: If True, the element is clicked on
508        before the data is sent.
509
510        :param clear_first: If True, the current text of the element
511        is cleared before the data is sent."""
512        element = self.click(locator) if click_first else self.find(locator)
513        if clear_first:
514            element.clear()
515            self.chill(self.after_click_wait)
516        if self.one_key_at_a_time:
517            for ch in str(data):
518                element.send_keys(ch)
519                self.chill(self.after_key_wait)
520        else:
521            element.send_keys(str(data))
522        self.chill(self.after_field_wait)
523        return element
524
525    def fill_next(
526        self, data: list[str | tuple], start_element: WebElement = None
527    ) -> WebElement:
528        """Fills a form by tabbing from the current WebElement
529        to the next one and using the corresponding item in data.
530        Returns the last WebElement.
531
532        :param data: A list of form data. If an item is a string (except for 'skip')
533        it will be typed into the current WebElement.
534
535        An item in data can be a two-tuple of the form
536        ('downArrow', numberOfPresses:int|tuple[int, int]).
537
538        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
539        that many times to the WebElement.
540
541        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
542        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
543        This is typically for use with Select elements.
544
545        An item in data can also be 'skip', which will perform no action on the current
546        WebElement and will continue to the next one.
547
548        An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100,
549        representing a percent chance an element will be clicked or skipped:
550        >>> user.fill_next(["click=70"])
551
552        has a 70% chance of being
553        >>> user.fill_next([user.keys.SPACE])
554
555        and a 30% chance of being
556        >>> user.fill_next(["skip"])
557
558
559        :param start_element: The WebElement to start tabbing from.
560        The currently active element will be used if start_element is None.
561
562        Note: The function tabs to the next element before sending data,
563        so the start_element should the WebElement before the one
564        that should receive data[0].
565        """
566        element = (
567            self.browser.switch_to.active_element
568            if not start_element
569            else start_element
570        )
571        for datum in data:
572            element.send_keys(Keys.TAB)
573            element = self.browser.switch_to.active_element
574            self.chill(self.after_key_wait)
575            if type(datum) == str and datum.strip().startswith("click="):
576                chance = int(datum.split("=")[1].strip())
577                if random.randint(0, 100) <= chance:
578                    datum = Keys.SPACE
579                else:
580                    datum = "skip"
581            if datum[0] == "downArrow":
582                if type(datum[1]) == tuple:
583                    times = random.randint(datum[1][0], datum[1][1])
584                else:
585                    times = datum[1]
586                for _ in range(times):
587                    element.send_keys(Keys.ARROW_DOWN)
588                    self.chill(self.after_key_wait)
589            elif datum == "skip":
590                self.chill(self.after_key_wait)
591            else:
592
593                if self.turbo_engaged:
594                    element.send_keys(str(datum))
595                else:
596                    for ch in str(datum):
597                        element.send_keys(ch)
598                        self.chill(self.after_key_wait)
599            self.chill(self.after_field_wait)
600        return element
601
602    def wait_until(
603        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
604    ):
605        """Checks condition repeatedly until either it is true,
606        or the max_wait is exceeded.
607
608        Raises a TimeoutError if the condition doesn't success within max_wait.
609
610        Useful for determing whether a form has been successfully submitted.
611
612        :param condition: The condition function to check.
613
614        :param max_wait: Number of seconds to continue checking condition
615        before throwing a TimeoutError.
616
617        :param polling_interval: The number of seconds to sleep before
618        checking the condition function again after it fails.
619
620        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
621        start_time = time.time()
622        while True:
623            try:
624                if condition():
625                    time.sleep(1)
626                    break
627                elif (time.time() - start_time) > max_wait:
628                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
629                else:
630                    time.sleep(polling_interval)
631            except:
632                if (time.time() - start_time) > max_wait:
633                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
634                else:
635                    time.sleep(polling_interval)
636
637    def dismiss_alert(self):
638        """Dismiss alert dialog."""
639        self.browser.switch_to.alert.dismiss()
640
641    def solve_recaptcha_v3(
642        self,
643        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
644        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
645    ):
646        """Pass google recaptcha v3 by solving an audio puzzle.
647
648        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
649        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
650        pass None to this argument.
651
652        """
653        locator_method = self.locator_method
654        self.locator_method = "xpath"
655        try:
656            if outer_iframe_xpath:
657                self.switch_to_iframe(outer_iframe_xpath)
658                self.click('//*[@id="recaptcha-anchor"]')
659                self.switch_to_parent_frame()
660            self.switch_to_iframe(inner_iframe_xpath)
661            self.click('//*[@id="recaptcha-audio-button"]')
662            mp3_url = self.find(
663                '//a[@class="rc-audiochallenge-tdownload-link"]'
664            ).get_attribute("href")
665            text = get_text_from_url(mp3_url, ".mp3")
666            self.send_keys('//*[@id="audio-response"]', text)
667            self.click('//*[@id="recaptcha-verify-button"]')
668        except Exception as e:
669            print(e)
670            raise Exception("Could not solve captcha")
671        finally:
672            self.switch_to_parent_frame()
673            self.locator_method = locator_method
class User:
 29class User:
 30    """Sits on top of selenium to streamline
 31    automation and scraping tasks."""
 32
    def __init__(
        self,
        headless: bool = False,
        browser_type: str = "firefox",
        implicit_wait: int = 10,
        page_load_timeout: int = 60,
        open_browser: bool = True,
        locator_method: str = "xpath",
        randomize_user_agent: bool = True,
        user_agent_rotation_period: int | None = None,
        move_window_by: tuple[int, int] = (0, -1000),
        download_dir: str | Path | None = None,
        driver_path: str | Path | None = None,
    ) -> None:
        """Store the configuration, locate the webdriver and optionally open the browser.

        :param headless: If True, browser window will not be visible.

        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'
        (compared case-insensitively).

        :param implicit_wait: Number of seconds to look for a specified element before
        selenium considers it missing and throws an exception.

        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
        before throwing an exception.

        :param open_browser: If True, opens a browser window when a User object is created.
        If False, a manual call to self.open_browser() must be made.

        :param locator_method: The locator type User should expect to be given.
        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
        Every member function with a 'locator' argument refers to a string matching
        the current locator_method. The value is not validated here.

        :param randomize_user_agent: If True, a random useragent will be used whenever
        the browser is opened. If False, the native useragent will be used.

        :param user_agent_rotation_period: If not None, the browser window will be closed
        and reopened with a new useragent every user_agent_rotation_period number of minutes.
        Rotation occurs on the first call to self.get() after the time period has elapsed.
        Ignored if randomize_user_agent is False.

        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.

        :param download_dir: The download folder to use. If None, the default folder will be used.

        :param driver_path: The path to the webdriver executable selenium should use.
        If None, the system PATH will be checked for the executable.
        If the executable isn't found, the parent directories and the immediate child directories
        of the current working directory will be searched.

        :raises ValueError: If browser_type is neither 'firefox' nor 'chrome'.
        """
        self.headless = headless
        browser_type = browser_type.lower()
        if browser_type in ["firefox", "chrome"]:
            self.browser_type = browser_type
        else:
            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
        self.browser_open = False
        self.implicit_wait = implicit_wait
        self.page_load_timeout = page_load_timeout
        self.rotation_timer = Timer()
        self.randomize_user_agent = randomize_user_agent
        self.user_agent_rotation_period = user_agent_rotation_period
        self.locator_method = locator_method
        # turbo() with its default argument initialises the wait ranges and typing mode.
        self.turbo()
        # Expose selenium's Keys on the instance (e.g. self.keys.SPACE).
        self.keys = Keys
        self.move_window_by = move_window_by
        self.download_dir = download_dir
        self.driver_path = driver_path
        # Must run after browser_type is set: the search picks the driver name from it.
        if not self.driver_path:
            self.search_for_driver()
        if open_browser:
            self.open_browser()
        else:
            self.browser = None
        # Close the browser at interpreter exit; close_browser() is a no-op if already closed.
        atexit.register(self.close_browser)
108
    def __enter__(self):
        """Return this User so it can be bound by a ``with`` statement."""
        return self
111
    def __exit__(self, *args):
        """Close the browser when the ``with`` block ends.

        The exception details passed in args are ignored and nothing is returned,
        so an exception raised inside the block still propagates."""
        self.close_browser()
114
115    def configure_firefox(self) -> FirefoxService:
116        """Configure options and profile for firefox."""
117        self.options = FirefoxOptions()
118        self.options.headless = self.headless
119        self.options.set_preference(
120            "widget.windows.window_occlusion_tracking.enabled", False
121        )
122        self.options.set_preference("dom.webaudio.enabled", False)
123        if self.randomize_user_agent:
124            self.options.set_preference("general.useragent.override", get_agent())
125        if self.download_dir:
126            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
127            self.profile = FirefoxProfile()
128            self.profile.set_preference("browser.download.dir", str(self.download_dir))
129            self.profile.set_preference("browser.download.folderList", 2)
130        else:
131            self.profile = None
132        self.service = FirefoxService(
133            executable_path=str(self.driver_path), log_path=os.devnull
134        )
135
136    def configure_chrome(self) -> ChromeService:
137        """Configure options and profile for chrome."""
138        self.options = ChromeOptions()
139        self.options.headless = self.headless
140        self.options.add_argument("--disable-blink-features=AutomationControlled")
141        self.options.add_argument("--mute-audio")
142        self.options.add_argument("--disable-infobars")
143        self.options.add_argument("--disable-notifications")
144        self.options.add_argument("--log-level=3")
145        if self.randomize_user_agent:
146            self.options.add_argument(f"--user-agent={get_agent()}")
147        self.options.add_experimental_option("useAutomationExtension", False)
148        if self.download_dir:
149            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
150            self.options.add_experimental_option(
151                "prefs", {"download.default_directory": str(self.download_dir)}
152            )
153        self.service = ChromeService(
154            executable_path=str(self.driver_path), log_path=os.devnull
155        )
156
157    def search_for_driver(self):
158        """Searches for the webdriver executable."""
159        cwd = Path.cwd()
160        found = False
161        match self.browser_type:
162            case "firefox":
163                driver = "geckodriver.exe"
164            case "chrome":
165                driver = "chromedriver.exe"
166        # search PATH
167        env_path = os.environ["PATH"]
168        if sys.platform == "win32":
169            env_paths = env_path.split(";")
170        else:
171            env_paths = env_path.split(":")
172            driver = driver[: driver.find(".")]
173        for path in env_paths:
174            if (Path(path) / driver).exists():
175                self.driver_path = Path(path) / driver
176                found = True
177                break
178        # check current working directory and parent folders
179        if not found:
180            while cwd != cwd.parent:
181                if (cwd / driver).exists():
182                    self.driver_path = cwd / driver
183                    found = True
184                    break
185                cwd = cwd.parent
186            # check top most level
187            if not found and (cwd / driver).exists():
188                self.driver_path = cwd / driver
189                found = True
190        # check child folders (only 1 level down)
191        if not found:
192            for child in Path.cwd().iterdir():
193                if child.is_dir() and (child / driver).exists():
194                    self.driver_path = child / driver
195                    found = True
196        if not found:
197            warn(f"Could not find {driver}")
198
199    def set_implicit_wait(self, wait_time: int = None):
200        """Sets to default time if no arg given."""
201        if not wait_time:
202            self.browser.implicitly_wait(self.implicit_wait)
203        else:
204            self.browser.implicitly_wait(wait_time)
205
206    def open_browser(self):
207        """Configures and opens selenium browser."""
208        if not self.browser_open:
209            match self.browser_type:
210                case "firefox":
211                    self.configure_firefox()
212                    self.browser = webdriver.Firefox(
213                        options=self.options,
214                        service=self.service,
215                        firefox_profile=self.profile,
216                    )
217                case "chrome":
218                    self.configure_chrome()
219                    self.browser = webdriver.Chrome(
220                        options=self.options, service=self.service
221                    )
222            self.set_implicit_wait()
223            self.browser.maximize_window()
224            self.browser.set_window_position(
225                self.move_window_by[0], self.move_window_by[1]
226            )
227            self.browser.maximize_window()
228            self.browser.set_page_load_timeout(self.page_load_timeout)
229            self.browser_open = True
230            self.tab_index = 0
231            self.rotation_timer.start()
232        else:
233            warn("Browser already open.")
234
235    def close_browser(self):
236        """Close browser window."""
237        if self.browser_open:
238            self.browser_open = False
239            self.browser.quit()
240
    def open_tab(self, url: str = "", switch_to_tab: bool = True):
        """Opens new tab and, if provided, goes to url.

        The tab is created by running ``window.open`` with url as its argument.

        :param url: The address to load in the new tab.

        :param switch_to_tab: If True, focus is moved to tab index self.tab_index + 1.
        NOTE(review): this assumes the new tab's handle sits directly after the
        currently active tab in browser.window_handles - confirm when several tabs are open."""
        self.script("window.open(arguments[0]);", url)
        if switch_to_tab:
            self.switch_to_tab(self.tab_index + 1)
248
    def switch_to_tab(self, tab_index: int):
        """Switch to a tab in browser, zero indexed.

        :param tab_index: Index into self.browser.window_handles of the tab to focus.
        The index is remembered in self.tab_index."""
        self.browser.switch_to.window(self.browser.window_handles[tab_index])
        self.tab_index = tab_index
253
    def get_num_tabs(self) -> int:
        """Returns number of tabs open (the number of window handles the browser reports)."""
        return len(self.browser.window_handles)
257
    def close_tab(self, tab_index: int = 1):
        """Close specified tab and switch to tab index 0.

        :param tab_index: Zero-based index of the tab to close. Defaults to 1."""
        # The tab has to be focused first: browser.close() closes the current window.
        self.switch_to_tab(tab_index)
        self.browser.close()
        self.switch_to_tab(0)
264
265    def get(self, url: str):
266        """Requests webpage at given url and rotates userAgent if necessary."""
267        if not self.browser_open:
268            self.open_browser()
269        if (
270            self.randomize_user_agent
271            and self.user_agent_rotation_period is not None
272            and self.rotation_timer.elapsed > (60 * self.user_agent_rotation_period)
273        ):
274            self.rotation_timer.stop()
275            self.close_browser()
276            self.open_browser()
277        self.browser.get(url)
278        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
279        self.chill(self.arrival_wait)
280
    def get_soup(self) -> BeautifulSoup:
        """Returns a BeautifulSoup object of the current page source,
        parsed with the 'html.parser' parser."""
        return BeautifulSoup(self.browser.page_source, "html.parser")
285
    def current_url(self) -> str:
        """Returns current url of active tab, as reported by self.browser.current_url."""
        return self.browser.current_url
289
    def delete_cookies(self):
        """Delete all cookies for this browser instance."""
        self.browser.delete_all_cookies()
294
295    def turbo(self, engage: bool = True):
296        """When engaged, strings will be sent
297        to elements all at once and there will be
298        no waiting after actions.
299
300        When disengaged, strings will be sent to elements
301        'one key at a time' with randomized amounts of
302        time between successive keys and after actions."""
303        if engage:
304            self.after_key_wait = (0, 0)
305            self.after_field_wait = (0, 0)
306            self.after_click_wait = (0, 0)
307            self.arrival_wait = (1, 1)
308            self.one_key_at_a_time = False
309            self.turbo_engaged = True
310        else:
311            self.after_key_wait = (0.1, 0.5)
312            self.after_field_wait = (1, 2)
313            self.after_click_wait = (0.25, 1.5)
314            self.arrival_wait = (4, 10)
315            self.one_key_at_a_time = True
316            self.turbo_engaged = False
317
318    def chill(self, min_max: tuple[float, float]):
319        """Sleeps a random amount
320        between min_max[0] and min_max[1]."""
321        time.sleep(random.uniform(min_max[0], min_max[1]))
322
    def script(self, script: str, args: Any = None) -> Any:
        """Execute javascript code and returns result.

        :param script: The javascript source to run in the browser.

        :param args: Passed to the script as one single argument, so the script
        reads it as ``arguments[0]``. None is passed when omitted."""
        return self.browser.execute_script(script, args)
326
    def remove(self, locator: str):
        """Removes element from DOM by calling the element's javascript remove() method."""
        self.script("arguments[0].remove();", self.find(locator))
330
    def get_length(self, locator: str) -> int:
        """Returns the javascript ``length`` property of the located element as an int.

        NOTE(review): presumably intended for elements that expose ``length``
        (e.g. a select element's option count); int() will raise if the property is missing."""
        return int(self.script("return arguments[0].length;", self.find(locator)))
334
335    def find(self, locator: str) -> WebElement:
336        """Finds and returns a WebElement."""
337        match self.locator_method:
338            case "xpath":
339                return self.browser.find_element(By.XPATH, locator)
340            case "id":
341                return self.browser.find_element(By.ID, locator)
342            case "className":
343                return self.browser.find_element(By.CLASS_NAME, locator)
344            case "name":
345                return self.browser.find_element(By.NAME, locator)
346            case "cssSelector":
347                return self.browser.find_element(By.CSS_SELECTOR, locator)
348
    def find_children(self, locator: str) -> list[WebElement]:
        """Returns a list of child WebElements for given locator arg.

        The children are looked up with the relative xpath ``./*``
        regardless of the current locator_method."""
        element = self.find(locator)
        return element.find_elements("xpath", "./*")
354
355    def scroll(self, amount: int = None, fraction: float = None):
356        """Scroll web page.
357        :param amount: The number of lines to scroll if not None.
358
359        :param fraction: The amount between 0.0 and 1.0
360        of the page height to scroll.
361
362        If values are provided for both arguments,
363        amount will be used.
364
365        If values are provided for neither argument,
366        the entire page length will be scrolled.
367
368        Scrolls one line at a time if self.turbo is False."""
369        if amount:
370            amount_to_scroll = amount
371        elif fraction:
372            amount_to_scroll = int(
373                fraction
374                * (
375                    int(self.script("return document.body.scrollHeight;"))
376                    - int(self.script("return window.pageYOffset;"))
377                )
378            )
379        else:
380            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
381        if self.turbo_engaged:
382            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
383        else:
384            for _ in range(abs(amount_to_scroll)):
385                if amount_to_scroll >= 0:
386                    self.script("window.scrollBy(0,1);")
387                else:
388                    self.script("window.scrollBy(0,-1);")
389        self.chill(self.after_click_wait)
390
391    def scroll_into_view(self, locator: str) -> WebElement:
392        """Scrolls to a given element and returns the element."""
393        element = self.find(locator)
394        self.script("arguments[0].scroll_into_view();", element)
395        self.chill(self.after_click_wait)
396        return element
397
    def text(self, locator: str) -> str:
        """Returns text of the WebElement found with locator."""
        return self.find(locator).text
401
    def click(self, locator: str) -> WebElement:
        """Clicks on and returns WebElement.

        Pauses for a random time within self.after_click_wait after the click."""
        element = self.find(locator)
        element.click()
        self.chill(self.after_click_wait)
        return element
408
    def clear(self, locator: str) -> WebElement:
        """Clears content of WebElement if able
        and then returns WebElement.

        Pauses for a random time within self.after_click_wait after clearing."""
        element = self.find(locator)
        element.clear()
        self.chill(self.after_click_wait)
        return element
416
    def switch_to_iframe(self, locator: str):
        """Switch to an iframe from given locator.

        The iframe element is located with self.find() using the current locator_method."""
        self.browser.switch_to.frame(self.find(locator))
420
    def switch_to_parent_frame(self):
        """Move up a frame level from current frame."""
        self.browser.switch_to.parent_frame()
424
425    def select(
426        self, locator: str, method: str, choice: str | int | tuple
427    ) -> WebElement:
428        """Select a choice from Select element.
429        Returns the Select element from the locator string,
430        not the option element that is selected.
431
432        :param method: Can be 'value' or 'index'
433
434        :param choice: The option to select.
435
436        If method is 'value', then choice should be
437        the html 'value' attribute of the desired option.
438
439        If method is 'index', choice can either be a single
440        int for the desired option or it can be a two-tuple.
441        If the tuple is provided, a random option between the
442        two indicies (inclusive) will be selected."""
443        element = self.click(locator)
444        match method:
445            case "value":
446                Select(element).select_by_value(choice)
447            case "index":
448                if type(choice) == tuple:
449                    choice = random.randint(choice[0], choice[1])
450                Select(element).select_by_index(choice)
451        self.chill(self.after_field_wait)
452        return element
453
454    def click_elements(
455        self, locators: list[str], max_selections: int = None, min_selections: int = 1
456    ) -> WebElement:
457        """Click a random number of WebElements
458        and return the last WebElement clicked.
459
460        :param locators: A list of element locators to choose from.
461
462        :param max_selections: The maximum number of elements to click.
463        If None, the maximum will be the length of the locators list.
464
465        :param min_selections: The minimum number of elements to click.
466
467        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
468        will click between 1 and 3 random elements from the list.
469        """
470        if not max_selections:
471            max_selections = len(locators)
472        for option in random.sample(
473            locators, k=random.randint(min_selections, max_selections)
474        ):
475            element = self.click(option)
476        return element
477
478    def get_click_list(
479        self, num_options: int, max_choices: int = 1, min_choices: int = 1
480    ) -> list[str]:
481        """Similar to self.click_elements(), but for use with the self.fill_next() method.
482
483        Creates a list of length 'num_options' where every element is 'skip'.
484
485        A random number of elements in the list between 'min_choices' and 'max_choices' are
486        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
487        click_list = ["skip"] * num_options
488        selected_indexes = []
489        for i in range(random.randint(min_choices, max_choices)):
490            index = random.randint(0, num_options - 1)
491            while index in selected_indexes:
492                index = random.randint(0, num_options - 1)
493            selected_indexes.append(index)
494            click_list[index] = self.keys.SPACE
495        return click_list
496
497    def send_keys(
498        self,
499        locator: str,
500        data: str,
501        click_first: bool = True,
502        clear_first: bool = False,
503    ) -> WebElement:
504        """Types data into element and returns the element.
505
506        :param data: The string to send to the element.
507
508        :param click_first: If True, the element is clicked on
509        before the data is sent.
510
511        :param clear_first: If True, the current text of the element
512        is cleared before the data is sent."""
513        element = self.click(locator) if click_first else self.find(locator)
514        if clear_first:
515            element.clear()
516            self.chill(self.after_click_wait)
517        if self.one_key_at_a_time:
518            for ch in str(data):
519                element.send_keys(ch)
520                self.chill(self.after_key_wait)
521        else:
522            element.send_keys(str(data))
523        self.chill(self.after_field_wait)
524        return element
525
526    def fill_next(
527        self, data: list[str | tuple], start_element: WebElement = None
528    ) -> WebElement:
529        """Fills a form by tabbing from the current WebElement
530        to the next one and using the corresponding item in data.
531        Returns the last WebElement.
532
533        :param data: A list of form data. If an item is a string (except for 'skip')
534        it will be typed into the current WebElement.
535
536        An item in data can be a two-tuple of the form
537        ('downArrow', numberOfPresses:int|tuple[int, int]).
538
539        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
540        that many times to the WebElement.
541
542        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
543        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
544        This is typically for use with Select elements.
545
546        An item in data can also be 'skip', which will perform no action on the current
547        WebElement and will continue to the next one.
548
549        An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100,
550        representing a percent chance an element will be clicked or skipped:
551        >>> user.fill_next(["click=70"])
552
553        has a 70% chance of being
554        >>> user.fill_next([user.keys.SPACE])
555
556        and a 30% chance of being
557        >>> user.fill_next(["skip"])
558
559
560        :param start_element: The WebElement to start tabbing from.
561        The currently active element will be used if start_element is None.
562
563        Note: The function tabs to the next element before sending data,
564        so the start_element should the WebElement before the one
565        that should receive data[0].
566        """
567        element = (
568            self.browser.switch_to.active_element
569            if not start_element
570            else start_element
571        )
572        for datum in data:
573            element.send_keys(Keys.TAB)
574            element = self.browser.switch_to.active_element
575            self.chill(self.after_key_wait)
576            if type(datum) == str and datum.strip().startswith("click="):
577                chance = int(datum.split("=")[1].strip())
578                if random.randint(0, 100) <= chance:
579                    datum = Keys.SPACE
580                else:
581                    datum = "skip"
582            if datum[0] == "downArrow":
583                if type(datum[1]) == tuple:
584                    times = random.randint(datum[1][0], datum[1][1])
585                else:
586                    times = datum[1]
587                for _ in range(times):
588                    element.send_keys(Keys.ARROW_DOWN)
589                    self.chill(self.after_key_wait)
590            elif datum == "skip":
591                self.chill(self.after_key_wait)
592            else:
593
594                if self.turbo_engaged:
595                    element.send_keys(str(datum))
596                else:
597                    for ch in str(datum):
598                        element.send_keys(ch)
599                        self.chill(self.after_key_wait)
600            self.chill(self.after_field_wait)
601        return element
602
603    def wait_until(
604        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
605    ):
606        """Checks condition repeatedly until either it is true,
607        or the max_wait is exceeded.
608
609        Raises a TimeoutError if the condition doesn't success within max_wait.
610
611        Useful for determing whether a form has been successfully submitted.
612
613        :param condition: The condition function to check.
614
615        :param max_wait: Number of seconds to continue checking condition
616        before throwing a TimeoutError.
617
618        :param polling_interval: The number of seconds to sleep before
619        checking the condition function again after it fails.
620
621        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
622        start_time = time.time()
623        while True:
624            try:
625                if condition():
626                    time.sleep(1)
627                    break
628                elif (time.time() - start_time) > max_wait:
629                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
630                else:
631                    time.sleep(polling_interval)
632            except:
633                if (time.time() - start_time) > max_wait:
634                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
635                else:
636                    time.sleep(polling_interval)
637
    def dismiss_alert(self):
        """Dismiss the alert dialog the browser is currently showing.

        Switches to the alert through self.browser.switch_to.alert and calls dismiss() on it."""
        self.browser.switch_to.alert.dismiss()
641
642    def solve_recaptcha_v3(
643        self,
644        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
645        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
646    ):
647        """Pass google recaptcha v3 by solving an audio puzzle.
648
649        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
650        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
651        pass None to this argument.
652
653        """
654        locator_method = self.locator_method
655        self.locator_method = "xpath"
656        try:
657            if outer_iframe_xpath:
658                self.switch_to_iframe(outer_iframe_xpath)
659                self.click('//*[@id="recaptcha-anchor"]')
660                self.switch_to_parent_frame()
661            self.switch_to_iframe(inner_iframe_xpath)
662            self.click('//*[@id="recaptcha-audio-button"]')
663            mp3_url = self.find(
664                '//a[@class="rc-audiochallenge-tdownload-link"]'
665            ).get_attribute("href")
666            text = get_text_from_url(mp3_url, ".mp3")
667            self.send_keys('//*[@id="audio-response"]', text)
668            self.click('//*[@id="recaptcha-verify-button"]')
669        except Exception as e:
670            print(e)
671            raise Exception("Could not solve captcha")
672        finally:
673            self.switch_to_parent_frame()
674            self.locator_method = locator_method

Sits on top of selenium to streamline automation and scraping tasks.

User( headless: bool = False, browser_type: str = 'firefox', implicit_wait: int = 10, page_load_timeout: int = 60, open_browser: bool = True, locator_method: str = 'xpath', randomize_user_agent: bool = True, user_agent_rotation_period: int = None, move_window_by: tuple[int, int] = (0, -1000), download_dir: str | pathlib.Path = None, driver_path: str | pathlib.Path = None)
    def __init__(
        self,
        headless: bool = False,
        browser_type: str = "firefox",
        implicit_wait: int = 10,
        page_load_timeout: int = 60,
        open_browser: bool = True,
        locator_method: str = "xpath",
        randomize_user_agent: bool = True,
        user_agent_rotation_period: int | None = None,
        move_window_by: tuple[int, int] = (0, -1000),
        download_dir: str | Path | None = None,
        driver_path: str | Path | None = None,
    ) -> None:
        """Store the configuration, locate the webdriver and optionally open the browser.

        :param headless: If True, browser window will not be visible.

        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'
        (compared case-insensitively).

        :param implicit_wait: Number of seconds to look for a specified element before
        selenium considers it missing and throws an exception.

        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
        before throwing an exception.

        :param open_browser: If True, opens a browser window when a User object is created.
        If False, a manual call to self.open_browser() must be made.

        :param locator_method: The locator type User should expect to be given.
        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
        Every member function with a 'locator' argument refers to a string matching
        the current locator_method. The value is not validated here.

        :param randomize_user_agent: If True, a random useragent will be used whenever
        the browser is opened. If False, the native useragent will be used.

        :param user_agent_rotation_period: If not None, the browser window will be closed
        and reopened with a new useragent every user_agent_rotation_period number of minutes.
        Rotation occurs on the first call to self.get() after the time period has elapsed.
        Ignored if randomize_user_agent is False.

        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.

        :param download_dir: The download folder to use. If None, the default folder will be used.

        :param driver_path: The path to the webdriver executable selenium should use.
        If None, the system PATH will be checked for the executable.
        If the executable isn't found, the parent directories and the immediate child directories
        of the current working directory will be searched.

        :raises ValueError: If browser_type is neither 'firefox' nor 'chrome'.
        """
        self.headless = headless
        browser_type = browser_type.lower()
        if browser_type in ["firefox", "chrome"]:
            self.browser_type = browser_type
        else:
            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
        self.browser_open = False
        self.implicit_wait = implicit_wait
        self.page_load_timeout = page_load_timeout
        self.rotation_timer = Timer()
        self.randomize_user_agent = randomize_user_agent
        self.user_agent_rotation_period = user_agent_rotation_period
        self.locator_method = locator_method
        # turbo() with its default argument initialises the wait ranges and typing mode.
        self.turbo()
        # Expose selenium's Keys on the instance (e.g. self.keys.SPACE).
        self.keys = Keys
        self.move_window_by = move_window_by
        self.download_dir = download_dir
        self.driver_path = driver_path
        # Must run after browser_type is set: the search picks the driver name from it.
        if not self.driver_path:
            self.search_for_driver()
        if open_browser:
            self.open_browser()
        else:
            self.browser = None
        # Close the browser at interpreter exit; close_browser() is a no-op if already closed.
        atexit.register(self.close_browser)
Parameters
  • headless: If True, browser window will not be visible.

  • browser_type: Which browser to use. Can be 'firefox' or 'chrome'.

  • implicit_wait: Number of seconds to look for a specified element before selenium considers it missing and throws an exception.

  • page_load_timeout: Time in seconds for selenium to wait for a page to load before throwing an exception.

  • open_browser: If True, opens a browser window when a User object is created. If False, a manual call to self.open_browser() must be made.

  • locator_method: The locator type User should expect to be given. Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'. Every member function with a 'locator' argument refers to a string matching the current locator_method.

  • randomize_user_agent: If True, a random useragent will be used whenever the browser is opened. If False, the native useragent will be used.

  • user_agent_rotation_period: If not None, the browser window will be closed and reopened with a new useragent every user_agent_rotation_period number of minutes. Rotation occurs on the first call to self.get() after the time period has elapsed. Ignored if randomize_user_agent is False.

  • move_window_by: The x and y amount of pixels to move the browser window by after opening.

  • download_dir: The download folder to use. If None, the default folder will be used.

  • driver_path: The path to the webdriver executable selenium should use. If None, the system PATH will be checked for the executable. If the executable isn't found, the parent directories and the immediate child directories of the current working directory will be searched.

def configure_firefox(self) -> selenium.webdriver.firefox.service.Service:
115    def configure_firefox(self) -> FirefoxService:
116        """Configure options and profile for firefox."""
117        self.options = FirefoxOptions()
118        self.options.headless = self.headless
119        self.options.set_preference(
120            "widget.windows.window_occlusion_tracking.enabled", False
121        )
122        self.options.set_preference("dom.webaudio.enabled", False)
123        if self.randomize_user_agent:
124            self.options.set_preference("general.useragent.override", get_agent())
125        if self.download_dir:
126            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
127            self.profile = FirefoxProfile()
128            self.profile.set_preference("browser.download.dir", str(self.download_dir))
129            self.profile.set_preference("browser.download.folderList", 2)
130        else:
131            self.profile = None
132        self.service = FirefoxService(
133            executable_path=str(self.driver_path), log_path=os.devnull
134        )

Configure options and profile for firefox.

def configure_chrome(self) -> selenium.webdriver.chrome.service.Service:
136    def configure_chrome(self) -> ChromeService:
137        """Configure options and profile for chrome."""
138        self.options = ChromeOptions()
139        self.options.headless = self.headless
140        self.options.add_argument("--disable-blink-features=AutomationControlled")
141        self.options.add_argument("--mute-audio")
142        self.options.add_argument("--disable-infobars")
143        self.options.add_argument("--disable-notifications")
144        self.options.add_argument("--log-level=3")
145        if self.randomize_user_agent:
146            self.options.add_argument(f"--user-agent={get_agent()}")
147        self.options.add_experimental_option("useAutomationExtension", False)
148        if self.download_dir:
149            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
150            self.options.add_experimental_option(
151                "prefs", {"download.default_directory": str(self.download_dir)}
152            )
153        self.service = ChromeService(
154            executable_path=str(self.driver_path), log_path=os.devnull
155        )

Configure options and profile for chrome.

def search_for_driver(self):
157    def search_for_driver(self):
158        """Searches for the webdriver executable."""
159        cwd = Path.cwd()
160        found = False
161        match self.browser_type:
162            case "firefox":
163                driver = "geckodriver.exe"
164            case "chrome":
165                driver = "chromedriver.exe"
166        # search PATH
167        env_path = os.environ["PATH"]
168        if sys.platform == "win32":
169            env_paths = env_path.split(";")
170        else:
171            env_paths = env_path.split(":")
172            driver = driver[: driver.find(".")]
173        for path in env_paths:
174            if (Path(path) / driver).exists():
175                self.driver_path = Path(path) / driver
176                found = True
177                break
178        # check current working directory and parent folders
179        if not found:
180            while cwd != cwd.parent:
181                if (cwd / driver).exists():
182                    self.driver_path = cwd / driver
183                    found = True
184                    break
185                cwd = cwd.parent
186            # check top most level
187            if not found and (cwd / driver).exists():
188                self.driver_path = cwd / driver
189                found = True
190        # check child folders (only 1 level down)
191        if not found:
192            for child in Path.cwd().iterdir():
193                if child.is_dir() and (child / driver).exists():
194                    self.driver_path = child / driver
195                    found = True
196        if not found:
197            warn(f"Could not find {driver}")

Searches for the webdriver executable.

def set_implicit_wait(self, wait_time: int = None):
199    def set_implicit_wait(self, wait_time: int = None):
200        """Sets to default time if no arg given."""
201        if not wait_time:
202            self.browser.implicitly_wait(self.implicit_wait)
203        else:
204            self.browser.implicitly_wait(wait_time)

Sets to default time if no arg given.

def open_browser(self):
206    def open_browser(self):
207        """Configures and opens selenium browser."""
208        if not self.browser_open:
209            match self.browser_type:
210                case "firefox":
211                    self.configure_firefox()
212                    self.browser = webdriver.Firefox(
213                        options=self.options,
214                        service=self.service,
215                        firefox_profile=self.profile,
216                    )
217                case "chrome":
218                    self.configure_chrome()
219                    self.browser = webdriver.Chrome(
220                        options=self.options, service=self.service
221                    )
222            self.set_implicit_wait()
223            self.browser.maximize_window()
224            self.browser.set_window_position(
225                self.move_window_by[0], self.move_window_by[1]
226            )
227            self.browser.maximize_window()
228            self.browser.set_page_load_timeout(self.page_load_timeout)
229            self.browser_open = True
230            self.tab_index = 0
231            self.rotation_timer.start()
232        else:
233            warn("Browser already open.")

Configures and opens selenium browser.

def close_browser(self):
235    def close_browser(self):
236        """Close browser window."""
237        if self.browser_open:
238            self.browser_open = False
239            self.browser.quit()

Close browser window.

def open_tab(self, url: str = '', switch_to_tab: bool = True):
241    def open_tab(self, url: str = "", switch_to_tab: bool = True):
242        """Opens new tab and, if provided, goes to url.
243
244        New tab is inserted after currently active tab."""
245        self.script("window.open(arguments[0]);", url)
246        if switch_to_tab:
247            self.switch_to_tab(self.tab_index + 1)

Opens new tab and, if provided, goes to url.

New tab is inserted after currently active tab.

def switch_to_tab(self, tab_index: int):
249    def switch_to_tab(self, tab_index: int):
250        """Switch to a tab in browser, zero indexed."""
251        self.browser.switch_to.window(self.browser.window_handles[tab_index])
252        self.tab_index = tab_index

Switch to a tab in browser, zero indexed.

def get_num_tabs(self) -> int:
254    def get_num_tabs(self) -> int:
255        """Returns number of tabs open."""
256        return len(self.browser.window_handles)

Returns number of tabs open.

def close_tab(self, tab_index: int = 1):
258    def close_tab(self, tab_index: int = 1):
259        """Close specified tab and
260        switches to tab index 0."""
261        self.switch_to_tab(tab_index)
262        self.browser.close()
263        self.switch_to_tab(0)

Close specified tab and switches to tab index 0.

def get(self, url: str):
265    def get(self, url: str):
266        """Requests webpage at given url and rotates userAgent if necessary."""
267        if not self.browser_open:
268            self.open_browser()
269        if (
270            self.randomize_user_agent
271            and self.user_agent_rotation_period is not None
272            and self.rotation_timer.elapsed > (60 * self.user_agent_rotation_period)
273        ):
274            self.rotation_timer.stop()
275            self.close_browser()
276            self.open_browser()
277        self.browser.get(url)
278        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
279        self.chill(self.arrival_wait)

Requests webpage at given url and rotates userAgent if necessary.

def get_soup(self) -> bs4.BeautifulSoup:
281    def get_soup(self) -> BeautifulSoup:
282        """Returns a BeautifulSoup object
283        of the current page source."""
284        return BeautifulSoup(self.browser.page_source, "html.parser")

Returns a BeautifulSoup object of the current page source.

def current_url(self) -> str:
286    def current_url(self) -> str:
287        """Returns current url of active tab."""
288        return self.browser.current_url

Returns current url of active tab.

def delete_cookies(self):
290    def delete_cookies(self):
291        """Delete all cookies for
292        this browser instance."""
293        self.browser.delete_all_cookies()

Delete all cookies for this browser instance.

def turbo(self, engage: bool = True):
295    def turbo(self, engage: bool = True):
296        """When engaged, strings will be sent
297        to elements all at once and there will be
298        no waiting after actions.
299
300        When disengaged, strings will be sent to elements
301        'one key at a time' with randomized amounts of
302        time between successive keys and after actions."""
303        if engage:
304            self.after_key_wait = (0, 0)
305            self.after_field_wait = (0, 0)
306            self.after_click_wait = (0, 0)
307            self.arrival_wait = (1, 1)
308            self.one_key_at_a_time = False
309            self.turbo_engaged = True
310        else:
311            self.after_key_wait = (0.1, 0.5)
312            self.after_field_wait = (1, 2)
313            self.after_click_wait = (0.25, 1.5)
314            self.arrival_wait = (4, 10)
315            self.one_key_at_a_time = True
316            self.turbo_engaged = False

When engaged, strings will be sent to elements all at once and there will be no waiting after actions.

When disengaged, strings will be sent to elements 'one key at a time' with randomized amounts of time between successive keys and after actions.

def chill(self, min_max: tuple[float, float]):
318    def chill(self, min_max: tuple[float, float]):
319        """Sleeps a random amount
320        between min_max[0] and min_max[1]."""
321        time.sleep(random.uniform(min_max[0], min_max[1]))

Sleeps a random amount between min_max[0] and min_max[1].

def script(self, script: str, args: Any = None) -> Any:
323    def script(self, script: str, args: Any = None) -> Any:
324        """Execute javascript code and returns result."""
325        return self.browser.execute_script(script, args)

Execute javascript code and returns result.

def remove(self, locator: str):
327    def remove(self, locator: str):
328        """Removes element from DOM."""
329        self.script("arguments[0].remove();", self.find(locator))

Removes element from DOM.

def get_length(self, locator: str) -> int:
331    def get_length(self, locator: str) -> int:
332        """Returns number of child elements for a given element."""
333        return int(self.script("return arguments[0].length;", self.find(locator)))

Returns number of child elements for a given element.

def find(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
335    def find(self, locator: str) -> WebElement:
336        """Finds and returns a WebElement."""
337        match self.locator_method:
338            case "xpath":
339                return self.browser.find_element(By.XPATH, locator)
340            case "id":
341                return self.browser.find_element(By.ID, locator)
342            case "className":
343                return self.browser.find_element(By.CLASS_NAME, locator)
344            case "name":
345                return self.browser.find_element(By.NAME, locator)
346            case "cssSelector":
347                return self.browser.find_element(By.CSS_SELECTOR, locator)

Finds and returns a WebElement.

def find_children( self, locator: str) -> list[selenium.webdriver.remote.webelement.WebElement]:
349    def find_children(self, locator: str) -> list[WebElement]:
350        """Returns a list of child WebElements
351        for given locator arg."""
352        element = self.find(locator)
353        return element.find_elements("xpath", "./*")

Returns a list of child WebElements for given locator arg.

def scroll(self, amount: int = None, fraction: float = None):
355    def scroll(self, amount: int = None, fraction: float = None):
356        """Scroll web page.
357        :param amount: The number of lines to scroll if not None.
358
359        :param fraction: The amount between 0.0 and 1.0
360        of the page height to scroll.
361
362        If values are provided for both arguments,
363        amount will be used.
364
365        If values are provided for neither argument,
366        the entire page length will be scrolled.
367
368        Scrolls one line at a time if self.turbo is False."""
369        if amount:
370            amount_to_scroll = amount
371        elif fraction:
372            amount_to_scroll = int(
373                fraction
374                * (
375                    int(self.script("return document.body.scrollHeight;"))
376                    - int(self.script("return window.pageYOffset;"))
377                )
378            )
379        else:
380            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
381        if self.turbo_engaged:
382            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
383        else:
384            for _ in range(abs(amount_to_scroll)):
385                if amount_to_scroll >= 0:
386                    self.script("window.scrollBy(0,1);")
387                else:
388                    self.script("window.scrollBy(0,-1);")
389        self.chill(self.after_click_wait)

Scroll web page.

Parameters
  • amount: The number of lines to scroll if not None.

  • fraction: The amount between 0.0 and 1.0 of the page height to scroll.

If values are provided for both arguments, amount will be used.

If values are provided for neither argument, the entire page length will be scrolled.

Scrolls one line at a time if self.turbo is False.

def scroll_into_view(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
391    def scroll_into_view(self, locator: str) -> WebElement:
392        """Scrolls to a given element and returns the element."""
393        element = self.find(locator)
394        self.script("arguments[0].scroll_into_view();", element)
395        self.chill(self.after_click_wait)
396        return element

Scrolls to a given element and returns the element.

def text(self, locator: str) -> str:
398    def text(self, locator: str) -> str:
399        """Returns text of WebElement."""
400        return self.find(locator).text

Returns text of WebElement.

def click(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
402    def click(self, locator: str) -> WebElement:
403        """Clicks on and returns WebElement."""
404        element = self.find(locator)
405        element.click()
406        self.chill(self.after_click_wait)
407        return element

Clicks on and returns WebElement.

def clear(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
409    def clear(self, locator: str) -> WebElement:
410        """Clears content of WebElement if able
411        and then returns WebElement."""
412        element = self.find(locator)
413        element.clear()
414        self.chill(self.after_click_wait)
415        return element

Clears content of WebElement if able and then returns WebElement.

def switch_to_iframe(self, locator: str):
417    def switch_to_iframe(self, locator: str):
418        """Switch to an iframe from given locator."""
419        self.browser.switch_to.frame(self.find(locator))

Switch to an iframe from given locator.

def switch_to_parent_frame(self):
421    def switch_to_parent_frame(self):
422        """Move up a frame level from current frame."""
423        self.browser.switch_to.parent_frame()

Move up a frame level from current frame.

def select( self, locator: str, method: str, choice: str | int | tuple) -> selenium.webdriver.remote.webelement.WebElement:
425    def select(
426        self, locator: str, method: str, choice: str | int | tuple
427    ) -> WebElement:
428        """Select a choice from Select element.
429        Returns the Select element from the locator string,
430        not the option element that is selected.
431
432        :param method: Can be 'value' or 'index'
433
434        :param choice: The option to select.
435
436        If method is 'value', then choice should be
437        the html 'value' attribute of the desired option.
438
439        If method is 'index', choice can either be a single
440        int for the desired option or it can be a two-tuple.
441        If the tuple is provided, a random option between the
442        two indicies (inclusive) will be selected."""
443        element = self.click(locator)
444        match method:
445            case "value":
446                Select(element).select_by_value(choice)
447            case "index":
448                if type(choice) == tuple:
449                    choice = random.randint(choice[0], choice[1])
450                Select(element).select_by_index(choice)
451        self.chill(self.after_field_wait)
452        return element

Select a choice from Select element. Returns the Select element from the locator string, not the option element that is selected.

Parameters
  • method: Can be 'value' or 'index'

  • choice: The option to select.

If method is 'value', then choice should be the html 'value' attribute of the desired option.

If method is 'index', choice can either be a single int for the desired option or it can be a two-tuple. If the tuple is provided, a random option between the two indices (inclusive) will be selected.

def click_elements( self, locators: list[str], max_selections: int = None, min_selections: int = 1) -> selenium.webdriver.remote.webelement.WebElement:
454    def click_elements(
455        self, locators: list[str], max_selections: int = None, min_selections: int = 1
456    ) -> WebElement:
457        """Click a random number of WebElements
458        and return the last WebElement clicked.
459
460        :param locators: A list of element locators to choose from.
461
462        :param max_selections: The maximum number of elements to click.
463        If None, the maximum will be the length of the locators list.
464
465        :param min_selections: The minimum number of elements to click.
466
467        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
468        will click between 1 and 3 random elements from the list.
469        """
470        if not max_selections:
471            max_selections = len(locators)
472        for option in random.sample(
473            locators, k=random.randint(min_selections, max_selections)
474        ):
475            element = self.click(option)
476        return element

Click a random number of WebElements and return the last WebElement clicked.

Parameters
  • locators: A list of element locators to choose from.

  • max_selections: The maximum number of elements to click. If None, the maximum will be the length of the locators list.

  • min_selections: The minimum number of elements to click.

e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3) will click between 1 and 3 random elements from the list.

def get_click_list( self, num_options: int, max_choices: int = 1, min_choices: int = 1) -> list[str]:
478    def get_click_list(
479        self, num_options: int, max_choices: int = 1, min_choices: int = 1
480    ) -> list[str]:
481        """Similar to self.click_elements(), but for use with the self.fill_next() method.
482
483        Creates a list of length 'num_options' where every element is 'skip'.
484
485        A random number of elements in the list between 'min_choices' and 'max_choices' are
486        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
487        click_list = ["skip"] * num_options
488        selected_indexes = []
489        for i in range(random.randint(min_choices, max_choices)):
490            index = random.randint(0, num_options - 1)
491            while index in selected_indexes:
492                index = random.randint(0, num_options - 1)
493            selected_indexes.append(index)
494            click_list[index] = self.keys.SPACE
495        return click_list

Similar to self.click_elements(), but for use with the self.fill_next() method.

Creates a list of length 'num_options' where every element is 'skip'.

A random number of elements in the list between 'min_choices' and 'max_choices' are replaced with 'keys.SPACE' (interpreted as a click by almost all web forms).

def send_keys( self, locator: str, data: str, click_first: bool = True, clear_first: bool = False) -> selenium.webdriver.remote.webelement.WebElement:
497    def send_keys(
498        self,
499        locator: str,
500        data: str,
501        click_first: bool = True,
502        clear_first: bool = False,
503    ) -> WebElement:
504        """Types data into element and returns the element.
505
506        :param data: The string to send to the element.
507
508        :param click_first: If True, the element is clicked on
509        before the data is sent.
510
511        :param clear_first: If True, the current text of the element
512        is cleared before the data is sent."""
513        element = self.click(locator) if click_first else self.find(locator)
514        if clear_first:
515            element.clear()
516            self.chill(self.after_click_wait)
517        if self.one_key_at_a_time:
518            for ch in str(data):
519                element.send_keys(ch)
520                self.chill(self.after_key_wait)
521        else:
522            element.send_keys(str(data))
523        self.chill(self.after_field_wait)
524        return element

Types data into element and returns the element.

Parameters
  • data: The string to send to the element.

  • click_first: If True, the element is clicked on before the data is sent.

  • clear_first: If True, the current text of the element is cleared before the data is sent.

def fill_next( self, data: list[str | tuple], start_element: selenium.webdriver.remote.webelement.WebElement = None) -> selenium.webdriver.remote.webelement.WebElement:
526    def fill_next(
527        self, data: list[str | tuple], start_element: WebElement = None
528    ) -> WebElement:
529        """Fills a form by tabbing from the current WebElement
530        to the next one and using the corresponding item in data.
531        Returns the last WebElement.
532
533        :param data: A list of form data. If an item is a string (except for 'skip')
534        it will be typed into the current WebElement.
535
536        An item in data can be a two-tuple of the form
537        ('downArrow', numberOfPresses:int|tuple[int, int]).
538
539        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
540        that many times to the WebElement.
541
542        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
543        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
544        This is typically for use with Select elements.
545
546        An item in data can also be 'skip', which will perform no action on the current
547        WebElement and will continue to the next one.
548
549        An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100,
550        representing a percent chance an element will be clicked or skipped:
551        >>> user.fill_next(["click=70"])
552
553        has a 70% chance of being
554        >>> user.fill_next([user.keys.SPACE])
555
556        and a 30% chance of being
557        >>> user.fill_next(["skip"])
558
559
560        :param start_element: The WebElement to start tabbing from.
561        The currently active element will be used if start_element is None.
562
563        Note: The function tabs to the next element before sending data,
564        so the start_element should the WebElement before the one
565        that should receive data[0].
566        """
567        element = (
568            self.browser.switch_to.active_element
569            if not start_element
570            else start_element
571        )
572        for datum in data:
573            element.send_keys(Keys.TAB)
574            element = self.browser.switch_to.active_element
575            self.chill(self.after_key_wait)
576            if type(datum) == str and datum.strip().startswith("click="):
577                chance = int(datum.split("=")[1].strip())
578                if random.randint(0, 100) <= chance:
579                    datum = Keys.SPACE
580                else:
581                    datum = "skip"
582            if datum[0] == "downArrow":
583                if type(datum[1]) == tuple:
584                    times = random.randint(datum[1][0], datum[1][1])
585                else:
586                    times = datum[1]
587                for _ in range(times):
588                    element.send_keys(Keys.ARROW_DOWN)
589                    self.chill(self.after_key_wait)
590            elif datum == "skip":
591                self.chill(self.after_key_wait)
592            else:
593
594                if self.turbo_engaged:
595                    element.send_keys(str(datum))
596                else:
597                    for ch in str(datum):
598                        element.send_keys(ch)
599                        self.chill(self.after_key_wait)
600            self.chill(self.after_field_wait)
601        return element

Fills a form by tabbing from the current WebElement to the next one and using the corresponding item in data. Returns the last WebElement.

Parameters
  • data: A list of form data. If an item is a string (except for 'skip') it will be typed into the current WebElement.

An item in data can be a two-tuple of the form ('downArrow', numberOfPresses:int|tuple[int, int]).

If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent that many times to the WebElement.

If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random number of times between numberOfPresses[0] and numberOfPresses[1] inclusive. This is typically for use with Select elements.

An item in data can also be 'skip', which will perform no action on the current WebElement and will continue to the next one.

An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100, representing a percent chance an element will be clicked or skipped:

>>> user.fill_next(["click=70"])

has a 70% chance of being

>>> user.fill_next([user.keys.SPACE])

and a 30% chance of being

>>> user.fill_next(["skip"])
  • start_element: The WebElement to start tabbing from. The currently active element will be used if start_element is None.

Note: The function tabs to the next element before sending data, so the start_element should be the WebElement before the one that should receive data[0].

def wait_until( self, condition: function, max_wait: float = 10, polling_interval: float = 0.1):
603    def wait_until(
604        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
605    ):
606        """Checks condition repeatedly until either it is true,
607        or the max_wait is exceeded.
608
609        Raises a TimeoutError if the condition doesn't success within max_wait.
610
611        Useful for determing whether a form has been successfully submitted.
612
613        :param condition: The condition function to check.
614
615        :param max_wait: Number of seconds to continue checking condition
616        before throwing a TimeoutError.
617
618        :param polling_interval: The number of seconds to sleep before
619        checking the condition function again after it fails.
620
621        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
622        start_time = time.time()
623        while True:
624            try:
625                if condition():
626                    time.sleep(1)
627                    break
628                elif (time.time() - start_time) > max_wait:
629                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
630                else:
631                    time.sleep(polling_interval)
632            except:
633                if (time.time() - start_time) > max_wait:
634                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
635                else:
636                    time.sleep(polling_interval)

Checks condition repeatedly until either it is true, or the max_wait is exceeded.

Raises a TimeoutError if the condition doesn't succeed within max_wait.

Useful for determining whether a form has been successfully submitted.

Parameters
  • condition: The condition function to check.

  • max_wait: Number of seconds to continue checking condition before throwing a TimeoutError.

  • polling_interval: The number of seconds to sleep before checking the condition function again after it fails.

e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]'))

def dismiss_alert(self):
638    def dismiss_alert(self):
639        """Dismiss alert dialog."""
640        self.browser.switch_to.alert.dismiss()

Dismiss alert dialog.

def solve_recaptcha_v3( self, outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]', inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]'):
642    def solve_recaptcha_v3(
643        self,
644        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
645        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
646    ):
647        """Pass google recaptcha v3 by solving an audio puzzle.
648
649        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
650        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
651        pass None to this argument.
652
653        """
654        locator_method = self.locator_method
655        self.locator_method = "xpath"
656        try:
657            if outer_iframe_xpath:
658                self.switch_to_iframe(outer_iframe_xpath)
659                self.click('//*[@id="recaptcha-anchor"]')
660                self.switch_to_parent_frame()
661            self.switch_to_iframe(inner_iframe_xpath)
662            self.click('//*[@id="recaptcha-audio-button"]')
663            mp3_url = self.find(
664                '//a[@class="rc-audiochallenge-tdownload-link"]'
665            ).get_attribute("href")
666            text = get_text_from_url(mp3_url, ".mp3")
667            self.send_keys('//*[@id="audio-response"]', text)
668            self.click('//*[@id="recaptcha-verify-button"]')
669        except Exception as e:
670            print(e)
671            raise Exception("Could not solve captcha")
672        finally:
673            self.switch_to_parent_frame()
674            self.locator_method = locator_method

Pass google recaptcha v3 by solving an audio puzzle.

Parameters
  • outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox. If it's the recaptcha without the initial checkbox that just shows the image puzzle, pass None to this argument.