subtotal.subtotal

  1import argparse
  2import time
  3from pathlib import Path
  4from urllib.parse import urlparse
  5
  6from printbuddies import ProgBar
  7from seleniumuser import User
  8
# Directory that contains this module file.
root = Path(__file__).parent
 10
 11
 12def get_args() -> argparse.Namespace:
 13    parser = argparse.ArgumentParser()
 14
 15    parser.add_argument("url", type=str, help=""" The url to find subdomains for. """)
 16
 17    parser.add_argument(
 18        "-o",
 19        "--output_file",
 20        type=str,
 21        default=None,
 22        help=""" Output file to dump subdomains to. 
 23        If unspecified, a folder named "subtotals" will
 24        be created in your current working directory and
 25        the results will be saved to {url}-subdomains.txt""",
 26    )
 27
 28    parser.add_argument(
 29        "-b",
 30        "--browser",
 31        type=str,
 32        default="firefox",
 33        help=""" Browser for selenium to use.
 34        Can be "firefox" or "chrome".
 35        The appropriate webdriver needs to be installed
 36        in your current working directory or in your PATH.""",
 37    )
 38
 39    args = parser.parse_args()
 40
 41    return args
 42
 43
 44class User(User):
 45    def expand_subdomains(self):
 46        """Expand the listing of subdomains until all are visible.
 47        Mostly using Javascript to deal with these nightmare shadow roots."""
 48
 49        script = """ 
 50        var moreButton = document.getElementsByName("domain-view")[0].shadowRoot.getElementById("report").children[3].children[0].shadowRoot.children[0].children[1].children[0].getElementsByClassName("load-more mt-3")[0]; 
 51        function getVisibleSubdomains() {
 52            return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children.length;
 53        }
 54        moreButton.click();
 55        return getVisibleSubdomains();
 56        """
 57        total_subdomains = self.script(
 58            'return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].shadowRoot.getElementById("info-badge").textContent;'
 59        )
 60        total_subdomains = total_subdomains.strip().strip(")").strip("(")
 61        if "K" in total_subdomains:
 62            total_subdomains = float(total_subdomains.split()[0]) * 1000
 63        total_subdomains = int(total_subdomains)
 64        bar = ProgBar(total_subdomains)
 65        while True:
 66            visible_subdomains = self.script(script)
 67            bar.display(
 68                prefix="Expanding subdomains", counter_override=visible_subdomains
 69            )
 70            if visible_subdomains >= total_subdomains:
 71                break
 72            else:
 73                time.sleep(1)
 74
 75    def get_subdomains(self) -> list[str]:
 76        script = """ return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children;"""
 77        subdomain_body = self.script(script)
 78        subdomains = sorted(
 79            list(
 80                set(
 81                    [
 82                        row.find_element("xpath", ".//div/a").text
 83                        for row in subdomain_body
 84                    ]
 85                )
 86            )
 87        )
 88        return subdomains
 89
 90
 91def get_root_domain(url: str) -> str:
 92    """Get root domain location from url.
 93    >>> print(get_root_domain("https://www.website.com"))
 94    >>> "website.com" """
 95    root_domain = urlparse(url.lower()).netloc
 96    if not root_domain:
 97        return url
 98    # Remove any leading "www." or subdomains
 99    if root_domain.count(".") > 1:
100        return root_domain[root_domain.rfind(".", 0, root_domain.rfind(".")) + 1 :]
101    return root_domain
102
103
def main(args: argparse.Namespace = None):
    """Scrape the subdomains VirusTotal lists for a domain, print them
    and write them to a text file, one per line.

    :param args: Namespace with `url`, `output_file` and `browser`
    attributes. Parsed from the command line when omitted.

    `args.url` is replaced with its root domain and `args.output_file`
    with the `Path` that is written to. When no output file is given the
    results go to "subtotals/{url}-subdomains.txt" in the current
    working directory."""
    if args is None:
        args = get_args()

    args.url = get_root_domain(args.url)
    if args.output_file:
        # The command line hands the path over as a plain string,
        # which has no write_text().
        args.output_file = Path(args.output_file)
    else:
        args.output_file = Path.cwd() / "subtotals" / f"{args.url}-subdomains.txt"
    args.output_file.parent.mkdir(parents=True, exist_ok=True)

    virustotal_url = f"https://www.virustotal.com/gui/domain/{args.url}/relations"
    # Only firefox is run headless.
    with User(headless=args.browser == "firefox", browser_type=args.browser) as user:
        user.get(virustotal_url)
        time.sleep(1)
        try:
            user.expand_subdomains()
        except Exception:
            # First attempt failed; try the recaptcha solver once and retry.
            # The solver is best effort: if it fails too, the retry below
            # raises the error that matters.
            try:
                user.solve_recaptcha_v3()
            except Exception:
                pass
            time.sleep(1)
            user.expand_subdomains()
        subdomains = user.get_subdomains()
    print(*subdomains, sep="\n")
    print(f"Found {len(subdomains)} unique subdomains.")
    args.output_file.write_text("\n".join(subdomains))
132
133
# When executed as a script, run the scraper with the command line arguments.
if __name__ == "__main__":
    main(get_args())
def get_args() -> argparse.Namespace:
def get_args() -> argparse.Namespace:
    """Parse the command line options of the subdomain scraper.

    The returned namespace carries `url` (required positional),
    `output_file` (None unless -o/--output_file is given) and
    `browser` ("firefox" unless -b/--browser is given)."""
    output_help = """ Output file to dump subdomains to. 
        If unspecified, a folder named "subtotals" will
        be created in your current working directory and
        the results will be saved to {url}-subdomains.txt"""
    browser_help = """ Browser for selenium to use.
        Can be "firefox" or "chrome".
        The appropriate webdriver needs to be installed
        in your current working directory or in your PATH."""

    cli = argparse.ArgumentParser()
    cli.add_argument("url", type=str, help=""" The url to find subdomains for. """)
    cli.add_argument("-o", "--output_file", type=str, default=None, help=output_help)
    cli.add_argument("-b", "--browser", type=str, default="firefox", help=browser_help)
    return cli.parse_args()
class User(seleniumuser.seleniumuser.User):
class User(User):
    """The imported selenium `User`, extended with helpers that read the
    subdomain listing out of the shadow DOM of a VirusTotal domain page."""

    def expand_subdomains(self, max_stalled_polls: int = 15):
        """Expand the listing of subdomains until all are visible.
        Mostly using Javascript to deal with these nightmare shadow roots.

        The expected total is read from the page's "info-badge" element.
        The "load more" button is then clicked once per second until the
        number of listed rows reaches that total.

        :param max_stalled_polls: Give up after this many consecutive polls
        in which the row count did not grow. This keeps the loop from running
        forever when the badge total is rounded (e.g. "1.2 K") or the
        button is no longer present.

        Any exception raised by `self.script` (e.g. because the expected
        page structure is missing) is propagated to the caller."""

        # The button lookup is guarded: when there is nothing more to load
        # the last index yields `undefined` and an unguarded click() would throw.
        script = """
        var moreButton = document.getElementsByName("domain-view")[0].shadowRoot.getElementById("report").children[3].children[0].shadowRoot.children[0].children[1].children[0].getElementsByClassName("load-more mt-3")[0];
        function getVisibleSubdomains() {
            return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children.length;
        }
        if (moreButton) {
            moreButton.click();
        }
        return getVisibleSubdomains();
        """
        badge_text = self.script(
            'return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].shadowRoot.getElementById("info-badge").textContent;'
        )
        # Badge text looks like "(123)" or "(1.2 K)"; drop the decoration.
        badge_text = badge_text.strip().strip(")").strip("(").strip()
        badge_text = badge_text.upper().replace(",", "")
        if "K" in badge_text:
            # round() rather than int(): 2.3 * 1000 is 2299.999... as a float.
            total_subdomains = round(float(badge_text.replace("K", "").strip()) * 1000)
        else:
            total_subdomains = int(badge_text)

        bar = ProgBar(total_subdomains)
        previous_visible = -1
        stalled_polls = 0
        while True:
            visible_subdomains = self.script(script)
            bar.display(
                prefix="Expanding subdomains", counter_override=visible_subdomains
            )
            if visible_subdomains >= total_subdomains:
                break
            if visible_subdomains > previous_visible:
                previous_visible = visible_subdomains
                stalled_polls = 0
            else:
                stalled_polls += 1
                if stalled_polls >= max_stalled_polls:
                    # Nothing new is loading any more; accept what is listed.
                    break
            time.sleep(1)

    def get_subdomains(self) -> list[str]:
        """Return the unique subdomain names currently listed on the page, sorted.

        The name of each row is the text of the first link matching
        the relative xpath ".//div/a"."""
        script = """ return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children;"""
        subdomain_body = self.script(script)
        return sorted(
            {row.find_element("xpath", ".//div/a").text for row in subdomain_body}
        )

Sits on top of selenium to streamline automation and scraping tasks.

def expand_subdomains(self):
46    def expand_subdomains(self):
47        """Expand the listing of subdomains until all are visible.
48        Mostly using Javascript to deal with these nightmare shadow roots."""
49
50        script = """ 
51        var moreButton = document.getElementsByName("domain-view")[0].shadowRoot.getElementById("report").children[3].children[0].shadowRoot.children[0].children[1].children[0].getElementsByClassName("load-more mt-3")[0]; 
52        function getVisibleSubdomains() {
53            return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children.length;
54        }
55        moreButton.click();
56        return getVisibleSubdomains();
57        """
58        total_subdomains = self.script(
59            'return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].shadowRoot.getElementById("info-badge").textContent;'
60        )
61        total_subdomains = total_subdomains.strip().strip(")").strip("(")
62        if "K" in total_subdomains:
63            total_subdomains = float(total_subdomains.split()[0]) * 1000
64        total_subdomains = int(total_subdomains)
65        bar = ProgBar(total_subdomains)
66        while True:
67            visible_subdomains = self.script(script)
68            bar.display(
69                prefix="Expanding subdomains", counter_override=visible_subdomains
70            )
71            if visible_subdomains >= total_subdomains:
72                break
73            else:
74                time.sleep(1)

Expand the listing of subdomains until all are visible. This is mostly done with JavaScript in order to deal with these nightmare shadow roots.

def get_subdomains(self) -> list[str]:
76    def get_subdomains(self) -> list[str]:
77        script = """ return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children;"""
78        subdomain_body = self.script(script)
79        subdomains = sorted(
80            list(
81                set(
82                    [
83                        row.find_element("xpath", ".//div/a").text
84                        for row in subdomain_body
85                    ]
86                )
87            )
88        )
89        return subdomains
Inherited Members
seleniumuser.seleniumuser.User
User
configure_firefox
configure_chrome
search_for_driver
set_implicit_wait
open_browser
close_browser
open_tab
switch_to_tab
get_num_tabs
close_tab
get
get_soup
current_url
delete_cookies
turbo
chill
script
remove
get_length
find
find_children
scroll
scroll_into_view
text
click
clear
switch_to_iframe
switch_to_parent_frame
select
click_elements
get_click_list
send_keys
fill_next
wait_until
dismiss_alert
solve_recaptcha_v3
def get_root_domain(url: str) -> str:
 92def get_root_domain(url: str) -> str:
 93    """Get root domain location from url.
 94    >>> print(get_root_domain("https://www.website.com"))
 95    >>> "website.com" """
 96    root_domain = urlparse(url.lower()).netloc
 97    if not root_domain:
 98        return url
 99    # Remove any leading "www." or subdomains
100    if root_domain.count(".") > 1:
101        return root_domain[root_domain.rfind(".", 0, root_domain.rfind(".")) + 1 :]
102    return root_domain

Get root domain location from url.

>>> print(get_root_domain("https://www.website.com"))
website.com
def main(args: argparse.Namespace = None):
def main(args: argparse.Namespace = None):
    """Scrape the subdomains VirusTotal lists for a domain, print them
    and write them to a text file, one per line.

    :param args: Namespace with `url`, `output_file` and `browser`
    attributes. Parsed from the command line when omitted.

    `args.url` is replaced with its root domain and `args.output_file`
    with the `Path` that is written to. When no output file is given the
    results go to "subtotals/{url}-subdomains.txt" in the current
    working directory."""
    if args is None:
        args = get_args()

    args.url = get_root_domain(args.url)
    if args.output_file:
        # The command line hands the path over as a plain string,
        # which has no write_text().
        args.output_file = Path(args.output_file)
    else:
        args.output_file = Path.cwd() / "subtotals" / f"{args.url}-subdomains.txt"
    args.output_file.parent.mkdir(parents=True, exist_ok=True)

    virustotal_url = f"https://www.virustotal.com/gui/domain/{args.url}/relations"
    # Only firefox is run headless.
    with User(headless=args.browser == "firefox", browser_type=args.browser) as user:
        user.get(virustotal_url)
        time.sleep(1)
        try:
            user.expand_subdomains()
        except Exception:
            # First attempt failed; try the recaptcha solver once and retry.
            # The solver is best effort: if it fails too, the retry below
            # raises the error that matters.
            try:
                user.solve_recaptcha_v3()
            except Exception:
                pass
            time.sleep(1)
            user.expand_subdomains()
        subdomains = user.get_subdomains()
    print(*subdomains, sep="\n")
    print(f"Found {len(subdomains)} unique subdomains.")
    args.output_file.write_text("\n".join(subdomains))