subtotal.subtotal
"""Scrape the subdomains VirusTotal lists for a domain and save them to a text file."""

import argparse
import time
from pathlib import Path
from urllib.parse import urlparse

from printbuddies import ProgBar
from seleniumuser import User

root = Path(__file__).parent


def get_args() -> argparse.Namespace:
    """Parse the command line arguments for the subdomain scraper.

    Returns:
        Namespace with `url` (str), `output_file` (Path, or None when `-o`
        is not given) and `browser` (str, "firefox" by default).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("url", type=str, help=""" The url to find subdomains for. """)

    parser.add_argument(
        "-o",
        "--output_file",
        # Parsed straight into a Path because main() calls .write_text() on it.
        type=Path,
        default=None,
        help=""" Output file to dump subdomains to.
        If unspecified, a folder named "subtotals" will
        be created in your current working directory and
        the results will be saved to {url}-subdomains.txt""",
    )

    parser.add_argument(
        "-b",
        "--browser",
        type=str,
        default="firefox",
        help=""" Browser for selenium to use.
        Can be "firefox" or "chrome".
        The appropriate webdriver needs to be installed
        in your current working directory or in your PATH.""",
    )

    return parser.parse_args()


class User(User):
    """seleniumuser `User` extended with helpers for the VirusTotal relations page."""

    def expand_subdomains(self, max_stalled_polls: int = 10) -> None:
        """Expand the listing of subdomains until all are visible.

        Mostly uses JavaScript to reach through the page's nested shadow roots.

        Args:
            max_stalled_polls: Number of consecutive one-second polls without
                any new rows after which expansion gives up.

        Raises:
            Whatever `self.script` raises when the expected page structure is
            missing; `main` relies on that to detect an interstitial page.
        """
        click_and_count = """
        var moreButton = document.getElementsByName("domain-view")[0].shadowRoot.getElementById("report").children[3].children[0].shadowRoot.children[0].children[1].children[0].getElementsByClassName("load-more mt-3")[0];
        function getVisibleSubdomains() {
            return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children.length;
        }
        if (moreButton) {
            moreButton.click();
        }
        return getVisibleSubdomains();
        """
        badge_text = self.script(
            'return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].shadowRoot.getElementById("info-badge").textContent;'
        )
        badge = badge_text.strip().strip("()").strip()
        # NOTE(review): the badge appears to abbreviate large counts as e.g.
        # "1.2 K"; the exact format is not verified here.
        if "K" in badge.upper():
            total_subdomains = round(float(badge.upper().split("K")[0]) * 1000)
        else:
            total_subdomains = int(badge)
        bar = ProgBar(total_subdomains)
        previous_count = -1
        stalled_polls = 0
        while True:
            visible_subdomains = self.script(click_and_count)
            bar.display(
                prefix="Expanding subdomains", counter_override=visible_subdomains
            )
            if visible_subdomains >= total_subdomains:
                break
            # An abbreviated badge can overstate the real total, so the target
            # may be unreachable; stop once the row count stops growing.
            if visible_subdomains > previous_count:
                previous_count = visible_subdomains
                stalled_polls = 0
            else:
                stalled_polls += 1
                if stalled_polls >= max_stalled_polls:
                    break
            time.sleep(1)

    def get_subdomains(self) -> list[str]:
        """Return the unique subdomain names currently listed, sorted alphabetically."""
        script = """
        return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children;"""
        subdomain_body = self.script(script)
        # The set comprehension drops duplicate rows before sorting.
        return sorted(
            {row.find_element("xpath", ".//div/a").text for row in subdomain_body}
        )


def get_root_domain(url: str) -> str:
    """Get root domain location from url.

    Keeps only the last two dot-separated labels of the host, so a
    multi-label public suffix is not recognised ("www.bbc.co.uk" gives
    "co.uk").

    >>> get_root_domain("https://www.website.com")
    'website.com'

    Args:
        url: A full url or a bare host name, with or without a scheme.

    Returns:
        The lower-cased root domain, or `url` unchanged if no host is found.
    """
    cleaned = url.strip().lower()
    parsed = urlparse(cleaned)
    if not parsed.netloc:
        # Without a scheme urlparse puts the host in `path`; the "//" prefix
        # makes it parse as a network location instead.
        parsed = urlparse(f"//{cleaned}")
    # `hostname` leaves out any "user:pass@" prefix and ":port" suffix.
    host = (parsed.hostname or "").rstrip(".")
    if not host:
        return url
    # Remove any leading "www." or subdomains
    return ".".join(host.split(".")[-2:])


def main(args: argparse.Namespace = None):
    """Scrape VirusTotal for the subdomains of `args.url`, print them and save them.

    Args:
        args: Parsed arguments carrying `url`, `output_file` and `browser`.
            When omitted, they are read from the command line.
    """
    if args is None:
        args = get_args()

    args.url = get_root_domain(args.url)
    if args.output_file:
        # A caller-built Namespace may carry a plain string here.
        output_path = Path(args.output_file)
    else:
        output_path = Path.cwd() / "subtotals" / f"{args.url}-subdomains.txt"
        args.output_file = output_path
    output_path.parent.mkdir(parents=True, exist_ok=True)

    virustotal_url = f"https://www.virustotal.com/gui/domain/{args.url}/relations"
    # Only firefox is run headless; any other browser gets headless=False.
    with User(headless=args.browser == "firefox", browser_type=args.browser) as user:
        user.get(virustotal_url)
        time.sleep(1)
        try:
            user.expand_subdomains()
        except Exception:
            # Presumably a captcha page replaced the report; try to clear it,
            # then retry once and let a second failure propagate.
            try:
                user.solve_recaptcha_v3()
            except Exception:
                # Best effort: the retry below surfaces any real problem.
                pass
            time.sleep(1)
            user.expand_subdomains()
        subdomains = user.get_subdomains()
        print(*subdomains, sep="\n")
        print(f"Found {len(subdomains)} unique subdomains.")
        output_path.write_text("\n".join(subdomains), encoding="utf-8")


if __name__ == "__main__":
    main(get_args())
def
get_args() -> argparse.Namespace:
def get_args() -> argparse.Namespace:
    """Parse the command line arguments for the subdomain scraper.

    Returns:
        Namespace with `url` (str), `output_file` (Path, or None when `-o`
        is not given) and `browser` (str, "firefox" by default).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("url", type=str, help=""" The url to find subdomains for. """)

    parser.add_argument(
        "-o",
        "--output_file",
        # Parsed straight into a Path because main() calls .write_text() on
        # it, which a plain str does not provide.
        type=Path,
        default=None,
        help=""" Output file to dump subdomains to.
        If unspecified, a folder named "subtotals" will
        be created in your current working directory and
        the results will be saved to {url}-subdomains.txt""",
    )

    parser.add_argument(
        "-b",
        "--browser",
        type=str,
        default="firefox",
        help=""" Browser for selenium to use.
        Can be "firefox" or "chrome".
        The appropriate webdriver needs to be installed
        in your current working directory or in your PATH.""",
    )

    return parser.parse_args()
class
User(seleniumuser.seleniumuser.User):
class User(User):
    """seleniumuser `User` extended with helpers for the VirusTotal relations page."""

    def expand_subdomains(self, max_stalled_polls: int = 10) -> None:
        """Expand the listing of subdomains until all are visible.

        Mostly uses JavaScript to reach through the page's nested shadow roots.

        Args:
            max_stalled_polls: Number of consecutive one-second polls without
                any new rows after which expansion gives up.

        Raises:
            Whatever `self.script` raises when the expected page structure is
            missing.
        """
        # Clicks "load more" when the button exists, then reports the row
        # count. Without the guard, a missing button makes `.click()` throw.
        click_and_count = """
        var moreButton = document.getElementsByName("domain-view")[0].shadowRoot.getElementById("report").children[3].children[0].shadowRoot.children[0].children[1].children[0].getElementsByClassName("load-more mt-3")[0];
        function getVisibleSubdomains() {
            return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children.length;
        }
        if (moreButton) {
            moreButton.click();
        }
        return getVisibleSubdomains();
        """
        badge_text = self.script(
            'return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].shadowRoot.getElementById("info-badge").textContent;'
        )
        badge = badge_text.strip().strip("()").strip()
        # NOTE(review): the badge appears to abbreviate large counts as e.g.
        # "1.2 K"; the exact format is not verified here. Splitting on "K"
        # accepts the number with or without a space before the suffix.
        if "K" in badge.upper():
            total_subdomains = round(float(badge.upper().split("K")[0]) * 1000)
        else:
            total_subdomains = int(badge)
        bar = ProgBar(total_subdomains)
        previous_count = -1
        stalled_polls = 0
        while True:
            visible_subdomains = self.script(click_and_count)
            bar.display(
                prefix="Expanding subdomains", counter_override=visible_subdomains
            )
            if visible_subdomains >= total_subdomains:
                break
            # An abbreviated badge can overstate the real total, so the target
            # may be unreachable; stop once the row count stops growing.
            if visible_subdomains > previous_count:
                previous_count = visible_subdomains
                stalled_polls = 0
            else:
                stalled_polls += 1
                if stalled_polls >= max_stalled_polls:
                    break
            time.sleep(1)

    def get_subdomains(self) -> list[str]:
        """Return the unique subdomain names currently listed, sorted alphabetically."""
        script = """
        return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children;"""
        subdomain_body = self.script(script)
        # The set comprehension drops duplicate rows before sorting.
        return sorted(
            {row.find_element("xpath", ".//div/a").text for row in subdomain_body}
        )
Sits on top of selenium to streamline automation and scraping tasks.
def
expand_subdomains(self):
def expand_subdomains(self, max_stalled_polls: int = 10) -> None:
    """Expand the listing of subdomains until all are visible.

    Mostly uses JavaScript to reach through the page's nested shadow roots.

    Args:
        max_stalled_polls: Number of consecutive one-second polls without
            any new rows after which expansion gives up.

    Raises:
        Whatever `self.script` raises when the expected page structure is
        missing.
    """
    # Clicks "load more" when the button exists, then reports the row count.
    # Without the guard, a missing button makes `.click()` throw.
    click_and_count = """
    var moreButton = document.getElementsByName("domain-view")[0].shadowRoot.getElementById("report").children[3].children[0].shadowRoot.children[0].children[1].children[0].getElementsByClassName("load-more mt-3")[0];
    function getVisibleSubdomains() {
        return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children.length;
    }
    if (moreButton) {
        moreButton.click();
    }
    return getVisibleSubdomains();
    """
    badge_text = self.script(
        'return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].shadowRoot.getElementById("info-badge").textContent;'
    )
    badge = badge_text.strip().strip("()").strip()
    # NOTE(review): the badge appears to abbreviate large counts as e.g.
    # "1.2 K"; the exact format is not verified here. Splitting on "K"
    # accepts the number with or without a space before the suffix.
    if "K" in badge.upper():
        total_subdomains = round(float(badge.upper().split("K")[0]) * 1000)
    else:
        total_subdomains = int(badge)
    bar = ProgBar(total_subdomains)
    previous_count = -1
    stalled_polls = 0
    while True:
        visible_subdomains = self.script(click_and_count)
        bar.display(
            prefix="Expanding subdomains", counter_override=visible_subdomains
        )
        if visible_subdomains >= total_subdomains:
            break
        # An abbreviated badge can overstate the real total, so the target
        # may be unreachable; stop once the row count stops growing.
        if visible_subdomains > previous_count:
            previous_count = visible_subdomains
            stalled_polls = 0
        else:
            stalled_polls += 1
            if stalled_polls >= max_stalled_polls:
                break
        time.sleep(1)
Expand the listing of subdomains until all are visible. Mostly uses JavaScript to reach through the page's nested shadow roots.
def
get_subdomains(self) -> list[str]:
def get_subdomains(self) -> list[str]:
    """Return the unique subdomain names currently listed, sorted alphabetically.

    Runs a script that returns the child rows of the subdomain listing and
    reads the text of the link found under each row.
    """
    script = """
    return document.getElementsByName("domain-view")[0].shadowRoot.children[0].children[3].children[0].shadowRoot.children[0].children[1].children[0].children[0].shadowRoot.children[0].children[1].children;"""
    subdomain_body = self.script(script)
    # The set comprehension drops duplicate rows; sorted() accepts any
    # iterable, so no intermediate list is needed.
    return sorted(
        {row.find_element("xpath", ".//div/a").text for row in subdomain_body}
    )
Inherited Members
- seleniumuser.seleniumuser.User
- User
- configure_firefox
- configure_chrome
- search_for_driver
- set_implicit_wait
- open_browser
- close_browser
- open_tab
- switch_to_tab
- get_num_tabs
- close_tab
- get
- get_soup
- current_url
- turbo
- chill
- script
- remove
- get_length
- find
- find_children
- scroll
- scroll_into_view
- text
- click
- clear
- switch_to_iframe
- switch_to_parent_frame
- select
- click_elements
- get_click_list
- send_keys
- fill_next
- wait_until
- dismiss_alert
- solve_recaptcha_v3
def
get_root_domain(url: str) -> str:
def get_root_domain(url: str) -> str:
    """Get root domain location from url.

    Keeps only the last two dot-separated labels of the host, so a
    multi-label public suffix is not recognised ("www.bbc.co.uk" gives
    "co.uk").

    >>> get_root_domain("https://www.website.com")
    'website.com'

    Args:
        url: A full url or a bare host name, with or without a scheme.

    Returns:
        The lower-cased root domain, or `url` unchanged if no host is found.
    """
    cleaned = url.strip().lower()
    parsed = urlparse(cleaned)
    if not parsed.netloc:
        # Without a scheme urlparse puts the host in `path`; the "//" prefix
        # makes it parse as a network location instead.
        parsed = urlparse(f"//{cleaned}")
    # `hostname` leaves out any "user:pass@" prefix and ":port" suffix that
    # `netloc` would carry into the result.
    host = (parsed.hostname or "").rstrip(".")
    if not host:
        return url
    # Remove any leading "www." or subdomains
    return ".".join(host.split(".")[-2:])
Get root domain location from url.
>>> print(get_root_domain("https://www.website.com"))
website.com
def
main(args: argparse.Namespace = None):
def main(args: argparse.Namespace = None):
    """Scrape VirusTotal for the subdomains of `args.url`, print them and save them.

    Args:
        args: Parsed arguments carrying `url`, `output_file` and `browser`.
            When omitted, they are read from the command line.
    """
    if args is None:
        args = get_args()

    args.url = get_root_domain(args.url)
    if args.output_file:
        # The command line (or a caller-built Namespace) may supply a plain
        # string, which has no .write_text(); normalise it to a Path.
        output_path = Path(args.output_file)
    else:
        output_path = Path.cwd() / "subtotals" / f"{args.url}-subdomains.txt"
        args.output_file = output_path
    output_path.parent.mkdir(parents=True, exist_ok=True)

    virustotal_url = f"https://www.virustotal.com/gui/domain/{args.url}/relations"
    # Only firefox is run headless; any other browser gets headless=False.
    with User(headless=args.browser == "firefox", browser_type=args.browser) as user:
        user.get(virustotal_url)
        time.sleep(1)
        try:
            user.expand_subdomains()
        except Exception:
            # Presumably a captcha page replaced the report; try to clear it,
            # then retry once and let a second failure propagate.
            try:
                user.solve_recaptcha_v3()
            except Exception:
                # Best effort: the retry below surfaces any real problem.
                pass
            time.sleep(1)
            user.expand_subdomains()
        subdomains = user.get_subdomains()
        print(*subdomains, sep="\n")
        print(f"Found {len(subdomains)} unique subdomains.")
        output_path.write_text("\n".join(subdomains), encoding="utf-8")