bandripper.bandripper
import argparse
import json
import re
import string
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlparse

import requests
import whosyouragent
from bs4 import BeautifulSoup
from noiftimer import Timer
from printbuddies import ProgBar

root = Path(__file__).parent


def clean_string(text: str) -> str:
    """Remove punctuation and trailing spaces from text."""
    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()


@dataclass
class Track:
    title: str
    number: int
    url: str

    def __post_init__(self):
        self.title = clean_string(self.title)

    @property
    def numbered_title(self):
        num = str(self.number)
        if len(num) == 1:
            num = "0" + num
        return f"{num} - {self.title}"


@dataclass
class Album:
    url: str
    artist: str = None
    title: str = None
    tracks: list[Track] = None
    art_url: str = None

    def __repr__(self):
        return f"{self.title} by {self.artist}"

    def __post_init__(self):
        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting album info failed with code {response.status_code}"
            )
        soup = BeautifulSoup(response.text, "html.parser")
        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
        # The script tag carrying the "data-cart" attribute also holds the album
        # metadata as JSON in its "data-tralbum" attribute.
        for script in soup.find_all("script"):
            if script.get("data-cart"):
                data = script
                break
        data = json.loads(data.attrs["data-tralbum"])
        self.artist = clean_string(data["artist"])
        self.title = clean_string(data["current"]["title"])
        # Tracks without a "file" entry aren't publicly streamable and are skipped.
        self.tracks = [
            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
            for track in data["trackinfo"]
            if track.get("file")
        ]


class AlbumRipper:
    def __init__(
        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param no_track_number: If True, don't add the track
        number to the front of the track title."""
        self.album = Album(album_url)
        self.no_track_number = no_track_number
        self.overwrite = overwrite

    def make_save_path(self):
        self.save_path = Path.cwd() / self.album.artist / self.album.title
        self.save_path.mkdir(parents=True, exist_ok=True)

    @property
    def headers(self) -> dict:
        """Get a headers dict with a random useragent."""
        return whosyouragent.get_agent(as_dict=True)

    def save_track(self, track_title: str, content: bytes) -> Path:
        """Save track to self.save_path/{track_title}.mp3.
        Returns the Path object for the save location.

        :param content: The binary data of the track."""
        file_path = self.save_path / f"{track_title}.mp3"
        file_path.write_bytes(content)
        return file_path

    def get_track_content(self, track_url: str) -> bytes:
        """Make a request to track_url and return the content.
        Raises a RuntimeError if response.status_code != 200."""
        response = requests.get(track_url, headers=self.headers)
        if response.status_code != 200:
            raise RuntimeError(
                f"Downloading track failed with status code {response.status_code}."
            )
        return response.content

    def download_album_art(self):
        """Download the album art and save as a .jpg."""
        file_path = self.save_path / f"{self.album.title}.jpg"
        try:
            response = requests.get(self.album.art_url, headers=self.headers)
            file_path.write_bytes(response.content)
        except Exception as e:
            print(f"Failed to download art for {self.album}.")
            print(e)

    def track_exists(self, track: Track) -> bool:
        """Return if a track already exists in self.save_path."""
        path = self.save_path / (
            track.title if self.no_track_number else track.numbered_title
        )
        return path.with_suffix(".mp3").exists()

    def rip(self):
        """Download and save the album tracks and album art."""
        if len(self.album.tracks) == 0:
            print(f"No public tracks available for {self.album}.")
            return None
        self.make_save_path()
        self.download_album_art()
        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
        fails = []
        if not self.overwrite:
            self.album.tracks = [
                track for track in self.album.tracks if not self.track_exists(track)
            ]
        for track in self.album.tracks:
            bar.display(
                suffix=f"Downloading {track.title}",
                counter_override=1 if len(self.album.tracks) == 1 else None,
            )
            try:
                content = self.get_track_content(track.url)
                self.save_track(
                    track.title if self.no_track_number else track.numbered_title,
                    content,
                )
            except Exception as e:
                fails.append((track, str(e)))
        elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True)
        print(f"Finished downloading {self.album} in {elapsed_time}.")
        if fails:
            print("The following tracks failed to download:")
            for fail in fails:
                print(f"{fail[0].title}: {fail[1]}")


class BandRipper:
    def __init__(
        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        self.band_url = band_url
        self.albums = []
        for url in self.get_album_urls(band_url):
            try:
                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
            except Exception as e:
                print(e)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url."""
        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        parsed_url = urlparse(band_url)
        base_url = f"https://{parsed_url.netloc}"
        return [base_url + album.a.get("href") for album in grid.find_all("li")]

    def rip(self):
        print(
            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
        )
        timer = Timer()
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        artist = self.albums[0].album.artist
        elapsed_time = timer.current_elapsed_time()
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}."
        )
        if fails:
            print("The following downloads failed:")
            for fail in fails:
                print(f"{fail[0]}: {fail[1]}")


def page_is_discography(url: str) -> bool:
    """Returns whether the url is to a discography page or not."""
    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
    if response.status_code != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {response.status_code}."
        )
    soup = BeautifulSoup(response.text, "html.parser")
    # Returns None if it doesn't exist.
    grid = soup.find("ol", attrs={"id": "music-grid"})
    if grid:
        return True
    return False


def get_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "urls",
        type=str,
        nargs="*",
        help=""" The bandcamp url(s) for the album or artist.
        If the url is to an artist's main page,
        all albums will be downloaded.
        The tracks will be saved to a subdirectory of
        your current directory.
        If a track can't be streamed (i.e. private) it
        won't be downloaded. Multiple urls can be passed.""",
    )

    parser.add_argument(
        "-n",
        "--no_track_number",
        action="store_true",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior.""",
    )

    parser.add_argument(
        "-o",
        "--overwrite",
        action="store_true",
        help=""" Pass this flag to overwrite existing files.
        Otherwise don't download tracks that already exist locally.""",
    )

    args = parser.parse_args()
    args.urls = [url.strip("/") for url in args.urls]

    return args


def main(args: argparse.Namespace = None):
    if not args:
        args = get_args()
    for url in args.urls:
        # Pass the overwrite flag through so the -o switch is honored.
        if page_is_discography(url):
            ripper = BandRipper(url, args.no_track_number, args.overwrite)
        else:
            ripper = AlbumRipper(url, args.no_track_number, args.overwrite)
        ripper.rip()


if __name__ == "__main__":
    main(get_args())
def clean_string(text: str) -> str:
Remove punctuation and trailing spaces from text.
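A quick sketch of what the helper returns (the input strings are made up for illustration):

from bandripper.bandripper import clean_string

# Punctuation is removed and surrounding whitespace is stripped.
print(clean_string("St. Vincent "))     # St Vincent
print(clean_string("What, Me Worry?"))  # What Me Worry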
@dataclass
class Track:
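Track cleans its title on construction and zero-pads the track number in numbered_title; a small sketch with made-up values:

from bandripper.bandripper import Track

track = Track("Intro!", 3, "https://example.com/intro.mp3")  # hypothetical url
print(track.title)           # Intro  (punctuation stripped by __post_init__)
print(track.numbered_title)  # 03 - Intro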
@dataclass
class Album:
Album(url: str, artist: str = None, title: str = None, tracks: list[bandripper.bandripper.Track] = None, art_url: str = None)
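Constructing an Album fetches and parses the album page immediately, so it needs network access; a sketch assuming a placeholder URL:

from bandripper.bandripper import Album

album = Album("https://someartist.bandcamp.com/album/some-album")  # hypothetical url
print(album)              # "{title} by {artist}" via __repr__
print(len(album.tracks))  # number of publicly streamable tracks
print(album.art_url)      # cover art url pulled from the og:image meta tag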
class AlbumRipper:
AlbumRipper(album_url: str, no_track_number: bool = False, overwrite: bool = False)
Parameters
- no_track_number: If True, don't add the track number to the front of the track title.
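A minimal usage sketch, assuming a valid album URL (the one below is a placeholder); tracks and art are saved under ./{artist}/{album title}:

from bandripper.bandripper import AlbumRipper

ripper = AlbumRipper(
    "https://someartist.bandcamp.com/album/some-album",  # hypothetical url
    no_track_number=False,  # keep "01 - " style prefixes
    overwrite=False,        # skip tracks that already exist locally
)
ripper.rip()  # downloads album art and every publicly streamable track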
def save_track(self, track_title: str, content: bytes) -> pathlib.Path:
Save track to self.save_path/{track_title}.mp3. Returns the Path object for the save location.
Parameters
- content: The binary data of the track.
def get_track_content(self, track_url: str) -> bytes:
Make a request to track_url and return the content. Raises a RuntimeError if response.status_code != 200.
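Together with save_track this can be used to download a single track by hand; a sketch assuming ripper is an AlbumRipper built from a real album URL:

ripper.make_save_path()                        # ensure ./{artist}/{album title} exists
track = ripper.album.tracks[0]                 # first publicly streamable track
content = ripper.get_track_content(track.url)  # raw mp3 bytes
path = ripper.save_track(track.numbered_title, content)
print(path)                                    # e.g. ./Artist/Album/01 - Title.mp3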
def download_album_art(self):
Download the album art and save as a .jpg.
def track_exists(self, track: Track) -> bool:
Return if a track already exists in self.save_path.
def rip(self):
Download and save the album tracks and album art.
class BandRipper:
BandRipper(band_url: str, no_track_number: bool = False, overwrite: bool = False)
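Constructing a BandRipper fetches the artist's discography page and builds an AlbumRipper for each album it finds, so it also needs network access; a sketch with a placeholder URL:

from bandripper.bandripper import BandRipper

ripper = BandRipper("https://someartist.bandcamp.com", overwrite=True)  # hypothetical url
ripper.rip()  # rips every album found on the artist's music grid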
def get_album_urls(self, band_url: str) -> list[str]:
Get album urls from the main bandcamp url.
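The hrefs in the music grid are relative, so they are resolved against the page's host; a standalone sketch of that resolution with a made-up artist URL:

from urllib.parse import urlparse

band_url = "https://someartist.bandcamp.com/music"  # hypothetical url
href = "/album/some-album"                          # as found in a music-grid list item
base_url = f"https://{urlparse(band_url).netloc}"
print(base_url + href)  # https://someartist.bandcamp.com/album/some-album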
def rip(self):
def page_is_discography(url: str) -> bool:
Returns whether the url is to a discography page or not.
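main uses this check to pick between BandRipper and AlbumRipper; the same dispatch can be done directly (placeholder URL below):

from bandripper.bandripper import AlbumRipper, BandRipper, page_is_discography

url = "https://someartist.bandcamp.com/music"  # hypothetical url
ripper = BandRipper(url) if page_is_discography(url) else AlbumRipper(url)
ripper.rip()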
def get_args() -> argparse.Namespace:
def main(args: argparse.Namespace = None):
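main can also be driven without the command line by passing a pre-built Namespace; a sketch with a placeholder URL (urls, no_track_number, and overwrite must all be present):

import argparse

from bandripper.bandripper import main

args = argparse.Namespace(
    urls=["https://someartist.bandcamp.com/album/some-album"],  # hypothetical url
    no_track_number=False,
    overwrite=False,
)
main(args)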