bandripper.bandripper

  1import argparse
  2import json
  3import re
  4import string
  5from dataclasses import dataclass
  6from pathlib import Path
  7from urllib.parse import urlparse
  8
  9import requests
 10import whosyouragent
 11from bs4 import BeautifulSoup
 12from noiftimer import Timer
 13from printbuddies import ProgBar
 14
# Directory that contains this module file.
root = Path(__file__).parent
 16
 17
 18def clean_string(text: str) -> str:
 19    """Remove punctuation and trailing spaces from text."""
 20    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()
 21
 22
 23@dataclass
 24class Track:
 25    title: str
 26    number: int
 27    url: str
 28
 29    def __post_init__(self):
 30        self.title = clean_string(self.title)
 31
 32    @property
 33    def numbered_title(self):
 34        num = str(self.number)
 35        if len(num) == 1:
 36            num = "0" + num
 37        return f"{num} - {self.title}"
 38
 39
 40@dataclass
 41class Album:
 42    url: str
 43    artist: str = None
 44    title: str = None
 45    tracks: list[Track] = None
 46    art_url: str = None
 47
 48    def __repr__(self):
 49        return f"{self.title} by {self.artist}"
 50
 51    def __post_init__(self):
 52        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
 53        if response.status_code != 200:
 54            raise RuntimeError(
 55                f"Getting album info failed with code {response.status_code}"
 56            )
 57        soup = BeautifulSoup(response.text, "html.parser")
 58        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
 59        for script in soup.find_all("script"):
 60            if script.get("data-cart"):
 61                data = script
 62                break
 63        data = json.loads(data.attrs["data-tralbum"])
 64        self.artist = clean_string(data["artist"])
 65        self.title = clean_string(data["current"]["title"])
 66        self.tracks = [
 67            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
 68            for track in data["trackinfo"]
 69            if track.get("file")
 70        ]
 71
 72
 73class AlbumRipper:
 74    def __init__(
 75        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
 76    ):
 77        """
 78        :param no_track_number: If True, don't add the track
 79        number to the front of the track title."""
 80        self.album = Album(album_url)
 81        self.no_track_number = no_track_number
 82        self.overwrite = overwrite
 83
 84    def make_save_path(self):
 85        self.save_path = Path.cwd() / self.album.artist / self.album.title
 86        self.save_path.mkdir(parents=True, exist_ok=True)
 87
 88    @property
 89    def headers(self) -> dict:
 90        """Get a headers dict with a random useragent."""
 91        return whosyouragent.get_agent(as_dict=True)
 92
 93    def save_track(self, track_title: str, content: bytes) -> Path:
 94        """Save track to self.save_path/{track_title}.mp3.
 95        Returns the Path object for the save location.
 96
 97        :param content: The binary data of the track."""
 98        file_path = self.save_path / f"{track_title}.mp3"
 99        file_path.write_bytes(content)
100        return file_path
101
102    def get_track_content(self, track_url: str) -> bytes:
103        """Make a request to track_url and return the content.
104        Raises a RunTimeError exception if response.status_code != 200."""
105        response = requests.get(track_url, headers=self.headers)
106        if response.status_code != 200:
107            raise RuntimeError(
108                f"Downloading track failed with status code {response.status_code}."
109            )
110        return response.content
111
112    def download_album_art(self):
113        """Download the album art and save as a .jpg."""
114        file_path = self.save_path / f"{self.album.title}.jpg"
115        try:
116            response = requests.get(self.album.art_url, headers=self.headers)
117            file_path.write_bytes(response.content)
118        except Exception as e:
119            print(f"Failed to download art for {self.album}.")
120            print(e)
121
122    def track_exists(self, track: Track) -> bool:
123        """Return if a track already exists in self.save_path."""
124        path = self.save_path / (
125            track.title if self.no_track_number else track.numbered_title
126        )
127        return path.with_suffix(".mp3").exists()
128
129    def rip(self):
130        """Download and save the album tracks and album art."""
131        if len(self.album.tracks) == 0:
132            print(f"No public tracks available for {self.album}.")
133            return None
134        self.make_save_path()
135        self.download_album_art()
136        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
137        fails = []
138        if not self.overwrite:
139            self.album.tracks = [
140                track for track in self.album.tracks if not self.track_exists(track)
141            ]
142        for track in self.album.tracks:
143            bar.display(
144                suffix=f"Downloading {track.title}",
145                counter_override=1 if len(self.album.tracks) == 1 else None,
146            )
147            try:
148                content = self.get_track_content(track.url)
149                self.save_track(
150                    track.title if self.no_track_number else track.numbered_title,
151                    content,
152                )
153            except Exception as e:
154                fails.append((track, str(e)))
155        elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True)
156        print(f"Finished downloading {self.album} in {elapsed_time}.")
157        if fails:
158            print("The following tracks failed to download:")
159            for fail in fails:
160                print(f"{fail[0].title}: {fail[1]}")
161
162
class BandRipper:
    """Download every album listed in the music grid of a band page."""

    def __init__(
        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param band_url: Url of the page listing the band's albums.

        :param no_track_number: Passed on to each `AlbumRipper`.

        :param overwrite: Passed on to each `AlbumRipper`."""
        self.band_url = band_url
        self.albums = []
        for url in self.get_album_urls(band_url):
            try:
                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
            except Exception as e:
                # An album that can't be loaded is reported and skipped
                # so the remaining albums can still be ripped.
                print(e)

    @staticmethod
    def _absolute_album_url(base_url: str, href: str) -> str:
        """Resolve href against base_url; an href that is
        already a full url is returned as is."""
        from urllib.parse import urljoin

        return urljoin(base_url, href)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url.

        Raises a RuntimeError exception if response.status_code != 200
        or if the page has no music grid."""
        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        if grid is None:
            raise RuntimeError(f"No discography found at {band_url}.")
        parsed_url = urlparse(band_url)
        base_url = f"https://{parsed_url.netloc}"
        urls = []
        for album in grid.find_all("li"):
            # Skip list items that carry no link.
            href = album.a.get("href") if album.a else None
            if href:
                urls.append(self._absolute_album_url(base_url, href))
        return urls

    def rip(self):
        """Rip every album in self.albums and print a summary.

        An album whose rip raises is recorded and reported at the end
        instead of stopping the remaining albums."""
        if not self.albums:
            # Nothing was loaded, so there is no first album to read the artist from.
            print(f"No albums to download from {self.band_url}.")
            return None
        artist = self.albums[0].album.artist
        print(f"Downloading {len(self.albums)} albums by {artist}.")
        timer = Timer()
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        elapsed_time = timer.current_elapsed_time()
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}."
        )
        if fails:
            print("The following downloads failed:")
            for fail in fails:
                print(f"{fail[0]}: {fail[1]}")
211
212
def page_is_discography(url: str) -> bool:
    """Tell whether the page at url holds a discography grid.

    Raises a RuntimeError exception if response.status_code != 200."""
    agent_headers = whosyouragent.get_agent(as_dict=True)
    page = requests.get(url, headers=agent_headers)
    if page.status_code != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {page.status_code}."
        )
    parsed_page = BeautifulSoup(page.text, "html.parser")
    # find() gives back None when the page has no music grid element.
    music_grid = parsed_page.find("ol", attrs={"id": "music-grid"})
    return True if music_grid else False
226
227
def get_args() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    Slashes at either end of each url are stripped before returning."""
    urls_help = """ The bandcamp url(s) for the album or artist.
            If the url is to an artists main page,
            all albums will be downloaded.
            The tracks will be saved to a subdirectory of
            your current directory.
            If a track can't be streamed (i.e. private) it
            won't be downloaded. Multiple urls can be passed."""
    no_track_number_help = """ By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior."""
    overwrite_help = """ Pass this flag to overwrite existing files.
        Otherwise don't download tracks that already exist locally."""

    cli = argparse.ArgumentParser()
    cli.add_argument("urls", type=str, nargs="*", help=urls_help)
    cli.add_argument(
        "-n", "--no_track_number", action="store_true", help=no_track_number_help
    )
    cli.add_argument("-o", "--overwrite", action="store_true", help=overwrite_help)

    parsed = cli.parse_args()
    stripped_urls = []
    for raw_url in parsed.urls:
        stripped_urls.append(raw_url.strip("/"))
    parsed.urls = stripped_urls
    return parsed
265
266
def main(args: argparse.Namespace = None):
    """Rip every url in args.urls.

    A url that page_is_discography reports as a discography page is handled
    by BandRipper; any other url is handled by AlbumRipper.

    :param args: Parsed arguments; when falsy they are obtained from get_args()."""
    args = args or get_args()
    for url in args.urls:
        ripper_class = BandRipper if page_is_discography(url) else AlbumRipper
        ripper_class(url, args.no_track_number, args.overwrite).rip()
276
277
# Run the command line interface when this file is executed as a script.
if __name__ == "__main__":
    main(get_args())
def clean_string(text: str) -> str:
19def clean_string(text: str) -> str:
20    """Remove punctuation and trailing spaces from text."""
21    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()

Remove punctuation and leading/trailing whitespace from text.

@dataclass
class Track:
24@dataclass
25class Track:
26    title: str
27    number: int
28    url: str
29
30    def __post_init__(self):
31        self.title = clean_string(self.title)
32
33    @property
34    def numbered_title(self):
35        num = str(self.number)
36        if len(num) == 1:
37            num = "0" + num
38        return f"{num} - {self.title}"
Track(title: str, number: int, url: str)
@dataclass
class Album:
41@dataclass
42class Album:
43    url: str
44    artist: str = None
45    title: str = None
46    tracks: list[Track] = None
47    art_url: str = None
48
49    def __repr__(self):
50        return f"{self.title} by {self.artist}"
51
52    def __post_init__(self):
53        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
54        if response.status_code != 200:
55            raise RuntimeError(
56                f"Getting album info failed with code {response.status_code}"
57            )
58        soup = BeautifulSoup(response.text, "html.parser")
59        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
60        for script in soup.find_all("script"):
61            if script.get("data-cart"):
62                data = script
63                break
64        data = json.loads(data.attrs["data-tralbum"])
65        self.artist = clean_string(data["artist"])
66        self.title = clean_string(data["current"]["title"])
67        self.tracks = [
68            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
69            for track in data["trackinfo"]
70            if track.get("file")
71        ]
Album( url: str, artist: str = None, title: str = None, tracks: list[bandripper.bandripper.Track] = None, art_url: str = None)
class AlbumRipper:
 74class AlbumRipper:
 75    def __init__(
 76        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
 77    ):
 78        """
 79        :param no_track_number: If True, don't add the track
 80        number to the front of the track title."""
 81        self.album = Album(album_url)
 82        self.no_track_number = no_track_number
 83        self.overwrite = overwrite
 84
 85    def make_save_path(self):
 86        self.save_path = Path.cwd() / self.album.artist / self.album.title
 87        self.save_path.mkdir(parents=True, exist_ok=True)
 88
 89    @property
 90    def headers(self) -> dict:
 91        """Get a headers dict with a random useragent."""
 92        return whosyouragent.get_agent(as_dict=True)
 93
 94    def save_track(self, track_title: str, content: bytes) -> Path:
 95        """Save track to self.save_path/{track_title}.mp3.
 96        Returns the Path object for the save location.
 97
 98        :param content: The binary data of the track."""
 99        file_path = self.save_path / f"{track_title}.mp3"
100        file_path.write_bytes(content)
101        return file_path
102
103    def get_track_content(self, track_url: str) -> bytes:
104        """Make a request to track_url and return the content.
105        Raises a RunTimeError exception if response.status_code != 200."""
106        response = requests.get(track_url, headers=self.headers)
107        if response.status_code != 200:
108            raise RuntimeError(
109                f"Downloading track failed with status code {response.status_code}."
110            )
111        return response.content
112
113    def download_album_art(self):
114        """Download the album art and save as a .jpg."""
115        file_path = self.save_path / f"{self.album.title}.jpg"
116        try:
117            response = requests.get(self.album.art_url, headers=self.headers)
118            file_path.write_bytes(response.content)
119        except Exception as e:
120            print(f"Failed to download art for {self.album}.")
121            print(e)
122
123    def track_exists(self, track: Track) -> bool:
124        """Return if a track already exists in self.save_path."""
125        path = self.save_path / (
126            track.title if self.no_track_number else track.numbered_title
127        )
128        return path.with_suffix(".mp3").exists()
129
130    def rip(self):
131        """Download and save the album tracks and album art."""
132        if len(self.album.tracks) == 0:
133            print(f"No public tracks available for {self.album}.")
134            return None
135        self.make_save_path()
136        self.download_album_art()
137        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
138        fails = []
139        if not self.overwrite:
140            self.album.tracks = [
141                track for track in self.album.tracks if not self.track_exists(track)
142            ]
143        for track in self.album.tracks:
144            bar.display(
145                suffix=f"Downloading {track.title}",
146                counter_override=1 if len(self.album.tracks) == 1 else None,
147            )
148            try:
149                content = self.get_track_content(track.url)
150                self.save_track(
151                    track.title if self.no_track_number else track.numbered_title,
152                    content,
153                )
154            except Exception as e:
155                fails.append((track, str(e)))
156        elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True)
157        print(f"Finished downloading {self.album} in {elapsed_time}.")
158        if fails:
159            print("The following tracks failed to download:")
160            for fail in fails:
161                print(f"{fail[0].title}: {fail[1]}")
AlbumRipper( album_url: str, no_track_number: bool = False, overwrite: bool = False)
75    def __init__(
76        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
77    ):
78        """
79        :param no_track_number: If True, don't add the track
80        number to the front of the track title."""
81        self.album = Album(album_url)
82        self.no_track_number = no_track_number
83        self.overwrite = overwrite
Parameters
  • no_track_number: If True, don't add the track number to the front of the track title.
def make_save_path(self):
85    def make_save_path(self):
86        self.save_path = Path.cwd() / self.album.artist / self.album.title
87        self.save_path.mkdir(parents=True, exist_ok=True)
headers: dict

Get a headers dict with a random useragent.

def save_track(self, track_title: str, content: bytes) -> pathlib.Path:
 94    def save_track(self, track_title: str, content: bytes) -> Path:
 95        """Save track to self.save_path/{track_title}.mp3.
 96        Returns the Path object for the save location.
 97
 98        :param content: The binary data of the track."""
 99        file_path = self.save_path / f"{track_title}.mp3"
100        file_path.write_bytes(content)
101        return file_path

Save track to self.save_path/{track_title}.mp3. Returns the Path object for the save location.

Parameters
  • content: The binary data of the track.
def get_track_content(self, track_url: str) -> bytes:
103    def get_track_content(self, track_url: str) -> bytes:
104        """Make a request to track_url and return the content.
105        Raises a RunTimeError exception if response.status_code != 200."""
106        response = requests.get(track_url, headers=self.headers)
107        if response.status_code != 200:
108            raise RuntimeError(
109                f"Downloading track failed with status code {response.status_code}."
110            )
111        return response.content

Make a request to track_url and return the content. Raises a RuntimeError exception if response.status_code != 200.

def download_album_art(self):
113    def download_album_art(self):
114        """Download the album art and save as a .jpg."""
115        file_path = self.save_path / f"{self.album.title}.jpg"
116        try:
117            response = requests.get(self.album.art_url, headers=self.headers)
118            file_path.write_bytes(response.content)
119        except Exception as e:
120            print(f"Failed to download art for {self.album}.")
121            print(e)

Download the album art and save as a .jpg.

def track_exists(self, track: bandripper.bandripper.Track) -> bool:
123    def track_exists(self, track: Track) -> bool:
124        """Return if a track already exists in self.save_path."""
125        path = self.save_path / (
126            track.title if self.no_track_number else track.numbered_title
127        )
128        return path.with_suffix(".mp3").exists()

Return whether a track already exists in self.save_path.

def rip(self):
130    def rip(self):
131        """Download and save the album tracks and album art."""
132        if len(self.album.tracks) == 0:
133            print(f"No public tracks available for {self.album}.")
134            return None
135        self.make_save_path()
136        self.download_album_art()
137        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
138        fails = []
139        if not self.overwrite:
140            self.album.tracks = [
141                track for track in self.album.tracks if not self.track_exists(track)
142            ]
143        for track in self.album.tracks:
144            bar.display(
145                suffix=f"Downloading {track.title}",
146                counter_override=1 if len(self.album.tracks) == 1 else None,
147            )
148            try:
149                content = self.get_track_content(track.url)
150                self.save_track(
151                    track.title if self.no_track_number else track.numbered_title,
152                    content,
153                )
154            except Exception as e:
155                fails.append((track, str(e)))
156        elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True)
157        print(f"Finished downloading {self.album} in {elapsed_time}.")
158        if fails:
159            print("The following tracks failed to download:")
160            for fail in fails:
161                print(f"{fail[0].title}: {fail[1]}")

Download and save the album tracks and album art.

class BandRipper:
164class BandRipper:
165    def __init__(
166        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
167    ):
168        self.band_url = band_url
169        self.albums = []
170        for url in self.get_album_urls(band_url):
171            try:
172                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
173            except Exception as e:
174                print(e)
175
176    def get_album_urls(self, band_url: str) -> list[str]:
177        """Get album urls from the main bandcamp url."""
178        print(f"Fetching discography from {band_url}...")
179        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
180        if response.status_code != 200:
181            raise RuntimeError(
182                f"Getting {band_url} failed with status code {response.status_code}."
183            )
184        soup = BeautifulSoup(response.text, "html.parser")
185        grid = soup.find("ol", attrs={"id": "music-grid"})
186        parsed_url = urlparse(band_url)
187        base_url = f"https://{parsed_url.netloc}"
188        return [base_url + album.a.get("href") for album in grid.find_all("li")]
189
190    def rip(self):
191        print(
192            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
193        )
194        timer = Timer()
195        timer.start()
196        fails = []
197        for album in self.albums:
198            try:
199                album.rip()
200            except Exception as e:
201                fails.append((album, e))
202        timer.stop()
203        artist = self.albums[0].album.artist
204        elapsed_time = timer.current_elapsed_time()
205        print(
206            f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}."
207        )
208        if fails:
209            print(f"The following downloads failed:")
210            for fail in fails:
211                print(f"{fail[0]}: {fail[1]}")
BandRipper( band_url: str, no_track_number: bool = False, overwrite: bool = False)
165    def __init__(
166        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
167    ):
168        self.band_url = band_url
169        self.albums = []
170        for url in self.get_album_urls(band_url):
171            try:
172                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
173            except Exception as e:
174                print(e)
def get_album_urls(self, band_url: str) -> list[str]:
176    def get_album_urls(self, band_url: str) -> list[str]:
177        """Get album urls from the main bandcamp url."""
178        print(f"Fetching discography from {band_url}...")
179        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
180        if response.status_code != 200:
181            raise RuntimeError(
182                f"Getting {band_url} failed with status code {response.status_code}."
183            )
184        soup = BeautifulSoup(response.text, "html.parser")
185        grid = soup.find("ol", attrs={"id": "music-grid"})
186        parsed_url = urlparse(band_url)
187        base_url = f"https://{parsed_url.netloc}"
188        return [base_url + album.a.get("href") for album in grid.find_all("li")]

Get album urls from the main bandcamp url.

def rip(self):
190    def rip(self):
191        print(
192            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
193        )
194        timer = Timer()
195        timer.start()
196        fails = []
197        for album in self.albums:
198            try:
199                album.rip()
200            except Exception as e:
201                fails.append((album, e))
202        timer.stop()
203        artist = self.albums[0].album.artist
204        elapsed_time = timer.current_elapsed_time()
205        print(
206            f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}."
207        )
208        if fails:
209            print(f"The following downloads failed:")
210            for fail in fails:
211                print(f"{fail[0]}: {fail[1]}")
def page_is_discography(url: str) -> bool:
214def page_is_discography(url: str) -> bool:
215    """Returns whether the url is to a discography page or not."""
216    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
217    if response.status_code != 200:
218        raise RuntimeError(
219            f"Getting {url} failed with status code {response.status_code}."
220        )
221    soup = BeautifulSoup(response.text, "html.parser")
222    # Returns None if it doesn't exist.
223    grid = soup.find("ol", attrs={"id": "music-grid"})
224    if grid:
225        return True
226    return False

Returns whether the url is to a discography page or not.

def get_args() -> argparse.Namespace:
229def get_args() -> argparse.Namespace:
230    parser = argparse.ArgumentParser()
231
232    parser.add_argument(
233        "urls",
234        type=str,
235        nargs="*",
236        help=""" The bandcamp url(s) for the album or artist.
237            If the url is to an artists main page,
238            all albums will be downloaded.
239            The tracks will be saved to a subdirectory of
240            your current directory.
241            If a track can't be streamed (i.e. private) it
242            won't be downloaded. Multiple urls can be passed.""",
243    )
244
245    parser.add_argument(
246        "-n",
247        "--no_track_number",
248        action="store_true",
249        help=""" By default the track number will be added
250        to the front of the track title. Pass this switch
251        to disable the behavior.""",
252    )
253
254    parser.add_argument(
255        "-o",
256        "--overwrite",
257        action="store_true",
258        help=""" Pass this flag to overwrite existing files.
259        Otherwise don't download tracks that already exist locally.""",
260    )
261
262    args = parser.parse_args()
263    args.urls = [url.strip("/") for url in args.urls]
264
265    return args
def main(args: argparse.Namespace = None):
268def main(args: argparse.Namespace = None):
269    if not args:
270        args = get_args()
271    for url in args.urls:
272        if page_is_discography(url):
273            ripper = BandRipper(url, args.no_track_number, args.overwrite)
274        else:
275            ripper = AlbumRipper(url, args.no_track_number, args.overwrite)
276        ripper.rip()