voxscribe.voxscribe
""" Extract text from an mp3 or wav file. """

import os
from datetime import datetime
from pathlib import Path

import requests
import speech_recognition
from pydub import AudioSegment

from whosyouragent import get_agent

root = Path(__file__).parent


def download_audio_file(url: str, file_ext: str) -> Path:
    """Downloads an audio file to
    a folder named audio in
    the same folder as this file.

    :param url: Address of the audio file to fetch.

    :param file_ext: Can be either '.mp3' or '.wav'.

    Returns a Path object for the
    saved file.

    Raises requests.HTTPError if the server answers
    with an error status, so an error page is never
    saved as if it were audio."""
    dest = root / "audio"
    dest.mkdir(parents=True, exist_ok=True)
    # The timestamp contains a ".", and with_suffix() would treat the
    # fractional part as the suffix and replace it, leaving only whole seconds
    # in the name. Swapping the "." for "_" keeps sub-second uniqueness.
    stamp = str(datetime.now().timestamp()).replace(".", "_")
    filepath = (dest / stamp).with_suffix(file_ext)
    # Without a timeout an unresponsive server would block the caller forever.
    source = requests.get(url, headers={"User-Agent": get_agent()}, timeout=30)
    print(f"{source.status_code=}")
    source.raise_for_status()
    with filepath.open("wb") as file:
        file.write(source.content)
    return filepath


def convert_MP3_to_WAV(MP3path: Path | str) -> Path:
    """Converts an mp3 file to a wav file
    of the same name and returns a Path object
    for the wav file."""
    MP3path = Path(MP3path)
    audio = AudioSegment.from_mp3(MP3path)
    WAVpath = MP3path.with_suffix(".wav")
    audio.export(WAVpath, format="wav")
    return WAVpath


def get_text_from_url(url: str, file_ext: str) -> str:
    """Returns text from an mp3 or wav file
    located at the given url.

    :param file_ext: Can be either '.mp3' or '.wav'

    Raises ValueError, before anything is downloaded,
    if file_ext is not one of those two values."""
    if file_ext not in (".mp3", ".wav"):
        raise ValueError('file_ext param must be ".mp3" or ".wav"')
    audiopath = download_audio_file(url, file_ext)
    if file_ext == ".mp3":
        audiopath = convert_MP3_to_WAV(audiopath)
    return get_text_from_WAV(audiopath)


def get_text_from_WAV(WAVpath: Path | str) -> str:
    """Returns text from a wav file
    located at the given file path."""
    WAVpath = Path(WAVpath)
    recognizer = speech_recognition.Recognizer()
    with speech_recognition.AudioFile(str(WAVpath)) as source:
        audio = recognizer.record(source)
        text = recognizer.recognize_google(audio)
    return text


def get_text_from_MP3(MP3path: Path | str) -> str:
    """Returns text from an mp3 file
    located at the given file path."""
    return get_text_from_WAV(convert_MP3_to_WAV(MP3path))


def clean_up(max_age: int):
    """Removes any files from the audio directory
    older than max_age minutes.

    :param max_age: Age limit in minutes, measured from
    each file's last modification time.

    Does nothing if the audio directory does not exist."""
    audiopath = root / "audio"
    if not audiopath.exists():
        return
    # Computed once so every file is judged against the same instant.
    cutoff = datetime.now().timestamp() - 60 * max_age
    for file in audiopath.glob("*.*"):
        try:
            # st_mtime rather than st_ctime: ctime is the creation time on
            # Windows but the last metadata change on Unix, so it is not a
            # portable measure of a file's age.
            if file.is_file() and os.stat(file).st_mtime < cutoff:
                file.unlink()
        except FileNotFoundError:
            # The file disappeared after the directory was listed.
            continue
root = WindowsPath('E:/1vsCode/python/voxscribe/src/voxscribe')
Extract text from an mp3 or wav file.
def
download_audio_file(url: str, file_ext: str) -> pathlib.Path:
def download_audio_file(url: str, file_ext: str) -> Path:
    """Downloads an audio file to
    a folder named audio in
    the same folder as this file.

    :param url: Address of the audio file to fetch.

    :param file_ext: Can be either '.mp3' or '.wav'.

    Returns a Path object for the
    saved file.

    Raises requests.HTTPError if the server answers
    with an error status, so an error page is never
    saved as if it were audio."""
    dest = root / "audio"
    dest.mkdir(parents=True, exist_ok=True)
    # The timestamp contains a ".", and with_suffix() would treat the
    # fractional part as the suffix and replace it, leaving only whole seconds
    # in the name. Swapping the "." for "_" keeps sub-second uniqueness.
    stamp = str(datetime.now().timestamp()).replace(".", "_")
    filepath = (dest / stamp).with_suffix(file_ext)
    # Without a timeout an unresponsive server would block the caller forever.
    source = requests.get(url, headers={"User-Agent": get_agent()}, timeout=30)
    print(f"{source.status_code=}")
    source.raise_for_status()
    with filepath.open("wb") as file:
        file.write(source.content)
    return filepath
Downloads an audio file to a folder named audio in the same folder as this file.
Parameters
- file_ext: Can be either '.mp3' or '.wav'.
Returns a Path object for the saved file.
def
convert_MP3_to_WAV(MP3path: pathlib.Path | str) -> pathlib.Path:
def convert_MP3_to_WAV(MP3path: Path | str) -> Path:
    """Write a wav copy of an mp3 file next to the original.

    The wav file keeps the mp3 file's name with only the
    extension swapped.

    :param MP3path: Location of the mp3 file.

    Returns a Path object for the wav file."""
    source_path = Path(MP3path)
    segment = AudioSegment.from_mp3(source_path)
    target_path = source_path.with_suffix(".wav")
    segment.export(target_path, format="wav")
    return target_path
Converts an mp3 file to a wav file of the same name and returns a Path object for the wav file.
def
get_text_from_url(url: str, file_ext: str) -> str:
def get_text_from_url(url: str, file_ext: str) -> str:
    """Returns text from an mp3 or wav file
    located at the given url.

    :param url: Address of the audio file.

    :param file_ext: Can be either '.mp3' or '.wav'

    Raises ValueError, before anything is downloaded,
    if file_ext is not one of those two values."""
    # Validate first so an unsupported extension does not cost a download
    # and leave an unusable file behind in the audio folder.
    if file_ext not in (".mp3", ".wav"):
        raise ValueError('file_ext param must be ".mp3" or ".wav"')
    audiopath = download_audio_file(url, file_ext)
    if file_ext == ".mp3":
        # The recognizer is only fed wav files, so mp3 is converted first.
        audiopath = convert_MP3_to_WAV(audiopath)
    return get_text_from_WAV(audiopath)
Returns text from an mp3 or wav file located at the given url.
Parameters
- file_ext: Can be either '.mp3' or '.wav'
def
get_text_from_WAV(WAVpath: pathlib.Path | str) -> str:
def get_text_from_WAV(WAVpath: Path | str) -> str:
    """Transcribe the speech in a wav file.

    :param WAVpath: Location of the wav file.

    Returns the text produced by the recognizer's
    recognize_google call."""
    transcriber = speech_recognition.Recognizer()
    wav_location = str(Path(WAVpath))
    with speech_recognition.AudioFile(wav_location) as audio_source:
        recording = transcriber.record(audio_source)
        return transcriber.recognize_google(recording)
Returns text from a wav file located at the given file path.
def
get_text_from_MP3(MP3path: pathlib.Path | str) -> str:
def get_text_from_MP3(MP3path: Path | str) -> str:
    """Transcribe the speech in an mp3 file.

    The file is converted to wav first and the wav copy
    is then passed on for recognition.

    :param MP3path: Location of the mp3 file.

    Returns the recognized text."""
    wav_copy = convert_MP3_to_WAV(MP3path)
    return get_text_from_WAV(wav_copy)
Returns text from an mp3 file located at the given file path.
def
clean_up(max_age: int):
def clean_up(max_age: int):
    """Removes any files from the audio directory
    older than max_age minutes.

    :param max_age: Age limit in minutes, measured from
    each file's last modification time.

    Does nothing if the audio directory does not exist."""
    audiopath = root / "audio"
    if not audiopath.exists():
        return
    # Computed once so every file is judged against the same instant.
    cutoff = datetime.now().timestamp() - 60 * max_age
    for file in audiopath.glob("*.*"):
        try:
            # st_mtime rather than st_ctime: ctime is the creation time on
            # Windows but the last metadata change on Unix, so it is not a
            # portable measure of a file's age.
            if file.is_file() and os.stat(file).st_mtime < cutoff:
                file.unlink()
        except FileNotFoundError:
            # The file disappeared after the directory was listed.
            continue
Removes any files from the audio directory older than max_age minutes.