Source code for libsoni.util.utils

import librosa
import libfmp.b
import libfmp.c6
import matplotlib
from matplotlib import pyplot as plt
from matplotlib import patches
from numba import jit
import numpy as np
import pandas as pd
from typing import Tuple

SAMPLES = ['bass-drum', 'click', 'hi-hat']


def fade_signal(signal: np.ndarray, fading_duration: float = 0, fs: int = 22050) -> np.ndarray:
    """Fade in / out audio signal.

    Parameters
    ----------
    signal: np.ndarray (np.float32 / np.float64) [shape=(N, )]
        Signal to be faded
    fading_duration: float, default = 0
        Duration of fade-in and fade-out, in seconds
    fs: int, default = 22050
        Sampling rate, in samples per second

    Returns
    -------
    faded_signal: np.ndarray (np.float32 / np.float64) [shape=(N, )]
        Faded signal
    """
    num_samples = int(fading_duration * fs)

    # Nothing to fade (also avoids the empty-slice broadcast error for num_samples == 0)
    if num_samples == 0:
        return signal

    # If the signal is shorter than twice the fading duration, multiply the whole signal with a sine half-wave
    if len(signal) < 2 * num_samples:
        signal *= np.sin(np.pi * np.arange(len(signal)) / len(signal))
    else:
        # Quarter-period sine ramp for the fade-in, quarter-period cosine ramp for the fade-out
        signal[:num_samples] *= np.sin(np.pi * np.arange(num_samples) / (2 * fading_duration * fs))
        signal[-num_samples:] *= np.cos(np.pi * np.arange(num_samples) / (2 * fading_duration * fs))

    return signal
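
# A minimal usage sketch for fade_signal (illustration only, not part of the
# module): apply 50 ms fade ramps to a synthetic 440 Hz tone to avoid clicks.
#
#   fs = 22050
#   tone = np.sin(2 * np.pi * 440 * np.arange(fs) / fs)
#   faded = fade_signal(tone, fading_duration=0.05, fs=fs)
#   # The first and last int(0.05 * fs) = 1102 samples ramp in/out sinusoidally.
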
def normalize_signal(signal: np.ndarray) -> np.ndarray:
    """Max-normalize audio signal.

    Parameters
    ----------
    signal: np.ndarray (np.float32 / np.float64) [shape=(N, )]
        Signal to be normalized

    Returns
    -------
    normalized_signal: np.ndarray (np.float32 / np.float64) [shape=(N, )]
        Normalized signal
    """
    normalized_signal = signal / np.max(np.abs(signal))
    return normalized_signal
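
# A minimal usage sketch for normalize_signal (illustration only): after
# max-normalization, the largest absolute sample value is exactly 1.0.
#
#   x = np.array([0.1, -0.5, 0.25])
#   normalize_signal(x)  # -> array([ 0.2, -1. ,  0.5])
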
def warp_sample(sample: np.ndarray,
                reference_pitch: int,
                target_pitch: int,
                target_duration_sec: float,
                gain: float = 1.0,
                fs: int = 22050,
                fading_duration: float = 0.01) -> np.ndarray:
    """Warp a sample in pitch and duration.

    Given the reference pitch of the sample provided as np.ndarray, the sample gets
    pitch-shifted using librosa.effects.pitch_shift(). For the temporal alignment,
    if the desired duration is shorter than the original sample, the sample gets
    cropped; if the desired duration is longer than the provided sample, the
    returned signal gets zero-padded at the end.

    Parameters
    ----------
    sample: np.ndarray (np.float32 / np.float64) [shape=(K, )]
        Sample to be warped.
    reference_pitch: int
        Reference pitch of the given sample.
    target_pitch: int
        Target pitch for the warped sample.
    target_duration_sec: float
        Duration of the returned signal, in seconds.
    gain: float, default = 1.0
        Gain of the generated tone.
    fs: int, default = 22050
        Sampling rate, in samples per second.
    fading_duration: float, default = 0.01
        Duration of fade-in and fade-out, in seconds (to avoid clicks).

    Returns
    -------
    warped_sample: np.ndarray (np.float32 / np.float64) [shape=(M, )]
        Warped sample.
    """
    # Compute pitch difference in semitones
    pitch_steps = target_pitch - reference_pitch

    # Apply pitch-shifting to the original sample
    pitch_shifted_sample = librosa.effects.pitch_shift(y=sample, sr=fs, n_steps=pitch_steps)

    num_samples = int(target_duration_sec * fs)

    # Case: target duration is shorter than sample -> cropping
    if num_samples <= len(pitch_shifted_sample):
        warped_sample = pitch_shifted_sample[:num_samples]
    # Case: target duration is longer than sample -> zero-padding
    else:
        warped_sample = np.zeros(num_samples)
        warped_sample[:len(pitch_shifted_sample)] = pitch_shifted_sample

    warped_sample = fade_signal(signal=warped_sample, fs=fs, fading_duration=fading_duration)
    warped_sample *= gain

    return warped_sample
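
# A minimal usage sketch for warp_sample (illustration only; the noise burst is
# a hypothetical stand-in for a real sample): shift a sample with reference
# pitch 69 (A4) down to pitch 60 (C4) and zero-pad it to 0.5 seconds.
#
#   fs = 22050
#   sample = np.random.default_rng(0).standard_normal(fs // 10)
#   warped = warp_sample(sample, reference_pitch=69, target_pitch=60,
#                        target_duration_sec=0.5, fs=fs)
#   len(warped) == int(0.5 * fs)  # -> True
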
def pitch_to_frequency(pitch: int, reference_pitch: int = 69, tuning_frequency: float = 440.0) -> float:
    """Calculates the corresponding frequency for a given pitch.

    Parameters
    ----------
    pitch: int
        Pitch to calculate frequency for.
    reference_pitch: int, default = 69
        Reference pitch for calculation.
    tuning_frequency: float, default = 440.0
        Tuning frequency for calculation, in Hertz.

    Returns
    -------
    frequency: float
        Calculated frequency for given pitch, in Hertz.
    """
    frequency = tuning_frequency * 2 ** ((pitch - reference_pitch) / 12)
    return frequency
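
# Worked example for pitch_to_frequency: with the default tuning A4 = 440 Hz
# (MIDI pitch 69), middle C (MIDI pitch 60) lies 9 semitones below the
# reference, giving 440 * 2 ** (-9 / 12) ≈ 261.63 Hz.
#
#   pitch_to_frequency(60)  # -> 261.6255653005986
#   pitch_to_frequency(69)  # -> 440.0
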
# Taken from FMP Notebooks, https://www.audiolabs-erlangen.de/resources/MIR/FMP/C6/C6S2_TempoBeat.html
def plot_sonify_novelty_beats(fn_wav, fn_ann, title=''):
    ann, label_keys = libfmp.c6.read_annotation_pos(fn_ann, label='onset', header=0)
    Fs = 22050
    x, _ = librosa.load(fn_wav, sr=Fs)
    x_duration = len(x) / Fs
    nov, Fs_nov = libfmp.c6.compute_novelty_spectrum(x, Fs=Fs, N=2048, H=256, gamma=1, M=10, norm=1)
    figsize = (8, 1.5)
    fig, ax, line = libfmp.b.plot_signal(nov, Fs_nov, color='k', figsize=figsize, title=title)
    libfmp.b.plot_annotation_line(ann, ax=ax, label_keys=label_keys,
                                  nontime_axis=True, time_min=0, time_max=x_duration)
    plt.show()
    return fig, ax


def format_df(df: pd.DataFrame) -> pd.DataFrame:
    """Lower-case the column names of an event DataFrame and complete the missing 'duration' or 'end' column."""
    df = df.copy().rename(columns=str.lower)
    check_df_schema(df)

    if 'duration' not in df.columns:
        try:
            df['duration'] = df['end'] - df['start']
        except KeyError:
            raise ValueError('Input DataFrame must have start and duration/end columns.')
    else:
        try:
            df['end'] = df['start'] + df['duration']
        except KeyError:
            raise ValueError('Input DataFrame must have start and duration/end columns.')

    return df
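
# A minimal usage sketch for format_df (illustration only; the note events are
# hypothetical): given the expected schema, the complementary 'end' column is
# derived from 'start' + 'duration'.
#
#   df = pd.DataFrame({'start': [0.0, 1.0], 'duration': [0.5, 0.25],
#                      'pitch': [60, 64], 'velocity': [1.0, 0.8],
#                      'label': ['piano', 'piano']})
#   format_df(df)['end'].tolist()  # -> [0.5, 1.25]
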
def mix_sonification_and_original(sonification: np.ndarray,
                                  original_audio: np.ndarray,
                                  gain_lin_sonification: float = 1.0,
                                  gain_lin_original_audio: float = 1.0,
                                  panning: float = 1.0,
                                  duration: int = None):
    """This function takes a sonification and an original audio signal and mixes them into a stereo signal.

    Parameters
    ----------
    sonification: np.ndarray (np.float32 / np.float64) [shape=(N, )]
        Sonification
    original_audio: np.ndarray (np.float32 / np.float64) [shape=(N, )]
        Original audio
    gain_lin_sonification: float, default = 1.0
        Linear gain for the sonification
    gain_lin_original_audio: float, default = 1.0
        Linear gain for the original audio
    panning: float, default = 1.0
        Controls the panning of the mixed output
            panning = 1.0 means original audio on left and sonification on right channel
            panning = 0.5 means same amount of both signals on both channels
            panning = 0.0 means sonification on left and original audio on right channel
    duration: int, default = None
        Duration of the output waveform, given in samples.

    Returns
    -------
    stereo_audio: np.ndarray (np.float32 / np.float64) [shape=(2, N)]
        Mix of the signals
    """
    assert 0.0 <= panning <= 1.0, 'Panning must be a value between 0.0 and 1.0.'

    if duration is None:
        num_samples = len(original_audio)
    else:
        num_samples = duration

    # Zero-pad or crop both signals to the common output length
    if len(original_audio) < num_samples:
        original_audio = np.append(original_audio, np.zeros(num_samples - len(original_audio)))
    else:
        original_audio = original_audio[:num_samples]

    if len(sonification) < num_samples:
        sonification = np.append(sonification, np.zeros(num_samples - len(sonification)))
    else:
        sonification = sonification[:num_samples]

    # Perform RMS normalization
    # Calculate the RMS amplitude of each signal
    rms_signal1 = np.sqrt(np.mean(np.square(original_audio)))
    rms_signal2 = np.sqrt(np.mean(np.square(sonification)))

    # Scale the sonification so that both signals have the same RMS amplitude
    normalized_signal1 = original_audio
    normalized_signal2 = sonification * (rms_signal1 / rms_signal2)

    stereo_audio = np.column_stack(
        (panning * (gain_lin_original_audio * normalized_signal1) +
         (1 - panning) * (gain_lin_sonification * normalized_signal2),
         panning * (gain_lin_sonification * normalized_signal2) +
         (1 - panning) * (gain_lin_original_audio * normalized_signal1))).T

    return stereo_audio
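
# A minimal usage sketch for mix_sonification_and_original (illustration only;
# both signals are hypothetical): pan the original audio fully to the left
# channel and the sonification fully to the right.
#
#   fs = 22050
#   original = np.sin(2 * np.pi * 220 * np.arange(fs) / fs)
#   soni = np.sin(2 * np.pi * 880 * np.arange(fs // 2) / fs)  # shorter: gets zero-padded
#   stereo = mix_sonification_and_original(soni, original, panning=1.0)
#   stereo.shape  # -> (2, 22050); row 0 = left, row 1 = right
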
@jit(nopython=True)
def smooth_weights(weights: np.ndarray, fading_samples: int = 0) -> np.ndarray:
    """Weight smoothing with sinusoidal transitions at discontinuities.

    Parameters
    ----------
    weights: np.ndarray (np.float32 / np.float64) [shape=(N, )]
        Input weights
    fading_samples: int, default = 0
        Number of samples for fade-in/out.

    Returns
    -------
    weights_smoothed: np.ndarray (np.float32 / np.float64) [shape=(N, )]
        Smoothed weights
    """
    weights_smoothed = weights.copy()

    for i in range(1, len(weights) - 1):
        if weights[i] != weights[i - 1]:
            # Replace the step at index i with a half-period sine transition
            amplitude = np.abs(weights[i - 1] - weights[i]) / 2
            x = np.linspace(-np.pi / 2, np.pi / 2, fading_samples) * -1 * np.sign(weights[i - 1] - weights[i])
            y = amplitude * np.sin(x) + (weights[i - 1] + weights[i]) / 2
            start_idx = i - int(fading_samples / 2)
            end_idx = start_idx + len(y)
            if start_idx >= 0 and end_idx < len(weights_smoothed):
                weights_smoothed[start_idx:end_idx] = y

    return weights_smoothed
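
# A minimal usage sketch for smooth_weights (illustration only): a step from
# 0 to 1 at index 100 is replaced by a sinusoidal transition of
# fading_samples = 20 samples centered at the discontinuity.
#
#   weights = np.concatenate((np.zeros(100), np.ones(100)))
#   smoothed = smooth_weights(weights, fading_samples=20)
#   # smoothed[90:110] now ramps smoothly from ~0 to ~1 instead of jumping.
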
def visualize_pianoroll(pianoroll_df: pd.DataFrame,
                        xlabel: str = 'Time (seconds)',
                        ylabel: str = 'Pitch',
                        title: str = None,
                        colors: str = 'FMP_1',
                        velocity_alpha: bool = False,
                        figsize: Tuple[float, float] = (12, 4),
                        ax: matplotlib.axes.Axes = None,
                        dpi: int = 72) -> Tuple[matplotlib.figure.Figure, matplotlib.axes.Axes]:
    """Visualization function for piano-roll representations, given in a pd.DataFrame format.

    Parameters
    ----------
    pianoroll_df: pd.DataFrame
        DataFrame containing pitch-event information.
    xlabel: str, default = 'Time (seconds)'
        Label text for the x-axis.
    ylabel: str, default = 'Pitch'
        Label text for the y-axis.
    title: str, default = None
        Title of the figure.
    colors: str, default = 'FMP_1'
        Colormap; for the default colormap see https://github.com/meinardmueller/libfmp.
    velocity_alpha: bool, default = False
        Set True to weight the opacity of the visualized rectangular regions for each pitch by their velocity value.
    figsize: Tuple[float, float], default = (12, 4)
        Figure size.
    ax: matplotlib.axes.Axes, default = None
        Axes object.
    dpi: int, default = 72
        Resolution of the figure.

    Returns
    -------
    fig: matplotlib.figure.Figure
        Figure instance
    ax: matplotlib.axes.Axes
        Axes object
    """
    pianoroll_df = format_df(pianoroll_df)

    fig = None
    if ax is None:
        fig = plt.figure(figsize=figsize, dpi=dpi)
        ax = plt.subplot(1, 1, 1)

    labels_set = sorted(pianoroll_df['label'].unique())
    colors = libfmp.b.color_argument_to_dict(colors, labels_set)

    pitch_min = pianoroll_df['pitch'].min()
    pitch_max = pianoroll_df['pitch'].max()
    time_min = pianoroll_df['start'].min()
    time_max = pianoroll_df['end'].max()

    # Draw one rectangle per note event
    for i, r in pianoroll_df.iterrows():
        velocity = None if not velocity_alpha else r['velocity']
        rect = patches.Rectangle((r['start'], r['pitch'] - 0.5), r['duration'], 1,
                                 linewidth=1, edgecolor='k', facecolor=colors[r['label']], alpha=velocity)
        ax.add_patch(rect)

    ax.set_ylim([pitch_min - 1.5, pitch_max + 1.5])
    ax.set_xlim([min(time_min, 0), time_max + 0.5])
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(label=title)
    ax.grid()
    ax.set_axisbelow(True)
    ax.legend([patches.Patch(linewidth=1, edgecolor='k', facecolor=colors[key]) for key in labels_set],
              labels_set, loc='upper right', framealpha=1)

    if fig is not None:
        plt.tight_layout()

    return fig, ax
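
# A minimal usage sketch for visualize_pianoroll (illustration only; the note
# events are hypothetical): plot two notes forming a major third.
#
#   df = pd.DataFrame({'start': [0.0, 0.5], 'duration': [0.5, 0.5],
#                      'pitch': [60, 64], 'velocity': [1.0, 1.0],
#                      'label': ['piano', 'piano']})
#   fig, ax = visualize_pianoroll(df, title='C major third')
#   plt.show()
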
def check_df_schema(df: pd.DataFrame):
    """Raise a ValueError if the DataFrame does not have the expected event columns."""
    expected_columns = ['start', 'duration', 'pitch', 'velocity', 'label']
    if list(df.columns) != expected_columns:
        raise ValueError(f"Columns of the dataframe must be {expected_columns}.")