import json
import glob
import os
import numpy as np
import pandas as pd
from copy import deepcopy
from .transform import limit_by_freq, dict_to_array
# Square root of 2, computed once at import time.
# np.sqrt on a Python int yields a np.float64 (NumPy's default float precision).
_SQRT2 = np.sqrt(2)
# DISTANCE METRICS
def positive_error(x, y):
    """
    Sum of the element-wise absolute differences between two arrays.

    :param np.array x: first array.
    :param np.array y: second array, broadcast-compatible with ``x``.
    :return: sum of ``|x - y|`` over all elements.
    """
    residual = x - y
    return np.sum(np.absolute(residual))
def hellinger(x, y):
    """
    Hellinger distance between two non-negative vectors.

    Each vector is first normalised to sum to 1 (so it can be read as a
    discrete probability distribution); the distance is then
    ``||sqrt(p) - sqrt(q)||_2 / sqrt(2)``, which lies in ``[0, 1]`` and does
    not depend on the overall scale of either input.

    :param np.array x: non-negative values with a non-zero sum.
    :param np.array y: non-negative values with a non-zero sum, same length
        as ``x``.
    :return: Hellinger distance (``nan`` if either input sums to zero,
        because the normalisation divides by that sum).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Normalise *before* taking the square root: sqrt(x / sum(x)), not
    # sqrt(x) / sum(x), otherwise the result depends on the input scale.
    p = x / np.sum(x)
    q = y / np.sum(y)
    return np.linalg.norm(np.sqrt(p) - np.sqrt(q)) / np.sqrt(2.0)
def l2_norm(x, y):
    """
    Euclidean (L2) distance between two arrays, exposed with the same
    ``(x, y)`` signature as the other distance metrics.

    :param x: first array.
    :param y: second array, broadcast-compatible with ``x``.
    :return: norm of ``x - y`` as computed by ``np.linalg.norm``.
    """
    delta = x - y
    return np.linalg.norm(delta)
def integrate(x, y):
    """
    Area of the absolute difference between two series, using the
    trapezoidal rule with unit spacing between samples.

    :param x: first array.
    :param y: second array, broadcast-compatible with ``x``.
    :return: trapezoidal integral of ``|x - y|``.
    """
    diff = np.abs(x - y)
    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # prefer the new name and fall back for older NumPy releases.
    trapezoid = getattr(np, "trapezoid", None)
    if trapezoid is None:
        trapezoid = np.trapz
    return trapezoid(diff)
# Registry mapping each metric name to the function implementing it;
# looked up by name wherever a ``distance_metric`` string is accepted.
distance_dict = {
    'positive': positive_error,
    'hellinger': hellinger,
    'l2_norm': l2_norm,
    'integrate': integrate,
}
def warp_distance(distance_metric, x, y, warp=200):
    """
    Smallest distance between two series over a range of relative shifts.

    The distance is evaluated with no shift and then with ``x`` shifted
    forward and backward against ``y`` by ``1 .. int(warp) - 1`` samples
    (the overlapping parts only); the minimum over all of them is returned.

    :param distance_metric: name of a metric registered in ``distance_dict``,
        or a callable ``f(a, b)`` returning a comparable distance.
    :param np.array x: first series.
    :param np.array y: second series, same length as ``x``.
    :param int warp: exclusive upper bound on the shift, in samples. Shifts
        are additionally capped at ``len(x) - 1`` so that the compared
        slices are never empty.
    :return: minimum distance found.
    """
    if callable(distance_metric):
        distance_func = distance_metric
    else:
        distance_func = distance_dict[distance_metric]

    # Slicing only creates views and the metrics do not mutate their
    # inputs, so no defensive copies are needed.
    x = np.asarray(x)
    y = np.asarray(y)

    min_diff = distance_func(x, y)

    # A shift >= len(x) would compare two empty slices, which sum/norm based
    # metrics score as 0 and would wrongly win the minimum.
    shift_limit = min(int(warp), len(x))
    for shift in range(1, shift_limit):
        forward_diff = distance_func(x[shift:], y[:-shift])
        backward_diff = distance_func(x[:-shift], y[shift:])
        # Both candidates are compared against the running minimum (the
        # backward one must not be compared against the forward one).
        min_diff = min(min_diff, forward_diff, backward_diff)
    return min_diff
def pair_distance(freq_x,
                  features_x,
                  freq_y,
                  features_y,
                  warp=None,
                  distance_metric='l2_norm'):
    """
    Distance between song x (with frequencies and features)
    and song y is calculated.

    Song y is first linearly interpolated onto the frequency grid of song x
    so that both feature vectors have the same length and alignment.

    :param numpy.array freq_x: frequencies of the song x.
    :param numpy.array features_x: features (fourier amplitude) of song x.
    :param numpy.array freq_y: frequencies of the song y; ``np.interp``
        expects these to be increasing.
    :param numpy.array features_y: features (fourier amplitude) of song y.
    :param warp: if None, the plain metric is used. Otherwise the value is
        forwarded to ``warp_distance``, which converts it with ``int()`` and
        uses it as the exclusive upper bound on the shift (in samples).
    :param str distance_metric: name of the metric to use. Options are:
        - 'positive': positive_error.
        - 'hellinger': hellinger.
        - 'l2_norm': l2_norm.
        - 'integrate': integrate.
    :return: distance in float.
    :raises KeyError: if ``distance_metric`` is not a registered name.
    """
    # Resample y on x's frequencies so the two vectors are comparable.
    features_y_on_x = np.interp(freq_x, freq_y, features_y)

    if warp is None:
        return distance_dict[distance_metric](features_x, features_y_on_x)
    return warp_distance(distance_metric,
                         features_x,
                         features_y_on_x,
                         warp)
def distance_matrix(fourier_folder,
                    warp=None,
                    upper_limit=6000.0,
                    distance_metric='l2_norm'):
    """
    A distance matrix with all the songs of a folder
    can be calculated.

    Every ``*.json`` file in the folder is loaded and merged into a single
    ``{song_name: spectrum}`` mapping, then the pairwise distance between
    all songs is computed with :func:`pair_distance`.

    Note: an existing ``merged_file.json`` in the folder is deleted before
    the files are read, so that it is not merged in as a song file.

    :param fourier_folder: folder containing the per-song JSON files.
    :param warp: forwarded to ``pair_distance`` (None disables warping).
    :param upper_limit: forwarded to ``limit_by_freq`` as ``upper_limit``
        for the reference song of each pair.
    :param str distance_metric: metric name forwarded to ``pair_distance``.
    :return: square, symmetric ``pandas.DataFrame`` of float distances with
        a zero diagonal; rows (index named ``'Songs'``) and columns are the
        song names, both sorted ascending. Empty if the folder has no JSON
        files.
    """
    stale_merged_path = os.path.join(fourier_folder, 'merged_file.json')
    if os.path.isfile(stale_merged_path):
        os.remove(stale_merged_path)

    # Merge all song files; sorting the paths makes the precedence between
    # files that share a song name deterministic (the later path wins).
    songs = {}
    for json_path in sorted(glob.glob(os.path.join(fourier_folder, '*.json'))):
        with open(json_path) as handle:
            songs.update(json.load(handle))

    song_names = list(songs)
    n_songs = len(song_names)
    # Convert every song once instead of once per pair.
    spectra = [dict_to_array(songs[name]) for name in song_names]

    # Zero-initialised, so the diagonal (distance to itself) is already 0.0.
    distances = np.zeros((n_songs, n_songs))
    for i, (freq_x, features_x) in enumerate(spectra):
        # Filtering frequencies of the reference song only; song y is
        # interpolated onto these frequencies inside pair_distance.
        freq_x, features_x = limit_by_freq(freq_x,
                                           features_x,
                                           upper_limit=upper_limit)
        for j in range(i + 1, n_songs):
            freq_y, features_y = spectra[j]
            distance = pair_distance(freq_x=freq_x,
                                     features_x=features_x,
                                     freq_y=freq_y,
                                     features_y=features_y,
                                     warp=warp,
                                     distance_metric=distance_metric)
            # Only the upper triangle is computed; mirror it.
            distances[i, j] = distance
            distances[j, i] = distance

    df = pd.DataFrame(distances,
                      index=pd.Index(song_names, name='Songs'),
                      columns=song_names)
    df = df.sort_index(axis=0, ascending=True)
    df = df.sort_index(axis=1, ascending=True)
    return df