import os, platform, re, shutil, subprocess
from collections import defaultdict, namedtuple
from collections.abc import Iterable, Iterator
from contextlib import contextmanager
from fractions import Fraction as frac
from functools import reduce
from itertools import repeat, takewhile
from shutil import which
from tempfile import NamedTemporaryFile as Temp
from zipfile import ZipFile as Zip
import pandas as pd
import numpy as np
import webcolors
from pathos import multiprocessing
from tqdm import tqdm
from pytablewriter import MarkdownTableWriter
from .logger import function_logger, update_cfg
STANDARD_NAMES = ['notes', 'rests', 'notes_and_rests', 'measures', 'events', 'labels', 'chords', 'expanded',
'harmonies', 'cadences', 'form_labels', 'MS3', 'score', 'scores', 'mscx']
""":obj:`list`
Indicators for subcorpora: If a folder contains any file or folder whose name begins or ends with any of these names,
it is considered to be a subcorpus by the function :py:func:`iterate_subcorpora`.
"""
DCML_REGEX = re.compile(r"""
^(\.?
((?P<globalkey>[a-gA-G](b*|\#*))\.)?
((?P<localkey>((b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i)/?)+)\.)?
((?P<pedal>((b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i)/?)+)\[)?
(?P<chord>
(?P<numeral>(b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i|Ger|It|Fr|@none))
(?P<form>(%|o|\+|M|\+M))?
(?P<figbass>(7|65|43|42|2|64|6))?
(\((?P<changes>((\+|-|\^|v)?(b*|\#*)\d)+)\))?
(/(?P<relativeroot>((b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i)/?)*))?
)
(?P<pedalend>\])?
)?
(\|(?P<cadence>((HC|PAC|IAC|DC|EC|PC)(\..+?)?)))?
(?P<phraseend>(\\\\|\}\{|\{|\}))?$
""", re.VERBOSE)
""":obj:`str`
Constant with a regular expression that recognizes labels conforming to the DCML harmony annotation standard excluding those
consisting of two alternatives.
"""
DCML_DOUBLE_REGEX = re.compile(r"""
^(?P<first>
(\.?
((?P<globalkey>[a-gA-G](b*|\#*))\.)?
((?P<localkey>((b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i)/?)+)\.)?
((?P<pedal>((b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i)/?)+)\[)?
(?P<chord>
(?P<numeral>(b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i|Ger|It|Fr|@none))
(?P<form>(%|o|\+|M|\+M))?
(?P<figbass>(7|65|43|42|2|64|6))?
(\((?P<changes>((\+|-|\^|v)?(b*|\#*)\d)+)\))?
(/(?P<relativeroot>((b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i)/?)*))?
)
(?P<pedalend>\])?
)?
(\|(?P<cadence>((HC|PAC|IAC|DC|EC|PC)(\..+?)?)))?
(?P<phraseend>(\\\\|\}\{|\{|\})
)?
)
(-
(?P<second>
((?P<globalkey2>[a-gA-G](b*|\#*))\.)?
((?P<localkey2>((b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i)/?)+)\.)?
((?P<pedal2>((b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i)/?)+)\[)?
(?P<chord2>
(?P<numeral2>(b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i|Ger|It|Fr|@none))
(?P<form2>(%|o|\+|M|\+M))?
(?P<figbass2>(7|65|43|42|2|64|6))?
(\((?P<changes2>((\+|-|\^|v)?(b*|\#*)\d)+)\))?
(/(?P<relativeroot2>((b*|\#*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i)/?)*))?
)
(?P<pedalend2>\])?
)?
(\|(?P<cadence2>((HC|PAC|IAC|DC|EC|PC)(\..+?)?)))?
(?P<phraseend2>(\\\\|\}\{|\{|\})
)?
)?
$
""",
re.VERBOSE)
""":obj:`str`
Constant with a regular expression that recognizes complete labels conforming to the DCML harmony annotation standard
including those consisting of two alternatives, without having to split them. It is simply a doubled version of DCML_REGEX.
"""
FORM_DETECTION_REGEX = r"\d{1,2}(?:i+|\w)?:"
FORM_LEVEL_REGEX = r"(?P<levels>(?:(?:\d{1,2})(?:i+|\w)?[\&=]?)+):(?P<token>(?:\D|\d+(?!(?:$|i+|\w)?[\&=:]))+)"
MS3_HTML = {'#005500': 'ms3_darkgreen',
'#aa0000': 'ms3_darkred',
'#aa5500': 'ms3_sienna',
'#00aa00': 'ms3_green',
'#aaaa00': 'ms3_darkgoldenrod',
'#aaff00': 'ms3_chartreuse',
'#00007f': 'ms3_navy',
'#aa007f': 'ms3_darkmagenta',
'#00557f': 'ms3_teal',
'#aa557f': 'ms3_indianred',
'#00aa7f': 'ms3_darkcyan',
'#aaaa7f': 'ms3_darkgray',
'#aaff7f': 'ms3_palegreen',
'#aa00ff': 'ms3_darkviolet',
'#0055ff': 'ms3_dodgerblue',
'#aa55ff': 'ms3_mediumorchid',
'#00aaff': 'ms3_deepskyblue',
'#aaaaff': 'ms3_lightsteelblue',
'#aaffff': 'ms3_paleturquoise',
'#550000': 'ms3_maroon',
'#555500': 'ms3_darkolivegreen',
'#ff5500': 'ms3_orangered',
'#55aa00': 'ms3_olive',
'#ffaa00': 'ms3_orange',
'#55ff00': 'ms3_lawngreen',
'#55007f': 'ms3_indigo',
'#ff007f': 'ms3_deeppink',
'#55557f': 'ms3_darkslateblue',
'#ff557f': 'ms3_lightcoral',
'#55aa7f': 'ms3_mediumseagreen',
'#ffaa7f': 'ms3_lightsalmon',
'#55ff7f': 'ms3_lightgreen',
'#ffff7f': 'ms3_khaki',
'#5500ff': 'ms3_blue',
'#5555ff': 'ms3_royalblue',
'#ff55ff': 'ms3_violet',
'#55aaff': 'ms3_cornflowerblue',
'#ffaaff': 'ms3_lightpink',
'#55ffff': 'ms3_aquamarine'}
MS3_RGB = {(0, 85, 0): 'ms3_darkgreen',
(170, 0, 0): 'ms3_darkred',
(170, 85, 0): 'ms3_sienna',
(0, 170, 0): 'ms3_green',
(170, 170, 0): 'ms3_darkgoldenrod',
(170, 255, 0): 'ms3_chartreuse',
(0, 0, 127): 'ms3_navy',
(170, 0, 127): 'ms3_darkmagenta',
(0, 85, 127): 'ms3_teal',
(170, 85, 127): 'ms3_indianred',
(0, 170, 127): 'ms3_darkcyan',
(170, 170, 127): 'ms3_darkgray',
(170, 255, 127): 'ms3_palegreen',
(170, 0, 255): 'ms3_darkviolet',
(0, 85, 255): 'ms3_dodgerblue',
(170, 85, 255): 'ms3_mediumorchid',
(0, 170, 255): 'ms3_deepskyblue',
(170, 170, 255): 'ms3_lightsteelblue',
(170, 255, 255): 'ms3_paleturquoise',
(85, 0, 0): 'ms3_maroon',
(85, 85, 0): 'ms3_darkolivegreen',
(255, 85, 0): 'ms3_orangered',
(85, 170, 0): 'ms3_olive',
(255, 170, 0): 'ms3_orange',
(85, 255, 0): 'ms3_lawngreen',
(85, 0, 127): 'ms3_indigo',
(255, 0, 127): 'ms3_deeppink',
(85, 85, 127): 'ms3_darkslateblue',
(255, 85, 127): 'ms3_lightcoral',
(85, 170, 127): 'ms3_mediumseagreen',
(255, 170, 127): 'ms3_lightsalmon',
(85, 255, 127): 'ms3_lightgreen',
(255, 255, 127): 'ms3_khaki',
(85, 0, 255): 'ms3_blue',
(85, 85, 255): 'ms3_royalblue',
(255, 85, 255): 'ms3_violet',
(85, 170, 255): 'ms3_cornflowerblue',
(255, 170, 255): 'ms3_lightpink',
(85, 255, 255): 'ms3_aquamarine'}
CSS2MS3 = {c[4:]: c for c in MS3_HTML.values()}
CSS_COLORS = list(webcolors.CSS3_NAMES_TO_HEX.keys())
COLORS = sum([[c, CSS2MS3[c]] if c in CSS2MS3 else [c] for c in CSS_COLORS], [])
rgba = namedtuple('RGBA', ['r', 'g', 'b', 'a'])
class map_dict(dict):
    """Dictionary that can be mapped onto a Series to replace its values, while leaving values that are absent
    from the dict keys intact."""
def __missing__(self, key):
return key
@function_logger
def add_quarterbeats_col(df, offset_dict, insert_after='mc', interval_index=False):
""" Insert a column measuring the distance of events from MC 1 in quarter notes. If no 'mc_onset' column is present,
the column corresponds to the ``insert_after`` column's measure counts.
Parameters
----------
df : :obj:`pandas.DataFrame`
DataFrame with an ``mc_playthrough`` and an ``mc_onset`` column.
offset_dict : :obj:`pandas.Series` or :obj:`dict`
| If unfolded: {mc_playthrough -> offset}
| Otherwise: {mc -> offset}
| You can create the dict using the function :py:meth:`Parse.get_continuous_offsets()<ms3.parse.Parse.get_continuous_offsets>`
insert_after : :obj:`str`, optional
Name of the column after which the new column will be inserted.
interval_index : :obj:`bool`, optional
Defaults to False. Pass True to replace the index with an :obj:`pandas.IntervalIndex` (depends on the successful
creation of the column ``duration_qb``).
    Returns
    -------
    :obj:`pandas.DataFrame`
        The input DataFrame with the additional column ``quarterbeats`` (and, if possible, ``duration_qb``).
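    Example
    -------
    A minimal sketch of the expected inputs; the offset_dict values are quarterbeat offsets per MC:

    >>> df = pd.DataFrame({'mc': [1, 1, 2], 'mc_onset': [frac(0), frac(1, 4), frac(0)]})
    >>> result = add_quarterbeats_col(df, {1: 0, 2: 4, 'end': 8})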
"""
if offset_dict is None:
logger.warning(f"No offset_dict was passed: Not adding quarterbeats.")
return df
if 'quarterbeats' not in df.columns:
df = df.copy()
quarterbeats = df[insert_after].map(offset_dict)
if 'mc_onset' in df.columns:
quarterbeats += df.mc_onset * 4
insert_here = df.columns.get_loc(insert_after) + 1
df.insert(insert_here, 'quarterbeats', quarterbeats)
if 'duration_qb' not in df.columns:
if 'duration' in df.columns:
dur = (df.duration * 4).astype(float).round(3)
df.insert(insert_here + 1, 'duration_qb', dur)
elif 'end' in offset_dict:
present_qb = df.quarterbeats.notna()
try:
ivs = make_interval_index(df.loc[present_qb, 'quarterbeats'].astype(float).round(3),
end_value=float(offset_dict['end']))
df.insert(insert_here + 1, 'duration_qb', pd.NA)
df.loc[present_qb, 'duration_qb'] = ivs.length
                except Exception:
                    logger.warning("Error while creating durations from quarterbeats column. Check consistency "
                                   "(quarterbeats need to be monotonically ascending; the 'end' value in offset_dict "
                                   "needs to be larger than the last quarterbeat).")
else:
logger.warning("Column 'duration_qb' could not be created.")
else:
logger.debug("quarterbeats column was already present.")
if interval_index and all(c in df.columns for c in ('quarterbeats', 'duration_qb')):
df = replace_index_by_intervals(df)
return df
def assert_all_lines_equal(before, after, original, tmp_file):
""" Compares two multiline strings to test equality."""
diff = [(i, bef, aft) for i, (bef, aft) in enumerate(zip(before.splitlines(), after.splitlines()), 1) if bef != aft]
if len(diff) > 0:
line_n, left, _ = zip(*diff)
ln = len(str(max(line_n)))
left_col = max(len(s) for s in left)
folder, file = os.path.split(original)
tmp_persist = os.path.join(folder, '..', file)
shutil.copy(tmp_file.name, tmp_persist)
diff = [('', original, tmp_persist)] + diff
assert len(diff) == 0, '\n' + '\n'.join(
f"{a:{ln}} {b:{left_col}} {c}" for a, b, c in diff)
def assert_dfs_equal(old, new, exclude=[]):
""" Compares the common columns of two DataFrames to test equality."""
old_l, new_l = len(old), len(new)
greater_length = max(old_l, new_l)
if old_l != new_l:
print(f"Old length: {old_l}, new length: {new_l}")
old_is_shorter = new_l == greater_length
shorter = old if old_is_shorter else new
missing_rows = abs(old_l - new_l)
shorter_cols = shorter.columns
patch = pd.DataFrame([['missing row'] * len(shorter_cols)] * missing_rows, columns=shorter_cols)
shorter = pd.concat([shorter, patch], ignore_index=True)
if old_is_shorter:
old = shorter
else:
new = shorter
old.index.rename('old_ix', inplace=True)
new.index.rename('new_ix', inplace=True)
cols = [col for col in set(old.columns).intersection(set(new.columns)) if col not in exclude]
nan_eq = lambda a, b: (a == b) | pd.isnull(a) & pd.isnull(b)
diff = [(i, j, ~nan_eq(o, n)) for ((i, o), (j, n)) in zip(old[cols].iterrows(), new[cols].iterrows())]
old_bool = pd.DataFrame.from_dict({ix: bool_series for ix, _, bool_series in diff}, orient='index')
new_bool = pd.DataFrame.from_dict({ix: bool_series for _, ix, bool_series in diff}, orient='index')
diffs_per_col = old_bool.sum(axis=0)
def show_diff():
comp_str = []
for col, n_diffs in diffs_per_col.items():
if n_diffs > 0:
comparison = pd.concat([old.loc[old_bool[col], ['mc', col]].reset_index(drop=True).iloc[:20],
new.loc[new_bool[col], ['mc', col]].iloc[:20].reset_index(drop=True)],
axis=1,
keys=['old', 'new'])
comp_str.append(
f"{n_diffs}/{greater_length} ({n_diffs / greater_length * 100:.2f} %) rows are different for {col}{' (showing first 20)' if n_diffs > 20 else ''}:\n{comparison}\n")
return '\n'.join(comp_str)
assert diffs_per_col.sum() == 0, show_diff()
def ambitus2oneliner(ambitus):
""" Turns a ``metadata['parts'][staff_id]`` dictionary into a string."""
if 'min_midi' in ambitus:
return f"{ambitus['min_midi']}-{ambitus['max_midi']} ({ambitus['min_name']}-{ambitus['max_name']})"
if 'max_midi' in ambitus:
return f"{ambitus['max_midi']}-{ambitus['max_midi']} ({ambitus['max_name']}-{ambitus['max_name']})"
return ''
def changes2list(changes, sort=True):
""" Splits a string of changes into a list of 4-tuples.
Example
-------
>>> changes2list('+#7b5')
[('+#7', '+', '#', '7'),
('b5', '', 'b', '5')]
"""
    res = re.findall(r"((\+|-|\^|v)?(#+|b+)?(1\d|\d))", changes)
return sorted(res, key=lambda x: int(x[3]), reverse=True) if sort else res
@function_logger
def changes2tpc(changes, numeral, minor=False, root_alterations=False):
"""
Given a numeral and changes, computes the intervals that the changes represent.
Changes do not express absolute intervals but instead depend on the numeral and the mode.
Uses: split_scale_degree(), changes2list()
Parameters
----------
changes : :obj:`str`
A string of changes following the DCML harmony standard.
numeral : :obj:`str`
Roman numeral. If it is preceded by accidentals, it depends on the parameter
`root_alterations` whether these are taken into account.
    minor : :obj:`bool`, optional
        Set to True if the `numeral` occurs in a minor context.
root_alterations : :obj:`bool`, optional
Set to True if accidentals of the root should change the result.
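    Example
    -------
    The last tuple element is the interval in fifths, or None for unaltered chord tones:

    >>> changes2tpc('+#7b5', 'V')
    [('+#7', '+', '#', '7', 5), ('b5', '', 'b', '5', None)]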
"""
root_alteration, num_degree = split_scale_degree(numeral, count=True, logger=logger)
# build 2-octave diatonic scale on C major/minor
root = ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII'].index(num_degree.upper())
    tpcs = 2 * [0, 2, -3, -1, 1, -4, -2] if minor else 2 * [0, 2, 4, -1, 1, 3, 5]
tpcs = tpcs[root:] + tpcs[:root] # starting the scale from chord root
root = tpcs[0]
if root_alterations:
root += 7 * root_alteration
tpcs[0] = root
alts = changes2list(changes, sort=False)
acc2tpc = lambda accidentals: 7 * (accidentals.count('#') - accidentals.count('b'))
    return [(full, added, acc, chord_interval,
             (tpcs[int(chord_interval) - 1] + acc2tpc(acc) - root) if chord_interval not in ('3', '5') else None)
            for full, added, acc, chord_interval in alts]
def check_labels(df, regex, column='label', split_regex=None, return_cols=['mc', 'mc_onset', 'staff', 'voice']):
""" Checks the labels in ``column`` against ``regex`` and returns those that don't match.
Parameters
----------
df : :obj:`pandas.DataFrame`
DataFrame containing a column with labels.
    regex : :obj:`str`
        Regular expression defining correct labels; labels that don't match are returned.
column : :obj:`str`, optional
Column name where the labels are. Defaults to 'label'
    split_regex : :obj:`str`, optional
        If you pass a regular expression (or plain string), it will be used to split the labels before checking the
        resulting columns separately. Alternatively, pass True to use the default (a '-' that does not precede a scale degree).
return_cols : :obj:`list`, optional
Pass a list of the DataFrame columns that you want to be displayed for the wrong labels.
Returns
-------
df : :obj:`pandas.DataFrame`
DataFrame with wrong labels.
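    Example
    -------
    A minimal sketch using the module's DCML_REGEX; 'Vii' is not a valid DCML label:

    >>> df = pd.DataFrame({'mc': [1, 2], 'label': ['V7', 'Vii']})
    >>> wrong = check_labels(df, DCML_REGEX)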
"""
if split_regex is not None:
        if split_regex is True:
check_this = split_alternatives(df, column=column, alternatives_only=True)
else:
check_this = split_alternatives(df, column=column, regex=split_regex, max=1000, alternatives_only=True)
else:
check_this = df[[column]]
    if not isinstance(regex, re.Pattern):
        regex = re.compile(regex, re.VERBOSE)
not_matched = check_this.apply(lambda c: ~c.str.match(regex).fillna(True))
cols = [c for c in return_cols if c in df.columns]
select_wrong = not_matched.any(axis=1)
res = check_this.where(not_matched, other='.')[select_wrong]
res = res.apply(lambda c: c.str.replace('^/$', 'empty_harmony', regex=True))
return pd.concat([df.loc[select_wrong, cols], res], axis=1)
def color2rgba(c):
    """ Pass an RGB or RGBA tuple, an HTML color, or a color name to convert it to RGBA."""
if isinstance(c, tuple):
if len(c) > 3:
return rgba(*c[:4])
if len(c) == 3:
return rgba(*(c + (255,)))
else:
return rgba(*c)
if c[0] == '#':
return html_color2rgba(c)
return color_name2rgba(c)
def color_name2html(n):
""" Converts a single CSS3 name into HTML"""
return color_name2format(n, format='html')
def color_name2rgb(n):
""" Converts a single CSS3 name into RGB"""
return color_name2format(n, format='rgb')
def color_name2rgba(n):
""" Converts a single CSS3 name into RGBA"""
return color_name2format(n, format='rgba')
@function_logger
def color_params2rgba(color_name=None, color_html=None, color_r=None, color_g=None, color_b=None, color_a=None):
if all(pd.isnull(param) for param in [color_name, color_html, color_r, color_g, color_b, color_a]):
return None
res = None
if not pd.isnull(color_r):
if pd.isnull(color_a):
color_a = 255
if pd.isnull(color_g) or pd.isnull(color_b):
if pd.isnull(color_name) and pd.isnull(color_html):
                logger.warning(f"Not a valid RGB color: {(color_r, color_g, color_b)}")
else:
res = (color_r, color_g, color_b, color_a)
if res is None and not pd.isnull(color_html):
res = color2rgba(color_html)
if res is None and not pd.isnull(color_name):
res = color2rgba(color_name)
    if res is None:
        return None
    return rgba(*res)
def allnamesequal(name):
return all(n == name[0] for n in name[1:])
def commonprefix(paths, sep='/'):
""" Returns common prefix of a list of paths.
Uses: allnamesequal(), itertools.takewhile()"""
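    Example
    -------
    >>> commonprefix(['corpus/scores/a.mscx', 'corpus/notes/a.tsv'])
    'corpus'
    """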
bydirectorylevels = zip(*[p.split(sep) for p in paths])
return sep.join(x[0] for x in takewhile(allnamesequal, bydirectorylevels))
def compute_mn(df):
""" Compute measure numbers from a measure list with columns ['dont_count', 'numbering_offset']
"""
excluded = df['dont_count'].fillna(0).astype(bool)
offset = df['numbering_offset']
mn = (~excluded).cumsum()
if offset.notna().any():
offset = offset.fillna(0).astype(int).cumsum()
mn += offset
return mn.rename('mn')
@function_logger
def convert(old, new, MS='mscore'):
    process = [MS, "-fo", new, old]  # [MS, '--appimage-extract-and-run', "-fo", new, old] if MS.endswith('.AppImage') else [MS, "-fo", new, old]
    result = subprocess.run(process)
    if result.returncode == 0:
        logger.info(f"Converted {old} to {new}")
    else:
        logger.warning("Error while converting " + old)
@function_logger
def convert_folder(directory, new_folder, extensions=[], target_extension='mscx', regex='.*', suffix=None, recursive=True,
ms='mscore', overwrite=False, parallel=False):
""" Convert all files in `dir` that have one of the `extensions` to .mscx format using the executable `MS`.
Parameters
----------
directory, new_folder : str
Directories
extensions : list, optional
If you want to convert only certain formats, give those, e.g. ['mscz', 'xml']
recursive : bool, optional
Subdirectories as well.
MS : str, optional
Give the path to the MuseScore executable on your system. Need only if
the command 'mscore' does not execute MuseScore on your system.
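    Example
    -------
    Illustrative call (the paths are hypothetical):

    >>> convert_folder('mscz_scores', 'mscx_scores', extensions=['mscz'])  # doctest: +SKIP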
"""
MS = get_musescore(ms)
assert MS is not None, f"MuseScore not found: {ms}"
if target_extension[0] == '.':
target_extension = target_extension[1:]
conversion_params = []
#logger.info(f"Traversing {dir} {'' if recursive else 'non-'}recursively...")
if len(extensions) > 0:
exclude_re = f"^(?:(?!({'|'.join(extensions)})).)*$"
else:
exclude_re = ''
if new_folder is None:
new_folder = directory
new_dirs = {}
try:
for subdir, file in scan_directory(directory, file_re=regex, exclude_re=exclude_re, recursive=recursive, subdirs=True, exclude_files_only=True):
if subdir in new_dirs:
new_subdir = new_dirs[subdir]
else:
old_subdir = os.path.relpath(subdir, directory)
new_subdir = os.path.join(new_folder, old_subdir) if old_subdir != '.' else new_folder
os.makedirs(new_subdir, exist_ok=True)
new_dirs[subdir] = new_subdir
name, _ = os.path.splitext(file)
if suffix is not None:
fname = f"{name}{suffix}.{target_extension}"
else:
fname = f"{name}.{target_extension}"
old = os.path.join(subdir, file)
new = os.path.join(new_subdir, fname)
if overwrite or not os.path.isfile(new):
conversion_params.append((old, new, MS))
else:
                    logger.debug(f"{new} exists already. Pass -o to overwrite.")
if len(conversion_params) == 0:
logger.info(f"No files to convert.")
except:
logger.error(f"Failed to scan directory {directory} because of the following error:")
raise
# TODO: pass filenames as 'logger' argument to convert()
if parallel:
pool = multiprocessing.Pool(multiprocessing.cpu_count())
pool.starmap(convert, conversion_params)
pool.close()
pool.join()
else:
for o, n, ms in conversion_params:
convert(o, n, ms)
def decode_harmonies(df, label_col='label', keep_type=True, return_series=False, alt_cols='alt_label', alt_separator='-'):
"""MuseScore stores types 2 (Nashville) and 3 (absolute chords) in several columns. This function returns a copy of
the DataFrame ``Annotations.df`` where the label column contains the strings corresponding to these columns.
Parameters
----------
df : :obj:`pandas.DataFrame`
DataFrame with encoded harmony labels as stored in an :obj:`Annotations` object.
label_col : :obj:`str`, optional
Column name where the main components (<name> tag) are stored, defaults to 'label'
keep_type : :obj:`bool`, optional
Defaults to True, retaining the 'label_type' column and setting types 2 and 3 to 0.
return_series : :obj:`bool`, optional
If set to True, only the decoded labels column is returned as a Series rather than a copy of ``df``.
alt_cols : :obj:`str` or :obj:`list`, optional
Column(s) with alternative labels that are joined with the label columns using ``alt_separator``. Defaults to
'alt_label'. Suppress by passing None.
alt_separator: :obj:`str`, optional
Separator for joining ``alt_cols``.
Returns
-------
:obj:`pandas.DataFrame` or :obj:`pandas.Series`
Decoded harmony labels.
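    Example
    -------
    Illustrative sketch (``annotations_df`` stands for the DataFrame of an :obj:`Annotations` object):

    >>> decoded = decode_harmonies(annotations_df)  # doctest: +SKIP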
"""
df = df.copy()
drop_cols, compose_label = [], []
if 'nashville' in df.columns:
sel = df.nashville.notna()
df.loc[sel, label_col] = df.loc[sel, 'nashville'].astype(str) + df.loc[sel, label_col].replace('/', '')
drop_cols.append('nashville')
if 'leftParen' in df.columns:
df.leftParen.replace('/', '(', inplace=True)
compose_label.append('leftParen')
drop_cols.append('leftParen')
if 'absolute_root' in df.columns:
df.absolute_root = fifths2name(df.absolute_root, ms=True)
compose_label.append('absolute_root')
drop_cols.append('absolute_root')
if 'rootCase' in df.columns:
sel = df.rootCase.notna()
df.loc[sel, 'absolute_root'] = df.loc[sel, 'absolute_root'].str.lower()
drop_cols.append('rootCase')
if label_col in df.columns:
compose_label.append(label_col)
if 'absolute_base' in df.columns:
df.absolute_base = '/' + fifths2name(df.absolute_base, ms=True)
compose_label.append('absolute_base')
drop_cols.append('absolute_base')
if 'rightParen' in df.columns:
df.rightParen.replace('/', ')', inplace=True)
compose_label.append('rightParen')
drop_cols.append('rightParen')
new_label_col = df[compose_label].fillna('').sum(axis=1).astype(str)
new_label_col = new_label_col.str.replace('^/$', 'empty_harmony', regex=True).replace('', np.nan)
if alt_cols is not None:
if isinstance(alt_cols, str):
alt_cols = [alt_cols]
present = [c for c in alt_cols if c in df.columns]
if len(present) > 0:
alt_joined = pd.Series('', index=new_label_col.index)
for c in present:
alt_joined += (alt_separator + df[c]).fillna('')
new_label_col += alt_joined
if return_series:
return new_label_col
if 'label_type' in df.columns:
if keep_type:
            df.loc[df.label_type.isin([2, 3, '2', '3']), 'label_type'] = 0
else:
drop_cols.append('label_type')
df[label_col] = new_label_col
df.drop(columns=drop_cols, inplace=True)
return df
def df2md(df, name="Overview"):
    """ Turns a DataFrame into a Markdown table. The returned writer can be converted into a string.
"""
writer = MarkdownTableWriter()
writer.table_name = name
writer.header_list = list(df.columns.values)
writer.value_matrix = df.values.tolist()
return writer
def dict2oneliner(d):
    """ Turns a dictionary into a single-line string without brackets.
return ', '.join(f"{k}: {v}" for k, v in d.items())
def fifths2acc(fifths):
""" Returns accidentals for a stack of fifths that can be combined with a
basic representation of the seven steps."""
return abs(fifths // 7) * 'b' if fifths < 0 else fifths // 7 * '#'
def fifths2iv(fifths, smallest=False):
""" Return interval name of a stack of fifths such that
0 = 'P1', -1 = 'P4', -2 = 'm7', 4 = 'M3' etc. If you pass ``smallest=True``, intervals of a fifth or greater
will be inverted (e.g. 'm6' => '-M3' and 'D5' => '-A4').
Uses: map2elements()
"""
if isinstance(fifths, Iterable):
return map2elements(fifths, fifths2iv)
if pd.isnull(fifths):
return fifths
interval_qualities = {0: ['P', 'P', 'P', 'M', 'M', 'M', 'M'],
-1: ['D', 'D', 'D', 'm', 'm', 'm', 'm']}
interval_qualities_inverted = {0: ['P', 'P', 'P', 'm', 'm', 'm', 'm'],
-1: ['A', 'A', 'A', 'M', 'M', 'M', 'M']}
fifths += 1 # making 0 = fourth, 1 = unison, 2 = fifth etc.
pos = fifths % 7
int_num = [4, 1, 5, 2, 6, 3, 7][pos]
qual_region = fifths // 7
if smallest and int_num > 4:
int_num = 9 - int_num
if qual_region in interval_qualities_inverted:
int_qual = interval_qualities_inverted[qual_region][pos]
elif qual_region < 0:
int_qual = (abs(qual_region) - 1) * 'A'
else:
int_qual = qual_region * 'D'
int_qual = '-' + int_qual
else:
if qual_region in interval_qualities:
int_qual = interval_qualities[qual_region][pos]
elif qual_region < 0:
int_qual = (abs(qual_region) - 1) * 'D'
else:
int_qual = qual_region * 'A'
return int_qual + str(int_num)
def fifths2name(fifths, midi=None, ms=False):
""" Return note name of a stack of fifths such that
0 = C, -1 = F, -2 = Bb, 1 = G etc.
Uses: map2elements(), fifths2str()
Parameters
----------
fifths : :obj:`int`
Tonal pitch class to turn into a note name.
midi : :obj:`int`
In order to include the octave into the note name,
pass the corresponding MIDI pitch.
ms : :obj:`bool`, optional
Pass True if ``fifths`` is a MuseScore TPC, i.e. C = 14
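    Example
    -------
    >>> fifths2name(0), fifths2name(-2), fifths2name(2)
    ('C', 'Bb', 'D')
    >>> fifths2name(-2, midi=58)
    'Bb3'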
"""
try:
fifths = int(float(fifths))
except:
if isinstance(fifths, Iterable):
return map2elements(fifths, fifths2name, ms=ms)
return fifths
if ms:
fifths -= 14
note_names = ['F', 'C', 'G', 'D', 'A', 'E', 'B']
name = fifths2str(fifths, note_names, inverted=True)
if midi is not None:
octave = midi2octave(midi, fifths)
return f"{name}{octave}"
return name
def fifths2pc(fifths):
""" Turn a stack of fifths into a chromatic pitch class.
Uses: map2elements()
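    Example
    -------
    >>> fifths2pc(1), fifths2pc(-1)
    (7, 5)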
"""
try:
fifths = int(float(fifths))
except:
if isinstance(fifths, Iterable):
return map2elements(fifths, fifths2pc)
return fifths
return int(7 * fifths % 12)
def fifths2rn(fifths, minor=False, auto_key=False):
"""Return Roman numeral of a stack of fifths such that
0 = I, -1 = IV, 1 = V, -2 = bVII in major, VII in minor, etc.
Uses: map2elements(), is_minor_mode()
Parameters
----------
    auto_key : :obj:`bool`, optional
        By default, the returned Roman numerals are uppercase. Pass True to return upper-
        or lowercase according to the position in the scale.
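    Example
    -------
    >>> fifths2rn(-2), fifths2rn(-2, minor=True)
    ('bVII', 'VII')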
"""
    if isinstance(fifths, Iterable):
        return map2elements(fifths, fifths2rn, minor=minor, auto_key=auto_key)
if pd.isnull(fifths):
return fifths
rn = ['VI', 'III', 'VII', 'IV', 'I', 'V', 'II'] if minor else ['IV', 'I', 'V', 'II', 'VI', 'III', 'VII']
sel = fifths + 3 if minor else fifths
res = fifths2str(sel, rn)
if auto_key and is_minor_mode(fifths, minor):
return res.lower()
return res
def fifths2sd(fifths, minor=False):
"""Return scale degree of a stack of fifths such that
0 = '1', -1 = '4', -2 = 'b7' in major, '7' in minor etc.
Uses: map2elements(), fifths2str()
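    Example
    -------
    >>> fifths2sd(-2), fifths2sd(-2, minor=True)
    ('b7', '7')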
"""
if isinstance(fifths, Iterable):
return map2elements(fifths, fifths2sd, minor=minor)
if pd.isnull(fifths):
return fifths
sd = ['6', '3', '7', '4', '1', '5', '2'] if minor else ['4', '1', '5', '2', '6', '3', '7']
if minor:
fifths += 3
return fifths2str(fifths, sd)
def fifths2str(fifths, steps, inverted=False):
    """ Boilerplate used by the fifths2* functions.
"""
fifths += 1
acc = fifths2acc(fifths)
if inverted:
return steps[fifths % 7] + acc
return acc + steps[fifths % 7]
def get_ms_version(mscx_file):
with open(mscx_file, encoding='utf-8') as file:
        for i, line in enumerate(file):
            if i == 2:
                m = re.search(r"<programVersion>(.*?)</programVersion>", line)
                return None if m is None else m.group(1)
@function_logger
def get_musescore(MS):
""" Tests whether a MuseScore executable can be found on the system.
Uses: test_binary()
Parameters
----------
MS : :obj:`str`
A path to the executable, installed command, or one of the keywords {'auto', 'win', 'mac'}
Returns
-------
:obj:`str`
Path to the executable if found or None.
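    Example
    -------
    Illustrative call (the resolved path depends on your system):

    >>> ms = get_musescore('auto')  # doctest: +SKIP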
"""
if MS is None:
return MS
if MS == 'auto':
mapping = {
'Windows': 'win',
'Darwin': 'mac',
'Linux': 'mscore'
}
system = platform.system()
        try:
            MS = mapping[system]
        except KeyError:
            logger.warning(f"System could not be inferred: {system}")
            MS = 'mscore'
if MS == 'win':
program_files = os.environ['PROGRAMFILES']
MS = os.path.join(program_files, r"MuseScore 3\bin\MuseScore3.exe")
elif MS == 'mac':
MS = "/Applications/MuseScore 3.app/Contents/MacOS/mscore"
return test_binary(MS, logger=logger)
def get_path_component(path, after):
"""Returns only the path's subfolders below ``after``. If ``after`` is the last
component, '.' is returned."""
dir1, base1 = os.path.split(path)
if dir1 in ('', '.', '/', '~'):
if base1 == after:
return '.'
return path
dir2, base2 = os.path.split(dir1)
if base2 == after:
return base1
higher_levels = get_path_component(dir1, after=after)
return os.path.join(higher_levels, base1)
def group_id_tuples(l):
""" Turns a list of (key, ix) into a {key: [ix]}
"""
d = defaultdict(list)
for k, i in l:
if k is not None:
d[k].append(i)
return dict(d)
def html_color2name(h):
    """ Converts an HTML color into its CSS3 name, or returns it unchanged if there is none."""
return html_color2format(h, 'name')
def html_color2rgb(h):
    """ Converts an HTML color into RGB."""
return html_color2format(h, 'rgb')
def html_color2rgba(h):
    """ Converts an HTML color into RGBA."""
return html_color2format(h, 'rgba')
@function_logger
def is_any_row_equal(df1, df2):
""" Returns True if any two rows of the two DataFrames contain the same value tuples. """
assert len(df1.columns) == len(df2.columns), "Pass the same number of columns for both DataFrames"
v1 = set(df1.itertuples(index=False, name=None))
v2 = set(df2.itertuples(index=False, name=None))
return v1.intersection(v2)
def is_minor_mode(fifths, minor=False):
""" Returns True if the scale degree `fifths` naturally has a minor third in the scale.
"""
thirds = [-4, -3, -2, -1, 0, 1, 2] if minor else [3, 4, 5, -1, 0, 1, 2]
third = thirds[(fifths + 1) % 7] - fifths
return third == -3
def iter_nested(nested):
"""Iterate through any nested structure of lists and tuples from left to right."""
for elem in nested:
if isinstance(elem, list) or isinstance(elem, tuple):
for lower in iter_nested(elem):
yield lower
else:
yield elem
def iter_selection(collectio, selector=None, opposite=False):
    """ Returns a generator of ``collectio``. ``selector`` can be a collection of index numbers to select or unselect
    elements -- depending on ``opposite`` """
    if selector is None:
        yield from collectio
        return
if opposite:
for i, e in enumerate(collectio):
if i not in selector:
yield e
else:
for i, e in enumerate(collectio):
if i in selector:
yield e
def iterable2str(iterable):
try:
return ', '.join(str(s) for s in iterable)
except:
return iterable
def iterate_subcorpora(path: str,
prefixes: Iterable = None, # Iterable[str] would require python>=3.9
suffixes: Iterable = None,
ignore_case: bool = True) -> Iterator:
""" Recursively walk through subdirectory and files but stop and return path as soon as
at least one file or at least one folder matches at least one prefix or at least one suffix.
Parameters
----------
path : :obj:`str`
Directory to scan.
prefixes : :obj:`collections.abc.Iterable`, optional
Current directory is returned if at least one contained item starts with one of the prefixes.
suffixes : :obj:`collections.abc.Iterable`, optional
Current directory is returned if at least one contained item ends with one of the suffixes.
Files are tested against suffixes including and excluding file extensions.
Defaults to ``['notes', 'rests', 'notes_and_rests', 'measures', 'events', 'labels', 'chords', 'expanded',
'harmonies', 'cadences', 'form_labels', 'MS3']``
ignore_case : :obj:`bool`, optional
Defaults to True, meaning that file and folder names match prefixes and suffixes independent
of capitalization.
Yields
------
:obj:`str`
Full path of the next subcorpus.
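    Example
    -------
    Illustrative call (the path is hypothetical):

    >>> for corpus_path in iterate_subcorpora('~/all_corpora'):  # doctest: +SKIP
    ...     print(corpus_path)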
"""
def check_fname(s):
if ignore_case:
return any(s.lower().startswith(p) for p in prefixes) or \
any(s.lower().endswith(suf) for suf in suffixes)
return any(s.startswith(p) for p in prefixes) or \
any(s.endswith(suf) for suf in suffixes)
if prefixes is None:
prefixes = []
if suffixes is None:
suffixes = STANDARD_NAMES
if ignore_case:
prefixes = [p.lower() for p in prefixes]
suffixes = [s.lower() for s in suffixes]
for d, subdirs, files in os.walk(path):
subdirs[:] = sorted(subdirs)
if files != []:
fnames, _ = zip(*[os.path.splitext(f) for f in files])
else:
fnames = []
if any(check_fname(f) for f in files) or \
any(check_fname(d) for d in subdirs) or \
any(check_fname(fn) for fn in fnames):
            del subdirs[:]
yield d
@function_logger
def join_tsvs(dfs, sort_cols=False):
""" Performs outer join on the passed DataFrames based on 'mc' and 'mc_onset', if any.
    Uses: functools.reduce(), column_order(), sort_note_list()
    Parameters
    ----------
    dfs : :obj:`Collection`
        Collection of DataFrames to join.
    sort_cols : :obj:`bool`, optional
        If you pass True, the remaining columns (those that are not defined in the standard column order of the
        function column_order()) will be sorted alphabetically.
    Returns
    -------
    :obj:`pandas.DataFrame`
        Joined DataFrame.
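    Example
    -------
    Illustrative sketch (``notes_df`` and ``harmonies_df`` stand for two facets of the same score):

    >>> joined = join_tsvs([notes_df, harmonies_df])  # doctest: +SKIP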
"""
if len(dfs) == 1:
return dfs[0]
zero, one, two = [], [], []
for df in dfs:
if 'mc' in df.columns:
if 'mc_onset' in df.columns:
two.append(df)
else:
one.append(df)
else:
zero.append(df)
join_order = two + one
if len(zero) > 0:
logger.info(f"{len(zero)} DataFrames contain none of the columns 'mc' and 'mc_onset'.")
pos_cols = ['mc', 'mc_onset']
def join_tsv(a, b):
join_cols = [c for c in pos_cols if c in a.columns and c in b.columns]
res = pd.merge(a, b, how='outer', on=join_cols, suffixes=('', '_y')).reset_index(drop=True)
duplicates = [col for col in res.columns if col.endswith('_y')]
for d in duplicates:
left = d[:-2]
if res[left].isna().any():
res[left].fillna(res[d], inplace=True)
return res.drop(columns=duplicates)
res = reduce(join_tsv, join_order)
if 'midi' in res.columns:
res = sort_note_list(res)
elif len(two) > 0:
res = res.sort_values(pos_cols)
else:
res = res.sort_values('mc')
return column_order(res, sort=sort_cols).reset_index(drop=True)
def str2inttuple(l):
if l == '':
return tuple()
res = []
for s in l.split(', '):
        try:
            res.append(int(s))
        except ValueError:
            print(f"String value '{s}' could not be converted to an integer.")
return tuple(res)
def int2bool(s):
    try:
        return bool(int(s))
    except (TypeError, ValueError):
        return s
def safe_frac(s):
    try:
        return frac(s)
    except Exception:
        return s
def load_tsv(path, index_col=None, sep='\t', converters={}, dtypes={}, stringtype=False, **kwargs):
""" Loads the TSV file `path` while applying correct type conversion and parsing tuples.
Parameters
----------
path : :obj:`str`
Path to a TSV file as output by format_data().
    index_col : :obj:`list`, optional
        Pass the name(s) of the column(s) to use as index. By default, a standard RangeIndex is created.
    converters, dtypes : :obj:`dict`, optional
        Enhances or overwrites the mapping from column names to types included in the constants defined in this function.
stringtype : :obj:`bool`, optional
If you're using pandas >= 1.0.0 you might want to set this to True in order
to be using the new `string` datatype that includes the new null type `pd.NA`.
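    Example
    -------
    Illustrative call (the file name is hypothetical):

    >>> notes = load_tsv('notes/piece.tsv')  # doctest: +SKIP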
"""
CONVERTERS = {
'added_tones': str2inttuple,
'act_dur': safe_frac,
'chord_tones': str2inttuple,
'globalkey_is_minor': int2bool,
'localkey_is_minor': int2bool,
'mc_offset': safe_frac,
'mc_onset': safe_frac,
'mn_onset': safe_frac,
'next': str2inttuple,
'nominal_duration': safe_frac,
'quarterbeats': safe_frac,
'onset': safe_frac,
'duration': safe_frac,
'scalar': safe_frac, }
DTYPES = {
'absolute_base': 'Int64',
'absolute_root': 'Int64',
'alt_label': str,
'barline': str,
'base': 'Int64',
'bass_note': 'Int64',
'cadence': str,
'cadences_id': 'Int64',
'changes': str,
'chord': str,
'chord_id': 'Int64',
'chord_type': str,
'color_name': str,
'color_html': str,
'color_r': 'Int64',
'color_g': 'Int64',
'color_b': 'Int64',
'color_a': 'Int64',
'dont_count': 'Int64',
'figbass': str,
'form': str,
'globalkey': str,
'gracenote': str,
'harmonies_id': 'Int64',
'keysig': 'Int64',
'label': str,
'label_type': object,
'leftParen': str,
'localkey': str,
'mc': 'Int64',
'mc_playthrough': 'Int64',
'midi': 'Int64',
'mn': str,
'offset:x': str,
'offset_x': str,
'offset:y': str,
'offset_y': str,
'nashville': 'Int64',
'notes_id': 'Int64',
'numbering_offset': 'Int64',
'numeral': str,
'pedal': str,
'playthrough': 'Int64',
'phraseend': str,
'relativeroot': str,
'repeats': str,
'rightParen': str,
'root': 'Int64',
'rootCase': 'Int64',
'slur': str,
'special': str,
'staff': 'Int64',
'tied': 'Int64',
'timesig': str,
'tpc': 'Int64',
'voice': 'Int64',
'voices': 'Int64',
'volta': 'Int64'
}
if converters is None:
conv = None
else:
conv = dict(CONVERTERS)
conv.update(converters)
if dtypes is None:
types = None
else:
types = dict(DTYPES)
types.update(dtypes)
if stringtype:
types = {col: 'string' if typ == str else typ for col, typ in types.items()}
df = pd.read_csv(path, sep=sep, index_col=index_col,
dtype=types,
converters=conv, **kwargs)
if 'mn' in df:
mn_volta = mn2int(df.mn)
df.mn = mn_volta.mn
if mn_volta.volta.notna().any():
if 'volta' not in df.columns:
df['volta'] = pd.Series(pd.NA, index=df.index).astype('Int64')
df.volta.fillna(mn_volta.volta, inplace=True)
return df
def make_continuous_offset(act_durs, quarters=True, negative_anacrusis=None):
    """ For the purpose of computing continuous offsets, this function computes each MC's offset from the
    piece's beginning.
Parameters
----------
    act_durs : :obj:`pandas.Series`
        A Series of actual measure durations as fractions of whole notes (which might differ from the time signature).
    quarters : :obj:`bool`, optional
        By default, the continuous offsets are expressed in quarter notes. Pass False to leave them as fractions
        of a whole note.
negative_anacrusis : :obj:`fractions.Fraction`
By default, the first value is 0. If you pass a fraction here, the first value will be its negative and the
second value will be 0.
Returns
-------
    :obj:`pandas.Series`
        Cumulative sum of the values, shifted down by one, with two copies of the final offset appended: one at the
        next integer index position and one at the index 'end'.
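    Example
    -------
    Two measures of 3/4, expressed in quarter notes (a minimal sketch; real input is a measures table's ``act_dur``):

    >>> make_continuous_offset(pd.Series([frac(3, 4), frac(3, 4)])).to_dict()
    {0: 0, 1: Fraction(3, 1), 2: Fraction(6, 1), 'end': Fraction(6, 1)}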
"""
if quarters:
act_durs = act_durs * 4
res = act_durs.cumsum()
last_val = res.iloc[-1]
last_ix = res.index[-1] + 1
res = res.shift(fill_value=0)
res = res.append(pd.Series([last_val], index=[last_ix]))
res = res.append(pd.Series([last_val], index=['end']))
if negative_anacrusis is not None:
res -= abs(frac(negative_anacrusis))
return res
def make_id_tuples(key, n):
    """ For a given key, this function returns index tuples in the form [(key, 0), ..., (key, n-1)].
    Returns
    -------
    list
        Indices in the form [(key, 0), ..., (key, n-1)].
"""
return list(zip(repeat(key), range(n)))
def make_interval_index(S, end_value=None, closed='left', **kwargs):
""" Interpret a Series as interval breaks and make an IntervalIndex out of it.
Parameters
----------
S : :obj:`pandas.Series`
Interval breaks. It is assumed that the breaks are sorted.
end_value : numeric
Often you want to pass the right border of the last interval.
closed : :obj:`str`
Defaults to 'left'. This and the kwargs are fed to :py:meth:`pandas.IntervalIndex.from_breaks`.
    **kwargs
        Additional keyword arguments passed on to :py:meth:`pandas.IntervalIndex.from_breaks`.
    Returns
    -------
    :obj:`pandas.IntervalIndex`
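    Example
    -------
    >>> iix = make_interval_index(pd.Series([0, 1, 4]), end_value=8)
    >>> list(iix.length)
    [1, 3, 4]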
"""
breaks = S.to_list()
if end_value is not None:
last = breaks[-1]
if end_value > last:
breaks += [end_value]
else:
breaks += [last]
try:
iix = pd.IntervalIndex.from_breaks(breaks, closed=closed, **kwargs)
except:
print(breaks)
raise
return iix
def make_name_columns(df):
    """Relies on the columns ``localkey`` and ``globalkey`` to transform the columns ``root`` and ``bass_note`` from
    scale degrees (expressed as fifths) to absolute note names, e.g. in C major: 0 => 'C', 7 => 'C#', -5 => 'Db'
    Uses: transform(), scale_degree2name()"""
new_cols = {}
for col in ('root', 'bass_note'):
if col in df.columns:
new_cols[f"{col}_name"] = transform(df, scale_degree2name, [col, 'localkey', 'globalkey'])
return pd.DataFrame(new_cols)
def map2elements(e, f, *args, **kwargs):
""" If `e` is an iterable, `f` is applied to all elements.
"""
if isinstance(e, Iterable) and not isinstance(e, str):
return e.__class__(map2elements(x, f, *args, **kwargs) for x in e)
return f(e, *args, **kwargs)
@function_logger
def merge_ties(df, return_dropped=False, perform_checks=True):
""" In a note list, merge tied notes to single events with accumulated durations.
Input dataframe needs columns ['duration', 'tied', 'midi', 'staff']. This
function does not handle correctly overlapping ties on the same pitch since
it doesn't take into account the notational layers ('voice').
    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Note list with columns ['duration', 'tied', 'midi', 'staff'].
    return_dropped : :obj:`bool`, optional
        Pass True to additionally store, for every merged note, the indices of the dropped notes.
    Returns
    -------
    :obj:`pandas.DataFrame`
        Note list where tied notes have been merged into single events with accumulated durations.
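    Example
    -------
    Illustrative sketch (``notes_df`` stands for a parsed note list with the required columns):

    >>> merged = merge_ties(notes_df)  # doctest: +SKIP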
"""
def merge(df):
vc = df.tied.value_counts()
if vc[1] != 1 or vc[-1] != 1:
logger.warning(f"More than one 1 or -1:\n{vc}")
ix = df.iloc[0].name
dur = df.duration.sum()
drop = df.iloc[1:].index.to_list()
return pd.Series({'ix': ix, 'duration': dur, 'dropped': drop})
def merge_notes(staff_midi):
staff_midi['chunks'] = (staff_midi.tied == 1).astype(int).cumsum()
t = staff_midi.groupby('chunks', group_keys=False).apply(merge)
return t.set_index('ix')
if not df.tied.notna().any():
return df
df = df.copy()
notna = df.loc[df.tied.notna(), ['duration', 'tied', 'midi', 'staff']]
if perform_checks:
before = notna.tied.value_counts()
new_dur = notna.groupby(['staff', 'midi'], group_keys=False).apply(merge_notes).sort_index()
try:
df.loc[new_dur.index, 'duration'] = new_dur.duration
except:
print(new_dur)
if return_dropped:
df.loc[new_dur.index, 'dropped'] = new_dur.dropped
df = df.drop(new_dur.dropped.sum())
if perform_checks:
after = df.tied.value_counts()
assert before[1] == after[1], f"Error while merging ties. Before:\n{before}\nAfter:\n{after}"
return df
@function_logger
def midi2octave(midi, fifths=None):
""" For a given MIDI pitch, calculate the octave. Middle octave = 4
Uses: fifths2pc(), map2elements()
Parameters
----------
midi : :obj:`int`
MIDI pitch (positive integer)
fifths : :obj:`int`, optional
To be precise, for some Tonal Pitch Classes, the octave deviates
from the simple formula ``MIDI // 12 - 1``, e.g. for B# or Cb.
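    Example
    -------
    >>> midi2octave(60)
    4
    >>> midi2octave(60, fifths=12)  # B sharp
    3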
"""
try:
midi = int(float(midi))
except:
if isinstance(midi, Iterable):
return map2elements(midi, midi2octave)
return midi
i = -1
if fifths is not None:
pc = fifths2pc(fifths)
if midi % 12 != pc:
logger.debug(f"midi2octave(): The Tonal Pitch Class {fifths} cannot be MIDI pitch {midi} ")
if fifths in [
12, # B#
19, # B##
26, # B###
24, # A###
]:
i -= 1
elif fifths in [
-7, # Cb
-14, # Cbb
-21, # Cbbb
-19, # Dbbb
]:
i += 1
return midi // 12 + i
def midi2name(midi):
names = {0: 'C',
1: 'C#/Db',
2: 'D',
3: 'D#/Eb',
4: 'E',
5: 'F',
6: 'F#/Gb',
7: 'G',
8: 'G#/Ab',
9: 'A',
10: 'A#/Bb',
11: 'B'}
return names[midi % 12]
def mn2int(mn_series):
""" Turn a series of measure numbers parsed as strings into two integer columns 'mn' and 'volta'. """
try:
split = mn_series.fillna('').str.extract(r"(?P<mn>\d+)(?P<volta>[a-g])?")
except:
mn_series = pd.DataFrame(mn_series, columns=['mn', 'volta'])
try:
return mn_series.astype('Int64')
except:
return mn_series
split.mn = pd.to_numeric(split.mn)
split.volta = pd.to_numeric(split.volta.map({'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}))
return split.astype('Int64')
@function_logger
def name2fifths(nn):
""" Turn a note name such as `Ab` into a tonal pitch class, such that -1=F, 0=C, 1=G etc.
Uses: split_note_name()
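    Example
    -------
    >>> name2fifths('Ab'), name2fifths('F#')
    (-4, 6)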
"""
    if isinstance(nn, int) or pd.isnull(nn):
return nn
name_tpcs = {'C': 0, 'D': 2, 'E': 4, 'F': -1, 'G': 1, 'A': 3, 'B': 5}
accidentals, note_name = split_note_name(nn, count=True, logger=logger)
if note_name is None:
return None
step_tpc = name_tpcs[note_name.upper()]
return step_tpc + 7 * accidentals
@function_logger
def name2pc(nn):
    """ Turn a note name such as `Ab` into a chromatic pitch class in 0-11, such that C=0, G=7, Ab=8 etc.
Uses: split_note_name()
"""
if nn.__class__ == int or pd.isnull(nn):
logger.warning(f"'{nn}' is not a valid note name.")
return nn
name_tpcs = {'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11}
accidentals, note_name = split_note_name(nn, count=True, logger=logger)
if note_name is None:
return None
step_pc = name_tpcs[note_name.upper()]
return (step_pc + accidentals) % 12
def next2sequence(nxt):
""" Turns a 'next' column into the correct sequence of MCs corresponding to unfolded repetitions.
Requires that the Series' index be the MCs as in ``measures.set_index('mc').next``.
"""
mc = nxt.index[0]
result = []
nxt = nxt.to_dict()
while mc != -1:
result.append(mc)
new_mc, *rest = nxt[mc]
if len(rest) > 0:
nxt[mc] = rest
mc = new_mc
return result
@function_logger
def no_collections_no_booleans(df, coll_columns=None, bool_columns=None):
"""
Cleans the DataFrame columns ['next', 'chord_tones', 'added_tones'] from tuples and the columns
['globalkey_is_minor', 'localkey_is_minor'] from booleans, converting them all to integers
"""
if df is None:
return df
collection_cols = ['next', 'chord_tones', 'added_tones']
bool_cols = ['globalkey_is_minor', 'localkey_is_minor']
if coll_columns is not None:
collection_cols += list(coll_columns)
if bool_columns is not None:
bool_cols += list(bool_columns)
    try:
        cc = [c for c in collection_cols if c in df.columns]
    except AttributeError:
        logger.error(f"df needs to be a DataFrame, not a {df.__class__}.")
        return df
if len(cc) > 0:
df = df.copy()
df.loc[:, cc] = transform(df[cc], iterable2str, column_wise=True)
logger.debug(f"Transformed iterables in the columns {cc} to strings.")
bc = [c for c in bool_cols if c in df.columns]
if len(bc) > 0:
conv = {c: int for c in bc}
df = df.astype(conv)
return df
def ordinal_suffix(n):
    suffixes = {
        '1': 'st',
        '2': 'nd',
        '3': 'rd'
    }
    n = str(n)
    if n[-1] in suffixes:
        return suffixes[n[-1]]
    return 'th'
def parts_info(d):
"""
Turns a (nested) ``metadata['parts']`` dict into a flat dict based on staves.
Example
-------
>>> d = s.mscx.metadata
>>> parts_info(d['parts'])
{'staff_1_instrument': 'Voice',
'staff_1_ambitus': '66-76 (F#4-E5)',
'staff_2_instrument': 'Voice',
'staff_2_ambitus': '55-69 (G3-A4)',
'staff_3_instrument': 'Voice',
'staff_3_ambitus': '48-67 (C3-G4)',
'staff_4_instrument': 'Voice',
'staff_4_ambitus': '41-60 (F2-C4)'}
"""
res = {}
for part_dict in d.values():
for id in part_dict['staves']:
name = f"staff_{id}"
res[f"{name}_instrument"] = part_dict['instrument']
amb_name = name + '_ambitus'
res[amb_name] = ambitus2oneliner(part_dict[amb_name])
return res
@function_logger
def path2type(path):
""" Determine a file's type by scanning its path for default components in the constant STANDARD_NAMES.
    Parameters
    ----------
    path : :obj:`str`
        Path of the file whose type is to be inferred.
    Returns
    -------
    :obj:`str`
        The inferred type: one of the STANDARD_NAMES, 'scores', or 'unknown'.
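    Example
    -------
    >>> path2type('corpus/harmonies/piece.tsv')
    'expanded'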
"""
comp2type = {comp: comp for comp in STANDARD_NAMES}
comp2type['MS3'] = 'scores'
comp2type['harmonies'] = 'expanded'
found_components = [comp for comp in comp2type.keys() if comp in path]
n_found = len(found_components)
if n_found == 0:
score_extensions = ('.mscx', '.mscz', '.cap', '.capx', '.midi', '.mid', '.musicxml', '.mxl', '.xml')
_, fext = os.path.splitext(path)
if fext.lower() in score_extensions:
logger.debug(f"Recognized file extension '{fext}' as score.")
return 'scores'
logger.debug(f"Type could not be inferred from path '{path}'.")
return 'unknown'
if n_found == 1:
typ = comp2type[found_components[0]]
logger.debug(f"Path '{path}' recognized as {typ}.")
return typ
else:
shortened_path = path
        while len(shortened_path) > 0:
shortened_path, base = os.path.split(shortened_path)
for comp in comp2type.keys():
if comp in base:
typ = comp2type[comp]
logger.debug(f"Multiple components ({', '.join(found_components)}) found in path '{path}'. Chose the last one: {typ}")
return typ
logger.warning(f"Components {', '.join(found_components)} found in path '{path}', but not in one of its constituents.")
return 'unknown'
def pretty_dict(d, heading=None):
""" Turns a dictionary into a string where the keys are printed in a column, separated by '->'.
"""
if heading is not None:
d = dict(KEY=str(heading), **d)
left = max(len(str(k)) for k in d.keys())
res = []
for k, v in d.items():
ks = str(k)
if isinstance(v, pd.DataFrame) or isinstance(v, pd.Series):
vs = v.to_string()
else:
vs = str(v)
if '\n' in vs:
lines = vs.split('\n')
res.extend([f"{ks if i == 0 else '':{left}} -> {l}" for i, l in enumerate(lines)])
else:
res.append(f"{ks:{left}} -> {vs}")
if heading is not None:
res.insert(1, '-' * (left + len(heading) + 4))
return '\n'.join(res)
def resolve_dir(d):
""" Resolves '~' to HOME directory and turns ``d`` into an absolute path.
"""
if d is None:
return None
if '~' in d:
return os.path.expanduser(d)
return os.path.abspath(d)
def rgb_tuple2html(t):
""" Converts a single RGB tuple into HTML."""
return rgb_tuple2format(t, format='html')
def rgb_tuple2name(t):
""" Converts a single RGB tuple into its CSS3 name or to HTML if there is none."""
return rgb_tuple2format(t, format='name')
def rgba2attrs(named_tuple):
return {k: str(v) for k, v in named_tuple._asdict().items()}
def rgba2params(named_tuple):
attrs = rgba2attrs(named_tuple)
return {'color_'+k: v for k, v in attrs.items()}
def roman_numeral2fifths(rn, global_minor=False):
""" Turn a Roman numeral into a TPC interval (e.g. for transposition purposes).
Uses: split_scale_degree()
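    Example
    -------
    >>> roman_numeral2fifths('V'), roman_numeral2fifths('bVII')
    (1, -2)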
"""
if pd.isnull(rn):
return rn
rn_tpcs_maj = {'I': 0, 'II': 2, 'III': 4, 'IV': -1, 'V': 1, 'VI': 3, 'VII': 5}
rn_tpcs_min = {'I': 0, 'II': 2, 'III': -3, 'IV': -1, 'V': 1, 'VI': -4, 'VII': -2}
accidentals, rn_step = split_scale_degree(rn, count=True)
if any(v is None for v in (accidentals, rn_step)):
return None
rn_step = rn_step.upper()
step_tpc = rn_tpcs_min[rn_step] if global_minor else rn_tpcs_maj[rn_step]
return step_tpc + 7 * accidentals
def roman_numeral2semitones(rn, global_minor=False):
""" Turn a Roman numeral into a semitone distance from the root (0-11).
Uses: split_scale_degree()
"""
if pd.isnull(rn):
return rn
rn_tpcs_maj = {'I': 0, 'II': 2, 'III': 4, 'IV': 5, 'V': 7, 'VI': 9, 'VII': 11}
rn_tpcs_min = {'I': 0, 'II': 2, 'III': 3, 'IV': 5, 'V': 7, 'VI': 8, 'VII': 10}
accidentals, rn_step = split_scale_degree(rn, count=True)
if any(v is None for v in (accidentals, rn_step)):
return None
rn_step = rn_step.upper()
step_tpc = rn_tpcs_min[rn_step] if global_minor else rn_tpcs_maj[rn_step]
return step_tpc + accidentals
def scale_degree2name(sd, localkey, globalkey):
""" For example, scale degree -1 (fifths, i.e. the subdominant) of the localkey of 'VI' within 'E' minor is 'F'.
Parameters
----------
sd : :obj:`int`
Scale degree expressed as distance from the tonic in fifths.
localkey : :obj:`str`
Local key in which the scale degree is situated, as Roman numeral (can include slash notation such as V/ii).
globalkey : :obj:`str`
Global key as a note name. E.g. `Ab` for Ab major, or 'c#' for C# minor.
Returns
-------
:obj:`str`
The given scale degree, expressed as a note name.
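    Example
    -------
    The example from the summary line:

    >>> scale_degree2name(-1, 'VI', 'e')
    'F'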
"""
if any(pd.isnull(val) for val in (sd, localkey, globalkey)):
return pd.NA
global_minor = globalkey.islower()
if '/' in localkey:
localkey = resolve_relative_keys(localkey, global_minor)
lk_fifths = roman_numeral2fifths(localkey, global_minor)
gk_fifths = name2fifths(globalkey)
sd_transposed = sd + lk_fifths + gk_fifths
return fifths2name(sd_transposed)
@function_logger
def scan_directory(directory, file_re=r".*", folder_re=r".*", exclude_re=r"^(\.|_)", recursive=True, subdirs=False, progress=False, exclude_files_only=False):
""" Generator of file names in ``directory``.
Parameters
----------
    directory : :obj:`str`
        Directory to be scanned for files.
file_re, folder_re : :obj:`str` or :obj:`re.Pattern`, optional
Regular expressions for filtering certain file names or folder names.
The regEx are checked with search(), not match(), allowing for fuzzy search.
    recursive : :obj:`bool`, optional
        By default, sub-directories are recursively scanned. Pass False to scan only ``directory``.
subdirs : :obj:`bool`, optional
By default, full file paths are returned. Pass True to return (path, name) tuples instead.
    progress : :obj:`bool`, optional
        Defaults to False. Pass True to display a progress bar during scanning.
    exclude_files_only : :obj:`bool`, optional
        By default, ``exclude_re`` excludes files and folders. Pass True to exclude only files matching the regEx.
    Yields
    ------
    :obj:`str` or :obj:`tuple`
        Full path of each file meeting the criteria, or a (directory, name) pair if ``subdirs`` is set.
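    Example
    -------
    Illustrative call (the path is hypothetical):

    >>> for path in scan_directory('~/corpus', file_re=r"\.mscx$"):  # doctest: +SKIP
    ...     print(path)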
"""
def traverse(d):
nonlocal counter
def check_regex(reg, s, excl=exclude_re):
try:
res = re.search(reg, s) is not None and re.search(excl, s) is None
except:
print(reg)
raise
return res
for dir_entry in os.scandir(d):
name = dir_entry.name
path = os.path.join(d, name)
if dir_entry.is_dir() and (recursive or folder_re != '.*'):
for res in traverse(path):
yield res
else:
if pbar is not None:
pbar.update()
if folder_re == '.*':
folder_passes = True
else:
folder_path = os.path.dirname(path)
if recursive:
folder_passes = check_regex(folder_re, folder_path, excl='^$') # passes if the folder path matches the regex
else:
folder = os.path.basename(folder_path)
folder_passes = check_regex(folder_re, folder, excl='^$') # passes if the folder name itself matches the regex
if folder_passes and not exclude_files_only: # True if the exclude_re should also exclude folder names
folder_passes = check_regex(folder_re, folder_path) # is false if any part of the folder path matches exclude_re
if dir_entry.is_file() and folder_passes and check_regex(file_re, name):
counter += 1
if pbar is not None:
pbar.set_postfix({'selected': counter})
if subdirs:
yield (d, name)
else:
yield path
if exclude_re is None or exclude_re == '':
exclude_re = '^$'
directory = resolve_dir(directory)
counter = 0
if not os.path.isdir(directory):
logger.warning("Not an existing directory: " + directory)
pbar = tqdm(desc='Scanning files', unit=' files') if progress else None
return traverse(directory)
def column_order(df, first_cols=None, sort=True):
"""Sort DataFrame columns so that they start with the order of ``first_cols``, followed by those not included. """
if first_cols is None:
first_cols = [
'mc', 'mc_playthrough', 'mn', 'mn_playthrough', 'quarterbeats', 'mc_onset', 'mn_onset', 'beat', 'event', 'timesig', 'staff', 'voice', 'duration', 'tied',
'gracenote', 'nominal_duration', 'scalar', 'tpc', 'midi', 'volta', 'chord_id']
cols = df.columns
remaining = [col for col in cols if col not in first_cols]
if sort:
remaining = sorted(remaining)
    sorted_cols = [col for col in first_cols if col in cols] + remaining
    return df[sorted_cols]
def sort_note_list(df, mc_col='mc', mc_onset_col='mc_onset', midi_col='midi', duration_col='duration'):
""" Sort every measure (MC) by ['mc_onset', 'midi', 'duration'] while leaving gracenotes' order (duration=0) intact.
    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Note list to be sorted.
    mc_col, mc_onset_col, midi_col, duration_col : :obj:`str`, optional
        Names of the columns to sort by.
    Returns
    -------
    :obj:`pandas.DataFrame`
        Sorted note list.
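    Example
    -------
    Illustrative sketch (``notes_df`` stands for a parsed note list with the default column names):

    >>> sorted_notes = sort_note_list(notes_df)  # doctest: +SKIP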
"""
is_grace = df[duration_col] == 0
grace_ix = {k: v.to_numpy() for k, v in df[is_grace].groupby([mc_col, mc_onset_col]).groups.items()}
has_nan = df[midi_col].isna().any()
if has_nan:
df.loc[:, midi_col] = df[midi_col].fillna(1000)
normal_ix = df.loc[~is_grace, [mc_col, mc_onset_col, midi_col, duration_col]].groupby([mc_col, mc_onset_col]).apply(
lambda gr: gr.index[np.lexsort((gr.values[:, 3], gr.values[:, 2]))].to_numpy())
sorted_ixs = [np.concatenate((grace_ix[onset], ix)) if onset in grace_ix else ix for onset, ix in
normal_ix.iteritems()]
df = df.reindex(np.concatenate(sorted_ixs)).reset_index(drop=True)
if has_nan:
df.loc[:, midi_col] = df[midi_col].replace({1000: np.nan}).astype('Int64')
return df
def sort_tpcs(tpcs, ascending=True, start=None):
""" Sort tonal pitch classes by order on the piano.
Uses: fifths2pc()
Parameters
----------
tpcs : collection of :obj:`int`
Tonal pitch classes to sort.
ascending : :obj:`bool`, optional
Pass False to sort by descending order.
start : :obj:`int`, optional
Start on or above this TPC.
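    Example
    -------
    >>> sort_tpcs([0, 1, 2, 3])
    [0, 2, 1, 3]
    >>> sort_tpcs([0, 1, 2, 3], start=1)
    [1, 3, 0, 2]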
"""
res = sorted(tpcs, key=lambda x: (fifths2pc(x), -x))
if start is not None:
pcs = [fifths2pc(tpc) for tpc in res]
start = fifths2pc(start)
i = 0
while i < len(pcs) - 1 and pcs[i] < start:
i += 1
res = res[i:] + res[:i]
return res if ascending else list(reversed(res))
@function_logger
def split_alternatives(df, column='label', regex=r"-(?!(\d|b+\d|\#+\d))", max=2, inplace=False, alternatives_only=False):
"""
Splits labels that come with an alternative separated by '-' and adds
a new column. Only one alternative is taken into account. `df` is
mutated inplace.
Parameters
----------
df : :obj:`pandas.DataFrame`
Dataframe where one column contains DCML chord labels.
column : :obj:`str`, optional
Name of the column that holds the harmony labels.
regex : :obj:`str`, optional
The regular expression (or simple string) that detects the character combination used to separate alternative annotations.
By default, alternatives are separated by a '-' that does not precede a scale degree such as 'b6' or '3'.
max : :obj:`int`, optional
Maximum number of admitted alternatives, defaults to 2.
inplace : :obj:`bool`, optional
Pass True if you want to mutate ``df``.
alternatives_only : :obj:`bool`, optional
By default the alternatives are added to the original DataFrame (``inplace`` or not).
Pass True if you just need the split alternatives.
Example
-------
>>> import pandas as pd
>>> labels = pd.read_csv('labels.csv')
>>> split_alternatives(labels, inplace=True)
"""
if not inplace:
df = df.copy()
alternatives = df[column].str.split(regex, expand=True)
alternatives.dropna(axis=1, how='all', inplace=True)
alternatives.columns = range(alternatives.shape[1])
if alternatives_only:
columns = [column] + [f"alt_{column}" if i == 1 else f"alt{i}_{column}" for i in alternatives.columns[1:]]
alternatives.columns = columns
return alternatives.iloc[:, :max]
if len(alternatives.columns) > 1:
logger.debug("Labels split into alternatives.")
df.loc[:, column] = alternatives[0]
position = df.columns.get_loc(column) + 1
for i in alternatives.columns[1:]:
if i == max:
break
alt_name = f"alt_{column}" if i == 1 else f"alt{i}_{column}"
df.insert(position, alt_name, alternatives[i].fillna(np.nan)) # replace None by NaN
position += 1
if len(alternatives.columns) > max:
logger.warning(
f"More than {max} alternatives are not taken into account:\n{alternatives[alternatives[2].notna()]}")
else:
logger.debug("Contains no alternative labels.")
if not inplace:
return df
@function_logger
def split_note_name(nn, count=False):
""" Splits a note name such as 'Ab' into accidentals and name.
Parameters
----------
nn : :obj:`str`
Note name.
count : :obj:`bool`, optional
Pass True to get the accidentals as integer rather than as string.
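Example
-------
>>> split_note_name('Ab', count=True)
(-1, 'A')
>>> split_note_name('c#')
('#', 'c')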
"""
m = re.match("^([A-G]|[a-g])(#*|b*)$", str(nn))
if m is None:
logger.error(f"{nn} is not a valid note name.")
return None, None
note_name, accidentals = m.group(1), m.group(2)
if count:
accidentals = accidentals.count('#') - accidentals.count('b')
return accidentals, note_name
@function_logger
def split_scale_degree(sd, count=False):
""" Splits a scale degree such as 'bbVI' or 'b6' into accidentals and numeral.
Parameters
----------
sd : :obj:`str`
Scale degree.
count : :obj:`bool`, optional
Pass True to get the accidentals as integer rather than as string.
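Example
-------
>>> split_scale_degree('bbVI', count=True)
(-2, 'VI')
>>> split_scale_degree('b6')
('b', '6')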
"""
m = re.match(r"^(#*|b*)(VII|VI|V|IV|III|II|I|vii|vi|v|iv|iii|ii|i|\d)$", str(sd))
if m is None:
logger.error(f"{sd} is not a valid scale degree.")
return None, None
acc, num = m.group(1), m.group(2)
if count:
acc = acc.count('#') - acc.count('b')
return acc, num
# def chunkstring(string, length=80):
# """ Generate chunks of a given length """
# string = str(string)
# return (string[0 + i:length + i] for i in range(0, len(string), length))
#
#
# def string2lines(string, length=80):
# """ Use chunkstring() and make chunks into lines. """
# return '\n'.join(chunkstring(string, length))
@function_logger
def test_binary(command):
"""Return ``command`` if it is an existing file or an installed command, otherwise None."""
if command is None:
return command
if os.path.isfile(command):
logger.debug(f"Found MuseScore binary: {command}")
return command
if which(command) is None:
logger.warning(f"MuseScore binary not found and not an installed command: {command}")
return None
else:
logger.debug(f"Found MuseScore command: {command}")
return command
def unfold_repeats(df, mc_sequence):
""" Brings the rows of a DataFrame into the order given by a succession of MCs. MCs may repeat.
Parameters
----------
df : :obj:`pandas.DataFrame`
DataFrame needs to have the columns 'mc' and 'mn'.
mc_sequence : :obj:`pandas.Series`
A Series of the format ``{mc_playthrough: mc}`` where ``mc_playthrough`` is a running
count of the MCs in playing order, including repeats.
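Example
-------
Illustrative: a hypothetical two-measure piece played twice.
>>> df = pd.DataFrame({'mc': [1, 2], 'mn': [1, 2]})
>>> unfold_repeats(df, pd.Series([1, 2, 1, 2], index=[1, 2, 3, 4]))
   mc  mc_playthrough  mn
0   1               1   1
1   2               2   2
2   1               3   1
3   2               4   2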
"""
# < v0.5: playthrough <=> mn; >= v0.5: playthrough <=> mc
vc = df.mc.value_counts()
res = df.set_index('mc')
seq = mc_sequence[mc_sequence.isin(res.index)]
playthrough_col = sum([[playthrough] * vc[mc] for playthrough, mc in seq.items()], [])
res = res.loc[seq.values].reset_index()
res.insert(res.columns.get_loc('mc') + 1, 'mc_playthrough', playthrough_col)
return res
@contextmanager
def unpack_mscz(mscz, tmp_dir=None):
"""Context manager that extracts the MSCX score from a zipped MuseScore file (.mscz) into a
temporary file, yields that file's path, and deletes the file afterwards."""
if tmp_dir is None:
tmp_dir = os.path.dirname(mscz)
tmp_file = Temp(suffix='.mscx', prefix='.', dir=tmp_dir, delete=False)
with Zip(mscz) as zip_file:
mscx_files = [f for f in zip_file.namelist() if f.endswith('.mscx')]
if len(mscx_files) > 1:
logger.info(f"{mscz} contains several MSCX files. Picking the first one")
mscx = mscx_files[0]
with zip_file.open(mscx) as mscx_file:
with tmp_file as tmp:
for line in mscx_file:
tmp.write(line)
try:
yield tmp_file.name
except:
logger.error(f"Error while dealing with the temporarily unpacked {os.path.basename(mscz)}")
raise
finally:
os.remove(tmp_file.name)
@function_logger
def update_labels_cfg(labels_cfg):
"""Validate ``labels_cfg`` against the admitted keys and return the updated configuration dict."""
keys = ['staff', 'voice', 'label_type', 'positioning', 'decode', 'column_name', 'color_format']
if 'logger' in labels_cfg:
del(labels_cfg['logger'])
updated = update_cfg(cfg_dict=labels_cfg, admitted_keys=keys, logger=logger)
if 'logger' in updated:
del(updated['logger'])
return updated
@function_logger
def write_metadata(df, path, markdown=True, index=False):
"""Create or update a metadata TSV file at ``path`` (or ``path``/metadata.tsv if ``path`` is a directory)
and, if ``markdown`` is True, mirror selected columns into the '# Overview' section of a README.md
in the same folder."""
if os.path.isdir(path):
path = os.path.join(path, 'metadata.tsv')
if not os.path.isfile(path):
write_this = df
msg = 'Created'
else:
try:
# Trying to load an existing 'metadata.tsv' file to update overlapping indices, assuming two index levels
previous = pd.read_csv(path, sep='\t', dtype=str, index_col=[0, 1])
ix_union = previous.index.union(df.index)
col_union = previous.columns.union(df.columns)
previous = previous.reindex(index=ix_union, columns=col_union)
previous.loc[df.index, df.columns] = df
write_this = previous
msg = 'Updated'
except Exception:
write_this = df
msg = 'Replaced'
first_cols = ['last_mc', 'last_mn', 'KeySig', 'TimeSig', 'label_count', 'harmony_version',
'annotated_key', 'annotators', 'reviewers', 'composer', 'workTitle', 'movementNumber',
'movementTitle',
'workNumber', 'poet', 'lyricist', 'arranger', 'copyright', 'creationDate',
'mscVersion', 'platform', 'source', 'translator', 'musescore', 'ambitus']
write_this.sort_index(inplace=True)
column_order(write_this, first_cols).to_csv(path, sep='\t', index=index)
logger.info(f"{msg} {path}")
if markdown:
rename4markdown = {
'fnames': 'file_name',
'last_mn': 'measures',
'label_count': 'labels',
'harmony_version': 'standard',
'annotators': 'annotators',
'reviewers': 'reviewers',
}
drop_index = 'fnames' in write_this.columns
md = write_this.reset_index(drop=drop_index).fillna('')
for c in rename4markdown.keys():
if c not in md.columns:
md[c] = ''
md = md.rename(columns=rename4markdown)[list(rename4markdown.values())]
md_table = str(df2md(md))
p = os.path.dirname(path)
readme = os.path.join(p, 'README.md')
if os.path.isfile(readme):
msg = 'Updated'
with open(readme, 'r', encoding='utf-8') as f:
lines = f.readlines()
else:
msg = 'Created'
lines = []
# If the README exists, everything from the line containing '# Overview' onwards is overwritten; otherwise the table is appended at the end.
with open(readme, 'w', encoding='utf-8') as f:
for line in lines:
if '# Overview' in line:
break
f.write(line)
else:
f.write('\n\n')
f.write(md_table)
logger.info(f"{msg} {readme}")
def abs2rel_key(absolute, localkey, global_minor=False):
"""
Expresses a Roman numeral as scale degree relative to a given localkey.
The result changes depending on whether Roman numeral and localkey are
interpreted within a global major or minor key.
Uses: :py:func:`split_scale_degree`
Parameters
----------
absolute : :obj:`str`
Relative key expressed as Roman scale degree of the local key.
localkey : :obj:`str`
The local key in terms of which `absolute` will be expressed.
global_minor : bool, optional
Has to be set to True if `absolute` and `localkey` are scale degrees of a global minor key.
Examples
--------
In a minor context, the key of II would appear within the key of vii as #III.
>>> abs2rel_key('iv', 'VI', global_minor=False)
'bvi' # F minor expressed with respect to A major
>>> abs2rel_key('iv', 'vi', global_minor=False)
'vi' # F minor expressed with respect to A minor
>>> abs2rel_key('iv', 'VI', global_minor=True)
'vi' # F minor expressed with respect to Ab major
>>> abs2rel_key('iv', 'vi', global_minor=True)
'#vi' # F minor expressed with respect to Ab minor
>>> abs2rel_key('VI', 'IV', global_minor=False)
'III' # A major expressed with respect to F major
>>> abs2rel_key('VI', 'iv', global_minor=False)
'#III' # A major expressed with respect to F minor
>>> abs2rel_key('VI', 'IV', global_minor=True)
'bIII' # Ab major expressed with respect to F major
>>> abs2rel_key('VI', 'iv', global_minor=True)
'III' # Ab major expressed with respect to F minor
"""
if pd.isnull(absolute):
return np.nan
maj_rn = ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII']
min_rn = ['i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii']
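# 7x7 lookup table with the accidental adjustments between scale degrees; only consulted when global_minor=True.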
shifts = np.array([[0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 1],
[0, 1, 1, 0, 0, 1, 1],
[0, 0, 0, -1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1],
[0, 0, 1, 0, 0, 1, 1],
[0, 1, 1, 0, 1, 1, 1]])
abs_acc, absolute = split_scale_degree(absolute, count=True, logger=logger)
localkey_acc, localkey = split_scale_degree(localkey, count=True, logger=logger)
shift = abs_acc - localkey_acc
steps = maj_rn if absolute.isupper() else min_rn
key_num = maj_rn.index(localkey.upper())
abs_num = (steps.index(absolute) - key_num) % 7
step = steps[abs_num]
if localkey.islower() and abs_num in [2, 5, 6]:
shift += 1
if global_minor:
key_num = (key_num - 2) % 7
shift -= shifts[key_num][abs_num]
acc = shift * '#' if shift > 0 else -shift * 'b'
return acc + step
@function_logger
def rel2abs_key(rel, localkey, global_minor=False):
"""
Expresses a Roman numeral that is expressed relative to a localkey
as scale degree of the global key. For local keys {III, iii, VI, vi, VII, vii}
the result changes depending on whether the global key is major or minor.
Uses: :py:func:`split_scale_degree`
Parameters
----------
rel : :obj:`str`
Relative key or chord expressed as Roman scale degree of the local key.
localkey : :obj:`str`
The local key to which `rel` is relative.
global_minor : bool, optional
Has to be set to True if `localkey` is a scale degree of a global minor key.
Examples
--------
If the label viio6/VI appears in the context of the local key VI or vi,
the absolute key to which the viio6 applies depends on the global key.
The comments express the examples in relation to global C major or C minor.
>>> rel2abs_key('vi', 'VI', global_minor=False)
'#iv' # vi of A major = F# minor
>>> rel2abs_key('vi', 'vi', global_minor=False)
'iv' # vi of A minor = F minor
>>> rel2abs_key('vi', 'VI', global_minor=True)
'iv' # vi of Ab major = F minor
>>> rel2abs_key('vi', 'vi', global_minor=True)
'biv' # vi of Ab minor = Fb minor
The same examples hold if you're expressing in terms of the global key
the root of a VI-chord within the local keys VI or vi.
"""
if pd.isnull(rel) or pd.isnull(localkey):
return rel
maj_rn = ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII']
min_rn = ['i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii']
shifts = np.array([[0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 1],
[0, 1, 1, 0, 0, 1, 1],
[0, 0, 0, -1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1],
[0, 0, 1, 0, 0, 1, 1],
[0, 1, 1, 0, 1, 1, 1]])
rel_acc, rel = split_scale_degree(rel, count=True, logger=logger)
localkey_acc, localkey = split_scale_degree(localkey, count=True, logger=logger)
shift = rel_acc + localkey_acc
steps = maj_rn if rel.isupper() else min_rn
rel_num = steps.index(rel)
key_num = maj_rn.index(localkey.upper())
step = steps[(rel_num + key_num) % 7]
if localkey.islower() and rel_num in [2, 5, 6]:
shift -= 1
if global_minor:
key_num = (key_num - 2) % 7
shift += shifts[rel_num][key_num]
acc = shift * '#' if shift > 0 else -shift * 'b'
return acc + step
@function_logger
def replace_index_by_intervals(df, position_col='quarterbeats', duration_col='duration_qb', closed='left',
filter_zero_duration=False, round=3):
"""Given an annotations table with positions and durations, replaces its index with an :obj:`pandas.IntervalIndex <pandas.IntervalIndex>`.
Underspecified rows are removed.
Parameters
----------
df : :obj:`pandas.DataFrame`
Annotation table containing the columns ``position_col`` (default: 'quarterbeats') and
``duration_col`` (default: 'duration_qb').
position_col : :obj:`str`, optional
Name of the column containing positions.
duration_col : :obj:`str`, optional
Name of the column containing durations.
closed : :obj:`str`, optional
'left', 'right' or 'both', defining which of the interval boundaries are closed.
filter_zero_duration : :obj:`bool`, optional
Defaults to False, meaning that rows with zero durations are maintained. Pass True to remove them.
round : :obj:`int`, optional
To how many decimal places to round the intervals' boundary values.
Returns
-------
:obj:`pandas.DataFrame`
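Example
-------
Illustrative (hypothetical table; the exact repr of the IntervalIndex depends on the pandas version):
>>> df = pd.DataFrame({'quarterbeats': [0.0, 1.5], 'duration_qb': [1.5, 2.0], 'label': ['I', 'V7']})
>>> replace_index_by_intervals(df).index
IntervalIndex([[0.0, 1.5), [1.5, 3.5)], closed='left')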
"""
if not all(c in df.columns for c in (position_col, duration_col)):
missing = [c for c in (position_col, duration_col) if c not in df.columns]
plural = 's' if len(missing) > 1 else ''
logger.warning(f"Column{plural} not present in DataFrame: {', '.join(missing)}")
return df
mask = df[position_col].notna()
if filter_zero_duration:
mask &= (df[duration_col] > 0)
df = df[mask].copy() # work on a copy so that setting the new index does not alter a slice of the input
left = df[position_col].astype(float)
right = left + df[duration_col].astype(float)
df.index = pd.IntervalIndex.from_arrays(left=left.round(round), right=right.round(round), closed=closed)
return df
def resolve_relative_keys(relativeroot, minor=False):
""" Resolve nested relative keys, e.g. 'V/V/V' => 'VI'.
Uses: :py:func:`rel2abs_key`, :py:func:`str_is_minor`
Parameters
----------
relativeroot : :obj:`str`
One or several relative keys, e.g. iv/v/VI (fourth scale degree of the fifth scale degree of the sixth scale degree)
minor : :obj:`bool`, optional
Pass True if the last of the relative keys is to be interpreted within a minor context.
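Example
-------
>>> resolve_relative_keys('V/V/V')
'VI'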
"""
if pd.isnull(relativeroot):
return relativeroot
spl = relativeroot.split('/')
if len(spl) < 2:
return relativeroot
if len(spl) == 2:
applied, to = spl
return rel2abs_key(applied, to, minor)
previous, last = '/'.join(spl[:-1]), spl[-1]
return rel2abs_key(resolve_relative_keys(previous, str_is_minor(last, is_name=False)), last, minor)
def series_is_minor(S, is_name=True):
""" Returns boolean Series where every value in ``S`` representing a minor key/chord is True."""
# regex = r'([A-Ga-g])[#|b]*' if is_name else '[#|b]*(\w+)'
# return S.str.replace(regex, lambda m: m.group(1)).str.islower()
return S.str.islower() # as soon as one character is not lowercase, it should be major
def str_is_minor(tone, is_name=True):
""" Returns True if ``tone`` represents a minor key or chord."""
# regex = r'([A-Ga-g])[#|b]*' if is_name else '[#|b]*(\w+)'
# m = re.match(regex, tone)
# if m is None:
# return m
# return m.group(1).islower()
return tone.islower()
@function_logger
def transpose_changes(changes, old_num, new_num, old_minor=False, new_minor=False):
"""
Since the interval sizes expressed by the changes of the DCML harmony syntax
depend on the numeral's position in the scale, these may change if the numeral
is transposed. This function expresses the same changes for the new position.
Chord tone alterations (of 3 and 5) stay untouched.
Uses: :py:func:`changes2tpc`
Parameters
----------
changes : :obj:`str`
A string of changes following the DCML harmony standard.
old_num, new_num : :obj:`str`:
Old numeral, new numeral.
old_minor, new_minor : :obj:`bool`, optional
For each numeral, pass True if it occurs in a minor context.
"""
if pd.isnull(changes):
return changes
old = changes2tpc(changes, old_num, minor=old_minor, root_alterations=True)
new = changes2tpc(changes, new_num, minor=new_minor, root_alterations=True)
res = []
get_acc = lambda n: n * '#' if n > 0 else -n * 'b'
for (full, added, acc, chord_interval, iv1), (_, _, _, _, iv2) in zip(old, new):
if iv1 is None or iv1 == iv2:
res.append(full)
else:
d = iv2 - iv1
if d % 7 > 0:
logger.warning(
f"The difference between the intervals of {full} in {old_num} ({'minor' if old_minor else 'major'}) and in {new_num} ({'minor' if new_minor else 'major'}) is not a whole number of chromatic semitones.")
n_acc = acc.count('#') - acc.count('b')
new_acc = get_acc(n_acc - d // 7)
res.append(added + new_acc + chord_interval)
return ''.join(res)
@function_logger
def features2tpcs(numeral, form=None, figbass=None, changes=None, relativeroot=None, key='C', minor=None,
merge_tones=True, bass_only=False, mc=None):
"""
Given the features of a chord label, this function returns the chord tones
in the order of the inversion, starting from the bass note. The tones are
expressed as tonal pitch classes, where -1=F, 0=C, 1=G etc.
Uses: :py:func:`~.utils.changes2list`, :py:func:`~.utils.name2fifths`, :py:func:`~.utils.resolve_relative_keys`, :py:func:`~.utils.roman_numeral2fifths`,
:py:func:`~.utils.sort_tpcs`, :py:func:`~.utils.str_is_minor`
Parameters
----------
numeral: :obj:`str`
Roman numeral of the chord's root
form: {None, 'M', 'o', '+', '%', '+M'}, optional
Indicates the chord type if not a major or minor triad (for which ``form`` is None).
'%' and 'M' can only occur as tetrads, not as triads.
figbass: {None, '6', '64', '7', '65', '43', '2'}, optional
Indicates chord's inversion. Pass None for triad root position.
changes: :obj:`str`, optional
Added steps such as '+6' or suspensions such as '4' or any combination such as (9+64).
Numbers need to be in descending order.
relativeroot: :obj:`str`, optional
Pass a Roman scale degree if `numeral` is to be applied to a different scale
degree of the local key, as in 'V65/V'
key : :obj:`str` or :obj:`int`, optional
The local key expressed as the root's note name or a tonal pitch class.
If it is a name and `minor` is `None`, uppercase means major and lowercase minor.
If it is a tonal pitch class, `minor` needs to be specified.
minor : :obj:`bool`, optional
Pass True for minor and False for major. Can be omitted if `key` is a note name.
This affects calculation of chords related to III, VI and VII.
merge_tones : :obj:`bool`, optional
Pass False if you want the function to return two tuples, one with (potentially suspended)
chord tones and one with added notes.
bass_only : :obj:`bool`, optional
Return only the bass note instead of all chord tones.
mc : int or str
Pass measure count to display it in warnings.
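Example
-------
Illustrative: the dominant seventh chord of C major as tonal pitch classes (G, B, D, F).
>>> features2tpcs('V', figbass='7', key='C')
(1, 5, 2, -1)
>>> features2tpcs('V', figbass='7', key='C', bass_only=True)
1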
"""
if pd.isnull(numeral) or numeral == '@none':
if bass_only or merge_tones:
return np.nan
else:
return {
'chord_tones': np.nan,
'added_tones': np.nan,
'root': np.nan,
}
form, figbass, changes, relativeroot = tuple(
'' if pd.isnull(val) else val for val in (form, figbass, changes, relativeroot))
label = f"{numeral}{form}{figbass}{'(' + changes + ')' if changes != '' else ''}{'/' + relativeroot if relativeroot != '' else ''}"
MC = '' if mc is None else f'MC {mc}: '
if minor is None:
try:
minor = str_is_minor(key, is_name=True)
logger.debug(f"Mode inferred from {key}.")
except:
raise ValueError(f"If parameter 'minor' is not specified, 'key' needs to be a string, not {key}")
key = name2fifths(key)
if form in ['%', 'M', '+M']:
assert figbass in ['7', '65', '43',
'2'], f"{MC}{label}: {form} requires figbass (7, 65, 43, or 2) since it specifies a chord's seventh."
if relativeroot != '':
resolved = resolve_relative_keys(relativeroot, minor)
rel_minor = str_is_minor(resolved, is_name=False)
transp = roman_numeral2fifths(resolved, minor)
logger.debug(
f"{MC}Chord applied to {relativeroot}. Therefore transposing it by {transp} fifths.")
return features2tpcs(numeral=numeral, form=form, figbass=figbass, relativeroot=None, changes=changes,
key=key + transp, minor=rel_minor, merge_tones=merge_tones, bass_only=bass_only, mc=mc,
logger=logger)
if numeral.lower() == '#vii' and not minor:
# logger.warning(
# f"{MC}{label} in a major context is most probably an annotation error.")
logger.warning(f"{MC}{numeral} in major context corrected to {numeral[1:]}.")
numeral = numeral[1:]
root_alteration, num_degree = split_scale_degree(numeral, count=True, logger=logger)
# build 2-octave diatonic scale on C major/minor
root = ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII'].index(num_degree.upper())
tpcs = 2 * [i + key for i in (0, 2, -3, -1, 1, -4, -2)] if minor else 2 * [i + key for i in (0, 2, 4, -1, 1, 3, 5)]
# starting the scale from chord root, i.e. root will be tpcs[0], the chord's seventh tpcs[6] etc.
tpcs = tpcs[root:] + tpcs[:root]
root = tpcs[0] + 7 * root_alteration
tpcs[0] = root # octave stays diatonic, is not altered
logger.debug(f"{num_degree}: The {'minor' if minor else 'major'} scale starting from the root: {tpcs}")
def set_iv(chord_interval, interval_size):
""" Add to the interval of a given chord interval in `tpcs` (both viewed from the root note).
Parameters
----------
chord_interval : :obj:`int`
Pass 0 for the root note, 2 for the third, 8 for the ninth etc.
interval_size : :obj:`int`
Stack-of-fifths interval.
"""
nonlocal tpcs, root
iv = root + interval_size
tpcs[chord_interval] = iv
tpcs[chord_interval + 7] = iv
is_triad = figbass in ['', '6', '64']
is_seventh_chord = figbass in ['7', '65', '43', '2']
if not is_triad and not is_seventh_chord:
raise ValueError(f"{MC}{figbass} is not a valid chord inversion.")
if form == 'o':
set_iv(2, -3)
set_iv(4, -6)
if is_seventh_chord:
set_iv(6, -9)
elif form == '%':
set_iv(2, -3)
set_iv(4, -6)
set_iv(6, -2)
elif form == '+':
set_iv(2, 4)
set_iv(4, 8)
if is_seventh_chord:
set_iv(6, -2)
elif form == '+M':
set_iv(2, 4)
set_iv(4, 8)
set_iv(6, 5)
else: # triad with or without major or minor seven
set_iv(4, 1)
if num_degree.isupper():
set_iv(2, 4)
else:
set_iv(2, -3)
if form == 'M':
set_iv(6, 5)
elif is_seventh_chord:
set_iv(6, -2)
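# The chord tones are the root, third, fifth and, for tetrads, the seventh, i.e. indices 0, 2, 4(, 6) of the scale built above.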
tone_functions = (0, 2, 4, 6) if is_seventh_chord else (0, 2, 4)
root_position = {i: [tpcs[i]] for i in tone_functions}
replacements = {i: [] for i in tone_functions}
def replace_chord_tone(which, by):
nonlocal root_position, replacements
if which in root_position:
root_position[which] = []
replacements[which].insert(0, by)
else:
logger.warning(f"Only chord tones [0,2,4,(6)] can be replaced, not {which}")
# apply changes
alts = changes2list(changes, sort=False)
added_notes = []
for full, add_remove, acc, chord_interval in alts:
added = add_remove == '+'
subtracted = add_remove == '-'
replacing_upper = add_remove == '^'
replacing_lower = add_remove == 'v'
chord_interval = int(chord_interval) - 1
### From here on, `chord_interval` is decremented, i.e. the root is 0, the seventh is 6 etc. (just like in `tpcs`)
if (chord_interval == 0 and not subtracted) or chord_interval > 13:
logger.warning(
f"{MC}Change {full} is meaningless and ignored because it concerns chord tone {chord_interval + 1}.")
continue
next_octave = chord_interval > 7
shift = 7 * (acc.count('#') - acc.count('b'))
new_val = tpcs[chord_interval] + shift
if subtracted:
if chord_interval not in tone_functions:
logger.warning(
f"{MC}The change {full} has no effect because it concerns an interval which is not implied by {numeral}{form}{figbass}.")
else:
root_position[chord_interval] = []
elif added:
added_notes.append(new_val)
elif next_octave:
if any((replacing_lower, replacing_upper, subtracted)):
logger.warning(f"{MC}{full[0]} has no effect in {full} because the interval is larger than an octave.")
added_notes.append(new_val)
elif chord_interval in [1, 3, 5]: # these are changes to scale degree 2, 4, 6 that replace the lower neighbour unless they have a # or ^
if '#' in acc or replacing_upper:
if '#' in acc and replacing_upper:
logger.warning(f"{MC}^ is redundant in {full}.")
if chord_interval == 5 and is_triad: # leading tone to 7 but not in seventh chord
added_notes.append(new_val)
else:
replace_chord_tone(chord_interval + 1, new_val)
else:
if replacing_lower:
logger.warning(f"{MC}v is redundant in {full}.")
replace_chord_tone(chord_interval - 1, new_val)
else: # chord tone alterations
if replacing_lower:
# TODO: This must be possible, e.g. V(6v5) where 5 is suspension of 4
logger.warning(f"{MC}{full} -> chord tones cannot replace neighbours, use + instead.")
elif chord_interval == 6 and figbass != '7': # 7th are a special case:
if figbass == '': # in root position triads they are added
# TODO: The standard is lacking a distinction, because the root in root pos. can also be replaced from below!
added_notes.append(new_val)
elif figbass in ['6', '64'] or '#' in acc: # in inverted triads they replace the root, as does #7
replace_chord_tone(0, new_val)
else: # otherwise they are unclear
logger.warning(
f"{MC}In seventh chords, such as {label}, it is not clear whether the {full} alters the 7 or replaces the 8 and should not be used.")
elif tpcs[chord_interval] == new_val:
logger.warning(
f"{MC}The change {full} has no effect in {numeral}{form}{figbass}")
else:
root_position[chord_interval] = [new_val]
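# Position of the bass note among the tone functions: 0 = root, 1 = third, 2 = fifth, 3 = seventh.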
figbass2bass = {
'': 0,
'7': 0,
'6': 1,
'65': 1,
'64': 2,
'43': 2,
'2': 3
}
bass = figbass2bass[figbass]
chord_tones = []
for tf in tone_functions[bass:] + tone_functions[:bass]:
chord_tone, replacing_tones = root_position[tf], replacements[tf]
if chord_tone == replacing_tones == []:
logger.debug(f"{MC}{label} results in a chord without {tf + 1}.")
if chord_tone != []:
chord_tones.append(chord_tone[0])
if replacing_tones != []:
logger.warning(f"{MC}{label} results in a chord tone {tf + 1} AND its replacement(s) {replacing_tones}.")
chord_tones.extend(replacing_tones)
bass_tpc = chord_tones[0]
if bass_only:
return bass_tpc
elif merge_tones:
return tuple(sort_tpcs(chord_tones + added_notes, start=bass_tpc))
else:
return {
'chord_tones': tuple(chord_tones),
'added_tones': tuple(added_notes),
'root': root,
}