import re
from typing import Literal, Collection, Generator, Tuple, Union, Dict, Optional, List, Iterator
import io
import sys, os
import traceback
from collections import Counter, defaultdict
import pandas as pd
import numpy as np
from git import Repo, InvalidGitRepositoryError
from gitdb.exc import BadName
from .corpus import Corpus
from .annotations import Annotations
from .logger import LoggedClass, get_logger
from .piece import Piece
from .score import Score
from ._typing import FileDict, FileList, CorpusFnameTuple, ScoreFacets, FileDataframeTupleMaybe, FacetArguments, FileParsedTuple, FileDataframeTuple, ScoreFacet, AnnotationsFacet
from .utils import column_order, get_musescore, group_id_tuples, iter_selection, get_first_level_corpora, join_tsvs, load_tsv, make_continuous_offset_series, \
make_id_tuples, make_playthrough2mc, METADATA_COLUMN_ORDER, metadata2series, parse_ignored_warnings_file, pretty_dict, resolve_dir, \
update_labels_cfg, write_tsv, available_views2str, path2parent_corpus, resolve_paths_argument, enforce_fname_index_for_metadata
from .view import View, create_view_from_parameters, DefaultView
class Parse(LoggedClass):
"""
Class for creating one or several :class:`~.corpus.Corpus` objects and performing actions on all of them.
"""
    def __init__(self,
directory: Optional[Union[str, Collection[str]]] = None,
recursive: bool = True,
only_metadata_fnames: bool = True,
include_convertible: bool = False,
include_tsv: bool = True,
exclude_review: bool = True,
file_re: Optional[Union[str, re.Pattern]] = None,
folder_re: Optional[Union[str, re.Pattern]] = None,
exclude_re: Optional[Union[str, re.Pattern]] = None,
paths: Optional[Collection[str]] = None,
labels_cfg: dict = {},
ms=None,
**logger_cfg):
""" Initialize a Parse object and try to create corpora if directories and/or file paths are specified.
Args:
directory: Path to scan for corpora.
            recursive: Pass False if you don't want to scan ``directory`` for subcorpora but to treat it as a single corpus instead.
only_metadata_fnames:
The default view excludes piece names that are not listed in the corpus' metadata.tsv file (e.g. when none was found).
Pass False to include all pieces regardless. This might be needed when setting ``recursive`` to False.
include_convertible:
The default view excludes scores that would need conversion to MuseScore format prior to parsing.
Pass True to include convertible scores in .musicxml, .midi, .cap or any other format that MuseScore 3 can open.
For on-the-fly conversion, however, the parameter ``ms`` needs to be set.
include_tsv: The default view includes TSV files. Pass False to disregard them and parse only scores.
exclude_review:
The default view excludes files and folders whose name contains 'review'.
Pass False to include these as well.
file_re: Pass a regular expression if you want to create a view filtering out all files that do not contain it.
folder_re: Pass a regular expression if you want to create a view filtering out all folders that do not contain it.
exclude_re: Pass a regular expression if you want to create a view filtering out all files or folders that contain it.
            paths:
                If ``directory`` is specified, the file names of these paths are used to create a filtering view that excludes all other files.
                Otherwise, all paths are expected to belong to the same parent corpus, which is inferred from the first path by looking for the
                first parent directory that either contains a 'metadata.tsv' file or is a git repository. This parameter is deprecated; use ``file_re`` instead.
labels_cfg: Pass a configuration dict to detect only certain labels or change their output format.
            ms:
                If you pass the path to your local MuseScore 3 installation, ms3 will attempt to parse musicXML, MuseScore 2,
                and other formats by temporarily converting them. If you are using the standard installation path, you may try 'auto',
                or 'win' for Windows, 'mac' for macOS, or 'mscore' for Linux. If you do not pass ``file_re`` and the MuseScore executable
                is detected, all convertible files are automatically selected; otherwise, only those that can be parsed without conversion.
**logger_cfg: Keyword arguments for changing the logger configuration. E.g. ``level='d'`` to see all debug messages.
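
        Example:
            A minimal sketch of typical usage (the directory path is hypothetical)::

                p = Parse('~/corpora', level='i')  # scan the given folder for corpora
                p.parse()                          # parse all detected scores and TSV files
                p.info()                           # print an overview of the parsed files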
"""
if 'level' not in logger_cfg or (logger_cfg['level'] is None):
logger_cfg['level'] = 'w'
super().__init__(subclass='Parse', logger_cfg=logger_cfg)
self.corpus_paths: Dict[str, str] = {}
"""{corpus_name -> path} dictionary with each corpus's base directory. Generally speaking, each corpus path is expected to contain a ``metadata.tsv`` and, maybe, to be a git.
"""
self.corpus_objects: Dict[str, Corpus] = {}
"""{corpus_name -> Corpus} dictionary with one object per :attr:`corpus_path <corpus_paths>`.
"""
self._ms = get_musescore(ms, logger=self.logger)
self._views: dict = {}
initial_view = create_view_from_parameters(only_metadata_fnames=only_metadata_fnames,
include_convertible=include_convertible,
include_tsv=include_tsv,
exclude_review=exclude_review,
paths=paths,
file_re=file_re,
folder_re=folder_re,
exclude_re=exclude_re,
level=self.logger.getEffectiveLevel())
self._views[None] = initial_view
if initial_view.name != 'default':
self._views['default'] = DefaultView(level=self.logger.getEffectiveLevel())
self._views['all'] = View(level=self.logger.getEffectiveLevel())
#
# self._ignored_warnings = defaultdict(list)
# """:obj:`collections.defaultdict`
# {'logger_name' -> [(message_id), ...]} This dictionary stores the warnings to be ignored
# upon loading them from an IGNORED_WARNINGS file.
# """
#
self.labels_cfg = {
'staff': None,
'voice': None,
'harmony_layer': None,
'positioning': False,
'decode': True,
'column_name': 'label',
'color_format': None,
}
""":obj:`dict`
Configuration dictionary to determine the output format of :py:attr:`~.score.Score.labels` and
    :py:attr:`~.score.Score.expanded` tables. The dictionary is passed to :py:attr:`~.score.Score` upon parsing.
"""
self.labels_cfg.update(update_labels_cfg(labels_cfg, logger=self.logger))
if directory is not None:
if isinstance(directory, str):
directory = [directory]
for d in directory:
self.add_dir(directory=d, recursive=recursive)
if paths is not None:
self.add_files(paths)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%% END of __init__() %%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
@property
def ms(self) -> str:
"""Path or command of the local MuseScore 3 installation if specified by the user and recognized."""
return self._ms
@ms.setter
def ms(self, ms):
self._ms = get_musescore(ms, logger=self.logger)
@property
def n_detected(self) -> int:
"""Number of detected files aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
return sum(corpus.n_detected for _, corpus in self)
@property
def n_orphans(self) -> int:
"""Number of files that are always disregarded because they could not be attributed to any of the fnames."""
return sum(len(corpus.ix2orphan_file) for _, corpus in self)
@property
def n_parsed(self) -> int:
"""Number of parsed files aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
return sum(corpus.n_parsed for _, corpus in self)
@property
def n_parsed_scores(self) -> int:
"""Number of parsed scores aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
return sum(corpus.n_parsed_scores for _, corpus in self)
@property
def n_parsed_tsvs(self) -> int:
"""Number of parsed TSV files aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
return sum(corpus.n_parsed_tsvs for _, corpus in self)
@property
def n_unparsed_scores(self) -> int:
"""Number of all detected but not yet parsed scores, aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
return sum(corpus.n_unparsed_scores for _, corpus in self)
@property
def n_unparsed_tsvs(self) -> int:
"""Number of all detected but not yet parsed TSV files, aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
return sum(corpus.n_unparsed_tsvs for _, corpus in self)
@property
def parsed_mscx(self) -> pd.DataFrame:
"""Deprecated property. Replaced by :attr:`n_parsed_scores`"""
raise AttributeError(f"Property has been renamed to n_parsed_scores.")
@property
def parsed_tsv(self) -> pd.DataFrame:
"""Deprecated property. Replaced by :attr:`n_parsed_tsvs`"""
raise AttributeError(f"Property has been renamed to n_parsed_tsvs.")
@property
def view(self) -> View:
"""Retrieve the current View object. Shorthand for :meth:`get_view`."""
return self.get_view()
@view.setter
def view(self, new_view: View):
        if not isinstance(new_view, View):
            raise TypeError("If you want to switch to an existing view, use its name like an attribute or "
                            "call _.switch_view().")
self.set_view(new_view)
@property
def views(self) -> None:
"""Display a short description of the available views."""
print(pretty_dict({"[active]" if k is None else k: v for k, v in self._views.items()}, "view_name", "Description"))
@property
def view_name(self) -> str:
"""Get the name of the active view."""
return self.get_view().name
@view_name.setter
def view_name(self, new_name):
view = self.get_view()
view.name = new_name
@property
def view_names(self):
return {view.name if name is None else name for name, view in self._views.items()}
    def add_corpus(self,
directory: str,
corpus_name: Optional[str] = None,
**logger_cfg) -> None:
"""
This method creates a :class:`~.corpus.Corpus` object which scans the directory ``directory`` for parseable files.
It inherits all :class:`Views <.view.View>` from the Parse object.
Args:
directory: Directory to scan for files.
corpus_name:
By default, the folder name of ``directory`` is used as name for this corpus. Pass a string to
use a different identifier.
**logger_cfg:
Keyword arguments for configuring the logger of the new Corpus object. E.g. ``level='d'`` to see all debug messages.
Note that the logger is a child logger of this Parse object's logger and propagates, so it might filter debug messages.
You can use _.change_logger_cfg(level='d') to change the level post hoc.
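
        Example:
            A minimal sketch (path and corpus name are hypothetical)::

                p = Parse()
                p.add_corpus('~/corpora/mozart_sonatas', corpus_name='mozart')
                p.parse_scores()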
"""
directory = resolve_dir(directory)
if not os.path.isdir(directory):
self.logger.warning(f"{directory} is not an existing directory.")
return
new_logger_cfg = dict(self.logger_cfg)
new_logger_cfg.update(logger_cfg)
if corpus_name is None:
corpus_name = os.path.basename(directory).strip(r"\/")
        new_logger_cfg['name'] = self.logger.name + '.' + corpus_name.replace('.', '')
        try:
            corpus = Corpus(directory=directory, view=self.get_view(), ms=self.ms, **new_logger_cfg)
except AssertionError:
self.logger.debug(f"{directory} contains no parseable files.")
return
corpus.set_view(**{view_name: view for view_name, view in self._views.items() if view_name is not None})
if len(corpus.files) == 0:
self.logger.info(f"No parseable files detected in {directory}. Skipping...")
return
if corpus_name is None:
corpus_name = os.path.basename(directory).strip(r'\/')
if corpus_name in self.corpus_paths:
existing_path = self.corpus_paths[corpus_name]
if existing_path == directory:
self.logger.warning(f"Corpus '{corpus_name}' had already been present and was overwritten, i.e., reset.")
else:
self.logger.warning(f"Corpus '{corpus_name}' had already been present for the path {existing_path} and "
f"was replaced by {directory}")
self.corpus_paths[corpus_name] = directory
self.corpus_objects[corpus_name] = corpus
# convertible = self.ms is not None
# if file_re is None:
# file_re = Score._make_extension_regex(tsv=True, convertible=convertible)
# if exclude_re is None:
# exclude_re = r'(^(\.|_|concatenated_)|_reviewed)'
# directory = resolve_dir(directory)
# self.last_scanned_dir = directory
# if key is None:
# key = os.path.basename(directory)
# if key not in self.files:
# self.logger.debug(f"Adding {directory} as new corpus with key '{key}'.")
# self.files[key] = []
# self.corpus_paths[key] = resolve_dir(directory)
# else:
# self.logger.info(f"Adding {directory} to existing corpus with key '{key}'.")
#
# top_level_folders, top_level_files = first_level_files_and_subdirs(directory)
# self.logger.debug(f"Top level folders: {top_level_folders}\nTop level files: {top_level_files}")
#
# added_ids = []
#
# # look for scores
# scores_folder = None
# if 'scores' in kwargs and kwargs['scores'] in top_level_folders:
# scores_folder = kwargs['scores']
# elif 'MS3' in top_level_folders:
# scores_folder = 'MS3'
# elif 'scores' in top_level_folders:
# scores_folder = 'scores'
# else:
# msg = f"No scores folder found among {top_level_folders}."
# if 'scores' not in kwargs:
# msg += " If one of them has MuseScore files, indicate it by passing scores='scores_folder'."
# self.logger.info(msg)
# if scores_folder is not None:
# score_re = Score._make_extension_regex(convertible=convertible)
# scores_path = os.path.join(directory, scores_folder)
# score_paths = sorted(scan_directory(scores_path, file_re=score_re, recursive=recursive))
# score_ids = self.add_files(paths=score_paths, key=key)
# added_ids += score_ids
# score_fnames = self._get_unambiguous_fnames_from_ids(score_ids, key=key)
#
# for fname, id in score_fnames.items():
# piece = self._get_piece(key, fname)
# piece.type2file_info['score'] = self.id2file_info[id]
# self.id2piece_id[id] = (key, fname)
# # if fname in self.corpus2fname2score[key]:
# # if self.corpus2fname2score[key][fname] == id:
# # self.debug(f"'{fname} had already been matched to {id}.")
# # else:
# # self.warning(f"'{fname} had already been matched to {self.corpus2fname2score[key][fname]}")
# self.corpus2fname2score[key].update(score_fnames)
#
# # look for metadata
# if 'metadata.tsv' in top_level_files:
# default_metadata_path = os.path.join(directory, 'metadata.tsv')
# self.logger.debug(f"'metadata.tsv' was detected and added.")
# added_ids += self.add_files(paths=default_metadata_path, key=key)
# metadata_id = added_ids[-1]
# self.parse_tsv(ids=[metadata_id])
# metadata_tsv = self._parsed_tsv[metadata_id]
# metadata_fnames = metadata_tsv.fnames
# else:
# metadata_id = None
#
#
#
# return
# paths = sorted(
# scan_directory(directory, file_re=file_re, folder_re=folder_re, exclude_re=exclude_re,
# recursive=recursive, logger=self.logger))
# if len(paths) == 0:
# self.logger.info(f"No matching files found in {directory}.")
# return
# added_ids = self.add_files(paths=paths, key=key)
# if len(added_ids) == 0:
# self.logger.debug(f"No files from {directory} have been added.")
# return
# _, first_i = added_ids[0]
# if 'metadata.tsv' in self.files[key][first_i:]:
# self.logger.debug(f"Found metadata.tsv for corpus '{key}'.")
# elif 'metadata.tsv' in self.files[key]:
# self.logger.debug(f"Had already found metadata.tsv for corpus '{key}'.")
# else:
# # if no metadata have been found (e.g. because excluded via file_re), add them if they're there
# default_metadata_path = os.path.join(directory, 'metadata.tsv')
# if os.path.isfile(default_metadata_path):
# self.logger.info(f"'metadata.tsv' was detected and automatically added for corpus '{key}'.")
# metadata_id = self.add_files(paths=default_metadata_path, key=key)
# added_ids += metadata_id
# else:
# self.logger.info(f"No metadata found for corpus '{key}'.")
# self.corpus_paths[key] = directory
# self.look_for_ignored_warnings(directory, key)
    def add_dir(self, directory: str,
recursive: bool = True,
**logger_cfg) -> None:
"""
This method decides if the directory ``directory`` contains several corpora or if it is a corpus
itself, and calls :meth:`add_corpus` for each corpus.
Args:
directory: Directory to scan for corpora.
            recursive:
                By default, if any of the first-level subdirectories contains a 'metadata.tsv' or is a git repository, all first-level
                subdirectories of ``directory`` are treated as corpora, i.e. one :class:`~.corpus.Corpus` object is created per folder.
                Pass False to prevent this, which is equivalent to calling :meth:`add_corpus(directory) <add_corpus>`.
**logger_cfg:
Keyword arguments for configuring the logger of the new Corpus objects. E.g. ``level='d'`` to see all debug messages.
Note that the loggers are child loggers of this Parse object's logger and propagate, so it might filter debug messages.
You can use _.change_logger_cfg(level='d') to change the level post hoc.
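
        Example:
            A minimal sketch (both paths are hypothetical)::

                p = Parse()
                p.add_dir('~/corpora')                          # one Corpus object per detected subcorpus
                p.add_dir('~/single_corpus', recursive=False)   # treat the folder itself as a single corpus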
"""
directory = resolve_dir(directory)
if not os.path.isdir(directory):
self.logger.warning(f"{directory} is not an existing directory.")
return
if not recursive:
self.add_corpus(directory, **logger_cfg)
return
# new corpus/corpora to be added
subdir_corpora = sorted(get_first_level_corpora(directory, logger=self.logger))
n_corpora = len(subdir_corpora)
if n_corpora == 0:
self.logger.debug(f"Treating {directory} as corpus.")
self.add_corpus(directory, **logger_cfg)
else:
self.logger.debug(f"{n_corpora} individual corpora detected in {directory}.")
for corpus_path in subdir_corpora:
self.add_corpus(corpus_path, **logger_cfg)
    def add_files(self, paths: Union[str, Collection[str]], corpus_name: Optional[str] = None) -> None:
"""
        Deprecated: To deal with particular files only, use :meth:`add_corpus`, passing the directory containing them, and
        configure the :class:`~.view.View` accordingly. This method does it for you but easily leads to unexpected behaviour.
It expects the file paths to point to files located in a shared corpus folder
on some higher level or in folders for which :class:`~.corpus.Corpus` objects have already been created.
Args:
paths: Collection of file paths. Only existing files can be added.
corpus_name:
* By default, I will try to attribute the files to existing :class:`~.corpus.Corpus` objects based on their paths. This makes sense only when new files have
been created after the directories were scanned.
                * For paths that do not contain an existing corpus_path, I will try to detect the parent directory that is a corpus (based on it being a git repository or containing a ``metadata.tsv``).
                  If this fails for the first path, I will raise an error. Otherwise, all subsequent paths will be considered to be part of that same corpus (watch out for
                  meaningless relative paths!).
                * You can pass a folder name contained in the first path to create a new corpus, assuming that all other paths are contained in it (watch out for meaningless relative paths!).
* Pass an existing corpus_name to add the files to a particular corpus. Note that all parseable files under the corpus_path are detected anyway, and if you add files
from other directories, it will lead to invalid relative paths that work only on your system. If you're adding files that have been created after the Corpus object
has, you can leave this parameter empty; paths will be attributed to the existing corpora automatically.
"""
resolved_paths = resolve_paths_argument(paths, logger=self.logger)
if len(resolved_paths) == 0:
return
if corpus_name is None:
add_to_existing = defaultdict(list)
no_parent = []
for path in resolved_paths:
part_of = None
for corpus_name, corpus_path in self.corpus_paths.items():
if corpus_path in path:
part_of = corpus_name
add_to_existing[part_of].append(path)
break
if part_of is None:
no_parent.append(path)
for corpus_name, paths in add_to_existing.items():
self.get_corpus(corpus_name).add_file_paths(paths)
if len(no_parent) > 0:
# paths are expected to be contained in one and the same corpus directory
first_path = no_parent[0]
directory = path2parent_corpus(first_path)
if directory is None:
raise ValueError(f"No parent of {first_path} has been recognized as a corpus by being a git or containing a 'metadata.tsv'. Use _.add_corpus()")
self.add_corpus(directory)
elif corpus_name in self.corpus_paths:
self.get_corpus(corpus_name).add_file_paths(paths)
else:
# find the path according to the corpus_name
first_path = resolved_paths[0]
if corpus_name in first_path:
while True:
tmp_path, last_component = os.path.split(first_path)
if tmp_path == first_path:
# reached the root
first_path = ''
break
if last_component == corpus_name:
# first_path is the corpus first_path
break
first_path = tmp_path
            else:
                first_path = ''
if first_path == '':
raise ValueError(f"corpus_name needs to be a folder contained in the first path, but '{corpus_name}' isn't.")
self.add_corpus(first_path)
# corpus = self.get_corpus(corpus_name)
# new_view = create_view_from_parameters(only_metadata_fnames=False, exclude_review=False, paths=paths)
# corpus.set_view(new_view)
def color_non_chord_tones(self,
color_name: str = 'red',
view_name: Optional[str] = None,
force: bool = False,
choose: Literal['all', 'auto', 'ask'] = 'all', ) -> Dict[CorpusFnameTuple, List[FileDataframeTuple]]:
result = {}
for corpus_name, corpus in self.iter_corpora(view_name):
fname2reports = corpus.color_non_chord_tones(color_name,
view_name=view_name,
force=force,
choose=choose)
result.update({(corpus_name, fname): report for fname, report in fname2reports.items()})
return result
    def change_labels_cfg(self, labels_cfg={}, staff=None, voice=None, harmony_layer=None, positioning=None, decode=None, column_name=None, color_format=None):
""" Update :obj:`Parse.labels_cfg` and retrieve new 'labels' tables accordingly.
Parameters
----------
        labels_cfg : :obj:`dict`
            Pass an entire configuration dictionary or, to change only particular options, use the individual keyword arguments
            staff, voice, harmony_layer, positioning, decode, column_name, color_format.
            Arguments as they will be passed to :py:meth:`~ms3.annotations.Annotations.get_labels`.
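
        Example:
            A minimal sketch (assuming ``p`` is a Parse object with parsed scores)::

                p.change_labels_cfg(positioning=False, decode=True)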
"""
keys = ['staff', 'voice', 'harmony_layer', 'positioning', 'decode', 'column_name', 'color_format']
for k in keys:
val = locals()[k]
if val is not None:
labels_cfg[k] = val
updated = update_labels_cfg(labels_cfg, logger=self.logger)
self.labels_cfg.update(updated)
for score in self._parsed_mscx.values():
score.change_labels_cfg(labels_cfg=updated)
ids = list(self._labellists.keys())
if len(ids) > 0:
self._extract_and_cache_dataframes(ids=ids, labels=True)
    def compare_labels(self,
key: str = 'detached',
new_color: str = 'ms3_darkgreen',
old_color: str = 'ms3_darkred',
detached_is_newer: bool = False,
add_to_rna: bool = True,
view_name: Optional[str] = None) -> Tuple[int, int]:
""" Compare detached labels ``key`` to the ones attached to the Score to create a diff.
By default, the attached labels are considered as the reviewed version and labels that have changed or been added
in comparison to the detached labels are colored in green; whereas the previous versions of changed labels are
attached to the Score in red, just like any deleted label.
Args:
key: Key of the detached labels you want to compare to the ones in the score.
new_color, old_color:
The colors by which new and old labels are differentiated. Identical labels remain unchanged. Colors can be
CSS colors or MuseScore colors (see :py:attr:`utils.MS3_COLORS`).
detached_is_newer:
Pass True if the detached labels are to be added with ``new_color`` whereas the attached changed labels
will turn ``old_color``, as opposed to the default.
add_to_rna:
By default, new labels are attached to the Roman Numeral layer.
Pass False to attach them to the chord layer instead.
Returns:
Number of scores in which labels have changed.
            Number of scores in which no label has changed.
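
        Example:
            A minimal sketch (assuming labels have previously been detached under the key 'old')::

                changed, unchanged = p.compare_labels(key='old')
                print(f"{changed} scores differ from the detached labels, {unchanged} are identical.")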
"""
changed, unchanged = 0, 0
for _, corpus in self.iter_corpora(view_name=view_name):
c, u = corpus.compare_labels(key=key,
new_color=new_color,
old_color=old_color,
detached_is_newer=detached_is_newer,
add_to_rna=add_to_rna,
view_name=view_name)
changed += c
unchanged += u
return changed, unchanged
def count_changed_scores(self, view_name: Optional[str] = None):
return sum(corpus.count_changed_scores() for _, corpus in self.iter_corpora(view_name))
    def count_extensions(self,
view_name: Optional[str] = None,
per_piece: bool = False,
include_metadata: bool = False,
):
""" Count file extensions.

        Args:
            view_name: Name of the view whose files are to be counted. Defaults to the active view.
            per_piece:
                By default, the extension counts are summed up per corpus and returned as {corpus_name: {extension: count}}.
                Pass True to get the counts per piece instead, i.e. {(corpus_name, fname): {extension: count}}.
            include_metadata: Pass True to include 'metadata.tsv' files in the counts.

        Returns:
            A {corpus_name: {extension: count}} dictionary or, if ``per_piece`` is set to True,
            a {(corpus_name, fname): {extension: count}} dictionary.
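
        Example:
            A minimal sketch (the shown counts are illustrative)::

                p.count_extensions()                # {'corpus_name': {'.mscx': 10, '.tsv': 30}, ...}
                p.count_extensions(per_piece=True)  # {('corpus_name', 'fname'): {'.mscx': 1, ...}, ...}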
"""
extension_counters = {corpus_name: corpus.count_extensions(view_name, include_metadata=include_metadata) for corpus_name, corpus in self.iter_corpora(view_name)}
if per_piece:
return {(corpus_name, fname): dict(cnt) for corpus_name, fname2cnt in extension_counters.items() for fname, cnt in fname2cnt.items()}
return {corpus_name: dict(sum(fname2cnt.values(), Counter())) for corpus_name, fname2cnt in extension_counters.items()}
def count_files(self,
detected=True,
parsed=True,
as_dict: bool = False,
drop_zero: bool = True,
view_name: Optional[str] = None) -> Union[pd.DataFrame, dict]:
all_counts = {corpus_name: corpus._summed_file_count(types=detected, parsed=parsed, view_name=view_name) for corpus_name, corpus in self.iter_corpora(view_name=view_name)}
counts_df = pd.DataFrame.from_dict(all_counts, orient='index', dtype='Int64')
if drop_zero:
empty_cols = counts_df.columns[counts_df.sum() == 0]
counts_df = counts_df.drop(columns=empty_cols)
if as_dict:
return counts_df.to_dict(orient='index')
counts_df.index.rename('corpus', inplace=True)
return counts_df
def count_parsed_scores(self, view_name: Optional[str] = None) -> int:
return sum(map(len, self._get_parsed_score_files(view_name=view_name).values()))
def count_parsed_tsvs(self, view_name: Optional[str] = None) -> int:
return sum(map(len, self._get_parsed_tsv_files(view_name=view_name).values()))
    def count_unparsed_scores(self, view_name: Optional[str] = None) -> int:
        return sum(map(len, self._get_unparsed_score_files(view_name=view_name).values()))
    def count_unparsed_tsvs(self, view_name: Optional[str] = None) -> int:
        return sum(map(len, self._get_unparsed_tsv_files(view_name=view_name).values()))
def create_missing_metadata_tsv(self,
view_name: Optional[str] = None) -> None:
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
if corpus.metadata_tsv is None:
path = corpus.create_metadata_tsv()
def extract_facets(self,
facets: ScoreFacets = None,
view_name: Optional[str] = None,
force: bool = False,
choose: Literal['auto', 'ask'] = 'auto',
unfold: bool = False,
interval_index: bool = False,
flat=False,
concatenate=True) -> Union[pd.DataFrame, Dict[CorpusFnameTuple, Union[Dict[str, List[FileDataframeTuple]], List[FileDataframeTuple]]]]:
return self._aggregate_corpus_data('extract_facets',
facets=facets,
view_name=view_name,
force=force,
choose=choose,
unfold=unfold,
interval_index=interval_index,
flat=flat,
concatenate=concatenate,
)
def get_all_parsed(self, facets: FacetArguments = None,
view_name: Optional[str] = None,
force: bool = False,
choose: Literal['all', 'auto', 'ask'] = 'all',
flat: bool = False,
include_empty=False,
concatenate: bool = True,
) -> Union[pd.DataFrame, Dict[CorpusFnameTuple, Union[Dict[str, FileParsedTuple], List[FileParsedTuple]]]]:
return self._aggregate_corpus_data('get_all_parsed',
facets=facets,
view_name=view_name,
force=force,
choose=choose,
flat=flat,
include_empty=include_empty,
concatenate=concatenate)
def get_corpus(self, name) -> Corpus:
assert name in self.corpus_objects, f"Don't have a corpus called '{name}', only {list(self.corpus_objects.keys())}"
return self.corpus_objects[name]
    def get_dataframes(self,
notes: bool = False,
rests: bool = False,
notes_and_rests: bool = False,
measures: bool = False,
events: bool = False,
labels: bool = False,
chords: bool = False,
expanded: bool = False,
form_labels: bool = False,
cadences: bool = False,
view_name: Optional[str] = None,
force: bool = False,
choose: Literal['all', 'auto', 'ask'] = 'all',
unfold: bool = False,
interval_index: bool = False,
flat=False,
include_empty: bool = False,
) -> Union[pd.DataFrame, Dict[CorpusFnameTuple, Union[Dict[str, List[FileDataframeTuple]], List[FileDataframeTuple]]]]:
"""Renamed to :meth:`get_facets`."""
l = locals()
facets = [facet for facet in ScoreFacet.__args__ if l[facet]]
return self.get_facets(facets=facets,
view_name=view_name,
force=force,
choose=choose,
unfold=unfold,
interval_index=interval_index,
flat=flat,
include_empty=include_empty)
def get_facets(self,
facets: ScoreFacets = None,
view_name: Optional[str] = None,
force: bool = False,
choose: Literal['all', 'auto', 'ask'] = 'all',
unfold: bool = False,
interval_index: bool = False,
flat=False,
include_empty=False,
concatenate=True,
) -> Union[pd.DataFrame, Dict[CorpusFnameTuple, Union[Dict[str, List[FileDataframeTuple]], List[FileDataframeTuple]]]]:
return self._aggregate_corpus_data('get_facets',
facets=facets,
view_name=view_name,
force=force,
choose=choose,
unfold=unfold,
interval_index=interval_index,
flat=flat,
include_empty=include_empty,
concatenate=concatenate,
)
def get_files(self, facets: FacetArguments = None,
view_name: Optional[str] = None,
parsed: bool = True,
unparsed: bool = True,
choose: Literal['all', 'auto', 'ask'] = 'all',
flat: bool = False,
include_empty=False,
) -> Dict[CorpusFnameTuple, Union[FileDict, FileList]]:
return self._aggregate_corpus_data('get_files',
facets=facets,
view_name=view_name,
parsed=parsed,
unparsed=unparsed,
choose=choose,
flat=flat,
include_empty=include_empty
)
    def get_piece(self, corpus_name: str, fname: str) -> Piece:
"""Returns an existing Piece object."""
assert corpus_name in self.corpus_objects, f"'{corpus_name}' is not an existing corpus. Choose from {list(self.corpus_objects.keys())}"
return self.corpus_objects[corpus_name].get_piece(fname)
    def get_view(self,
view_name: Optional[str] = None,
**config
) -> View:
"""Retrieve an existing or create a new View object, potentially while updating the config."""
if view_name in self._views:
view = self._views[view_name]
elif view_name is not None and self._views[None].name == view_name:
view = self._views[None]
else:
view = self.get_view().copy(new_name=view_name)
old_name = view.name
view.name = view_name
self._views[view_name] = view
self.logger.info(f"New view '{view_name}' created as a copy of '{old_name}'.")
if len(config) > 0:
view.update_config(**config)
return view
def info(self, view_name: Optional[str] = None, return_str: bool = False, show_discarded: bool = False):
""""""
header = f"All corpora"
header += "\n" + "-" * len(header) + "\n"
# start info message with the names of the available views, the header, and info on the active view.
view = self.get_view(view_name)
view.reset_filtering_data()
msg = available_views2str(self._views, view_name)
msg += header
view_info = f"View: {view}"
if view_name is None:
corpus_views = [corpus.get_view().name for _, corpus in self.iter_corpora(view_name=view_name)]
if len(set(corpus_views)) > 1:
view_info = f"This is a mixed view. Call _.info(view_name) to see a homogeneous one."
msg += view_info + "\n\n"
# Show info on all pieces and files included in the active view
counts_df = self.count_files(view_name=view_name)
if len(counts_df) == 0:
if self.n_detected == 0:
msg += 'No files detected. Use _.add_corpus().'
else:
msg += 'No files selected under the current view. You could use _.all to see everything.'
else:
if counts_df.isna().any().any():
counts_df = counts_df.fillna(0).astype('int')
additional_columns = []
for corpus_name in counts_df.index:
corpus = self.get_corpus(corpus_name)
has_metadata = 'no' if corpus.metadata_tsv is None else 'yes'
corpus_view = corpus.get_view().name
additional_columns.append([has_metadata, corpus_view])
additional_columns = pd.DataFrame(additional_columns, columns=[('has', 'metadata'), ('active', 'view')], index=counts_df.index)
info_df = pd.concat([additional_columns, counts_df], axis=1)
info_df.columns = pd.MultiIndex.from_tuples(info_df.columns)
msg += info_df.to_string()
n_changed_scores = self.count_changed_scores(view_name)
if n_changed_scores > 0:
msg += f"\n\n{n_changed_scores} scores have changed since parsing."
filtering_report = view.filtering_report(show_discarded=show_discarded, return_str=True)
if filtering_report != '':
msg += '\n\n' + filtering_report
if self.n_orphans > 0:
msg += f"\n\nThere are {self.n_orphans} orphans that could not be attributed to any of the respective corpus's fnames"
if show_discarded:
msg += ':'
for name, corpus in self:
if corpus.n_orphans > 0:
msg += f"\n\t{name}: {list(corpus.ix2orphan_file.values())}"
else:
msg += "."
if return_str:
return msg
print(msg)
# ids = list(self._iterids(keys))
# info = f"{len(ids)} files.\n"
# if subdirs:
# exts = self.count_extensions(keys, per_subdir=True)
# for key, subdir_exts in exts.items():
# info += key + '\n'
# for line in pretty_dict(subdir_exts).split('\n'):
# info += ' ' + line + '\n'
# else:
# exts = self.count_extensions(keys, per_key=True)
# info += pretty_dict(exts, heading='EXTENSIONS')
# parsed_mscx_ids = [id for id in ids if id in self._parsed_mscx]
# parsed_mscx = len(parsed_mscx_ids)
# ext_counts = self.count_extensions(keys, per_key=False)
# others = len(self._score_ids(opposite=True))
# mscx = len(self._score_ids())
# by_conversion = len(self._score_ids(native=False))
# if parsed_mscx > 0:
#
# if parsed_mscx == mscx:
# info += f"\n\nAll {mscx} MSCX files have been parsed."
# else:
# info += f"\n\n{parsed_mscx}/{mscx} MSCX files have been parsed."
# annotated = sum(True for id in parsed_mscx_ids if id in self._annotations)
# if annotated == mscx:
# info += f"\n\nThey all have annotations attached."
# else:
# info += f"\n\n{annotated} of them have annotations attached."
# if annotated > 0:
# layers = self.count_annotation_layers(keys, which='attached', per_key=True)
# info += f"\n{pretty_dict(layers, heading='ANNOTATION LAYERS')}"
#
# detached = sum(True for id in parsed_mscx_ids if self._parsed_mscx[id].has_detached_annotations)
# if detached > 0:
# info += f"\n\n{detached} of them have detached annotations:"
# layers = self.count_annotation_layers(keys, which='detached', per_key=True)
# try:
# info += f"\n{pretty_dict(layers, heading='ANNOTATION LAYERS')}"
# except:
# print(layers)
# raise
# elif '.mscx' in ext_counts:
# if mscx > 0:
# info += f"\n\nNone of the {mscx} score files have been parsed."
# if by_conversion > 0 and self.ms is None:
# info += f"\n{by_conversion} files would need to be converted, for which you need to set the 'ms' property to your MuseScore 3 executable."
# if self.ms is not None:
# info += "\n\nMuseScore 3 executable has been found."
#
#
# parsed_tsv_ids = [id for id in ids if id in self._parsed_tsv]
# parsed_tsv = len(parsed_tsv_ids)
# if parsed_tsv > 0:
# annotations = sum(True for id in parsed_tsv_ids if id in self._annotations)
# if parsed_tsv == others:
# info += f"\n\nAll {others} tabular files have been parsed, {annotations} of them as Annotations object(s)."
# else:
# info += f"\n\n{parsed_tsv}/{others} tabular files have been parsed, {annotations} of them as Annotations object(s)."
# if annotations > 0:
# layers = self.count_annotation_layers(keys, which='tsv', per_key=True)
# info += f"\n{pretty_dict(layers, heading='ANNOTATION LAYERS')}"
#
# if return_str:
# return info
# print(info)
    def iter_corpora(self, view_name: Optional[str] = None) -> Generator[Tuple[str, Corpus], None, None]:
"""Iterate through corpora under the current or specified view."""
view = self.get_view(view_name)
for corpus_name, corpus in view.filter_by_token('corpora', self):
if view_name not in corpus._views:
if view_name is None:
corpus.set_view(view)
else:
corpus.set_view(**{view_name: view})
yield corpus_name, corpus
    def iter_pieces(self) -> Iterator[Tuple[CorpusFnameTuple, Piece]]:
for corpus_name, corpus in self:
for fname, piece in corpus:
yield (corpus_name, fname), piece
def load_facet_into_scores(self,
facet: AnnotationsFacet,
view_name: Optional[str] = None,
force: bool = False,
choose: Literal['auto', 'ask'] = 'auto',
git_revision: Optional[str] = None,
key: str = 'detached',
infer: bool = True,
**cols) -> Dict[str, int]:
result = {}
for corpus_name, corpus in self.iter_corpora(view_name):
result[corpus_name] = corpus.load_facet_into_scores(facet=facet,
view_name=view_name,
force=force,
choose=choose,
git_revision=git_revision,
key=key,
infer=infer,
**cols)
return result
    def load_ignored_warnings(self, path: str) -> None:
        """ Adds a filter to all loggers included in an IGNORED_WARNINGS file.
Args:
path: Path of the IGNORED_WARNINGS file.
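
        Example:
            A minimal sketch (the file path is hypothetical)::

                p.load_ignored_warnings('~/corpora/IGNORED_WARNINGS')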
"""
for _, corpus in self:
_ = corpus.load_ignored_warnings(path)
    def set_view(self, active: View = None, **views: View):
"""Register one or several view_name=View pairs."""
if active is not None:
new_name = active.name
if new_name in self._views and active != self._views[new_name]:
self.logger.info(f"The existing view called '{new_name}' has been overwritten")
del(self._views[new_name])
old_view = self._views[None]
self._views[old_view.name] = old_view
self._views[None] = active
for view_name, view in views.items():
if view.name is None:
view.name = view_name
self._views[view_name] = view
for corpus_name, corpus in self:
if active is not None and active.check_token('corpus', corpus_name):
corpus.set_view(active)
for view_name, view in views.items():
if view.check_token('corpus', corpus_name):
corpus.set_view(**{view_name: view})
def switch_view(self, view_name: str,
show_info: bool = True,
propagate = True,
) -> None:
if view_name is None:
return
new_view = self.get_view(view_name)
old_view = self.get_view()
self._views[old_view.name] = old_view
self._views[None] = new_view
new_name = new_view.name
if new_name in self._views:
del(self._views[new_name])
if propagate:
for corpus_name, corpus in self:
active_view = corpus.get_view()
if active_view.name != new_name or active_view != new_view:
corpus.set_view(new_view)
if show_info:
self.info()
def update_scores(self,
root_dir: Optional[str] = None,
folder: str = '.',
suffix: str = '',
overwrite: bool = False):
for _, corpus in self.iter_corpora():
corpus.update_scores(root_dir=root_dir,
folder=folder,
suffix=suffix,
overwrite=overwrite)
    def update_tsvs_on_disk(self,
facets: ScoreFacets = 'tsv',
view_name: Optional[str] = None,
force: bool = False,
choose: Literal['auto', 'ask'] = 'auto',
) -> List[str]:
"""
        Update existing TSV files corresponding to one or several facets with information freshly extracted from a parsed
        score, but only if the contents are identical. Otherwise, the existing TSV file is not overwritten and the
        differences are displayed in a log warning. The purpose is to safely update the format of existing TSV files
        (for instance with respect to column order) while making sure that the content does not change.
Args:
facets:
view_name:
force:
By default, only TSV files that have already been parsed are updated. Set to True in order to
force-parse for each facet one of the TSV files included in the given view, if necessary.
choose:
Returns:
List of paths that have been overwritten.
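
        Example:
            A minimal sketch (assuming the scores in question have been parsed)::

                overwritten = p.update_tsvs_on_disk(facets='expanded')
                print(overwritten)  # paths of the TSV files that were rewritten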
"""
paths = []
for _, corpus in self.iter_corpora(view_name=view_name):
paths.extend(corpus.update_tsvs_on_disk(facets=facets,
view_name=view_name,
force=force,
choose=choose))
return paths
def _aggregate_corpus_data(self,
method,
view_name=None,
concatenate=False,
**kwargs
):
result = {}
for corpus_name, corpus in self.iter_corpora(view_name):
corpus_method= getattr(corpus, method)
corpus_result = corpus_method(view_name=view_name, **kwargs)
for fname, piece_result in corpus_result.items():
result[(corpus_name, fname)] = piece_result
if concatenate:
keys, dataframes = [], []
flat = kwargs['flat']
if flat:
add_index_level = any(len(piece_result) > 1 for piece_result in result.values())
else:
add_index_level = any(len(file_dataframe_tuples) > 1 for piece_result in result.values() for file_dataframe_tuples in piece_result.values())
for corpus_fname, piece_result in result.items():
if flat:
n_tuples = len(piece_result)
if n_tuples == 0:
continue
keys.append(corpus_fname)
if n_tuples == 1:
if add_index_level:
file, df = piece_result[0]
df = pd.concat([df], keys=[file.rel_path])
dataframes.append(df)
else:
dataframes.append(piece_result[0][1])
else:
files, dfs = list(zip(*piece_result))
ix_level = [file.rel_path for file in files]
concat = pd.concat(dfs, keys=ix_level)
dataframes.append(concat)
else:
for facet, file_dataframe_tuples in piece_result.items():
n_tuples = len(file_dataframe_tuples)
if n_tuples == 0:
continue
keys.append(corpus_fname + (facet,))
if n_tuples == 1:
if add_index_level:
file, df = file_dataframe_tuples[0]
df = pd.concat([df], keys=[file.rel_path])
dataframes.append(df)
else:
dataframes.append(file_dataframe_tuples[0][1])
else:
files, dfs = list(zip(*file_dataframe_tuples))
ix_level = [file.rel_path for file in files]
concat = pd.concat(dfs, keys=ix_level)
dataframes.append(concat)
if len(dataframes) > 0:
try:
result = pd.concat(dataframes, keys=keys)
except ValueError:
n_levels = [df.columns.nlevels for df in dataframes]
if len(set(n_levels)) > 1:
# this error might come from various form label dataframes with varying numbers of column levels
adapted_dataframes = []
for df in dataframes:
if df.columns.nlevels == 2:
adapted_dataframes.append(df)
else:
loc = df.columns.get_loc('form_label')
adapted_dataframes.append(pd.concat([df.iloc[:, :loc], df.iloc[:, loc:]], keys=['', 'a'], axis=1))
result = pd.concat(adapted_dataframes, keys=keys)
else:
raise
nlevels = result.index.nlevels
level_names = ['corpus', 'fname']
if not flat:
level_names.append('facet')
if len(level_names) < nlevels - 1:
level_names.append('ix')
level_names.append('i')
result.index.rename(level_names, inplace=True)
else:
return pd.DataFrame()
return result
def _get_parsed_score_files(self, view_name: Optional[str] = None) -> Dict[CorpusFnameTuple, FileList]:
result = {}
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
fname2files = corpus.get_files('scores', view_name=view_name, unparsed=False, flat=True)
result[corpus_name] = sum(fname2files.values(), [])
return result
def _get_unparsed_score_files(self, view_name: Optional[str] = None) -> Dict[CorpusFnameTuple, FileList]:
result = {}
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
fname2files = corpus.get_files('scores', view_name=view_name, parsed=False, flat=True)
result[corpus_name] = sum(fname2files.values(), [])
return result
def _get_parsed_tsv_files(self, view_name: Optional[str] = None, flat: bool = True) -> Dict[CorpusFnameTuple, Union[FileDict, FileList]]:
result = {}
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
fname2files = corpus.get_files('tsv', view_name=view_name, unparsed=False, flat=flat)
if flat:
result[corpus_name] = sum(fname2files.values(), [])
else:
dd = defaultdict(list)
for fname, typ2files in fname2files.items():
for typ, files in typ2files.items():
dd[typ].extend(files)
result[corpus_name] = dict(dd)
return result
def _get_unparsed_tsv_files(self, view_name: Optional[str] = None, flat: bool = True) -> Dict[CorpusFnameTuple, Union[FileDict, FileList]]:
result = {}
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
fname2files = corpus.get_files('tsv', view_name=view_name, parsed=False, flat=flat)
if flat:
result[corpus_name] = sum(fname2files.values(), [])
else:
dd = defaultdict(list)
for fname, typ2files in fname2files.items():
for typ, files in typ2files.items():
dd[typ].extend(files)
result[corpus_name] = dict(dd)
return result
def __getattr__(self, view_name) -> View:
if view_name in self.view_names:
if view_name != self.view_name:
self.switch_view(view_name, show_info=False)
return self
else:
raise AttributeError(f"'{view_name}' is not an existing view. Use _.get_view('{view_name}') to create it.")
#
#
# def count_annotation_layers(self, keys=None, which='attached', per_key=False):
# """ Counts the labels for each annotation layer defined as (staff, voice, harmony_layer).
# By default, only labels attached to a score are counted.
#
# Parameters
# ----------
# keys : :obj:`str` or :obj:`~collections.abc.Collection`, optional
# Key(s) for which to count annotation layers. By default, all keys are selected.
# which : {'attached', 'detached', 'tsv'}, optional
# 'attached': Counts layers from annotations attached to a score.
# 'detached': Counts layers from annotations that are in a Score object, but detached from the score.
# 'tsv': Counts layers from Annotation objects that have been loaded from or into annotation tables.
# per_key : :obj:`bool`, optional
# If set to True, the results are returned as a dict {key: Counter}, otherwise the counts are summed up in one Counter.
# If ``which='detached'``, the keys are keys from Score objects, otherwise they are keys from this Parse object.
#
# Returns
# -------
# :obj:`dict` or :obj:`collections.Counter`
# By default, the function returns a Counter of labels for every annotation layer (staff, voice, harmony_layer)
# If ``per_key`` is set to True, a dictionary {key: Counter} is returned, separating the counts.
# """
# res_dict = defaultdict(Counter)
#
# if which == 'detached':
# for id in self._iterids(keys, only_detached_annotations=True):
# for key, annotations in self._parsed_mscx[id]._detached_annotations.items():
# if key != 'annotations':
# _, layers = annotations.annotation_layers
# layers_dict = {tuple(None if pd.isnull(e) else e for e in t): count for t, count in
# layers.to_dict().items()}
# res_dict[key].update(layers_dict)
# elif which in ['attached', 'tsv']:
# for key, i in self._iterids(keys):
# if (key, i) in self._annotations:
# ext = self.fexts[key][i]
# if (which == 'attached' and ext == '.mscx') or (which == 'tsv' and ext != '.mscx'):
# _, layers = self._annotations[(key, i)].annotation_layers
# layers_dict = {tuple(None if pd.isnull(e) else e for e in t): count for t, count in
# layers.to_dict().items()}
# res_dict[key].update(layers_dict)
# else:
# self.logger.error(f"Parameter 'which' needs to be one of {{'attached', 'detached', 'tsv'}}, not {which}.")
# return {} if per_key else pd.Series()
#
#
# def make_series(counts):
# if len(counts) == 0:
# return pd.Series()
# data = counts.values()
# ks = list(counts.keys())
# #levels = len(ks[0])
# names = ['staff', 'voice', 'harmony_layer', 'color'] #<[:levels]
# ix = pd.MultiIndex.from_tuples(ks, names=names)
# return pd.Series(data, ix)
#
# if per_key:
# res = {k: make_series(v) for k, v in res_dict.items()}
# else:
# res = make_series(sum(res_dict.values(), Counter()))
# if len(res) == 0:
# self.logger.info("No annotations found. Maybe no scores have been parsed using parse_scores()?")
# return res
#
#
#
#
#
#
# def count_labels(self, keys=None, per_key=False):
# """ Count label types.
#
# Parameters
# ----------
# keys : :obj:`str` or :obj:`~collections.abc.Collection`, optional
# Key(s) for which to count label types. By default, all keys are selected.
# per_key : :obj:`bool`, optional
# If set to True, the results are returned as a dict {key: Counter},
# otherwise the counts are summed up in one Counter.
#
# Returns
# -------
# :obj:`dict` or :obj:`collections.Counter`
# By default, the function returns a Counter of label types.
# If ``per_key`` is set to True, a dictionary {key: Counter} is returned, separating the counts.
# """
# annotated = [id for id in self._iterids(keys) if id in self._annotations]
# res_dict = defaultdict(Counter)
# for key, i in annotated:
# res_dict[key].update(self._annotations[(key, i)].harmony_layer_counts)
# if len(res_dict) == 0:
# if len(self._parsed_mscx) == 0:
# self.logger.error("No scores have been parsed so far. Use parse_scores().")
# else:
# self.logger.info("None of the scores contain annotations.")
# if per_key:
# return {k: dict(v) for k, v in res_dict.items()}
# return dict(sum(res_dict.values(), Counter()))
#
    def detach_labels(self, keys=None, annotation_key='detached', staff=None, voice=None, harmony_layer=None, delete=True):
        """ Calls :py:meth:`Score.detach_labels <ms3.score.Score.detach_labels>` on every parsed score with key ``key``.
"""
assert annotation_key != 'annotations', "The key 'annotations' is reserved, please choose a different one."
ids = list(self._iterids(keys, only_attached_annotations=True))
if len(ids) == 0:
self.logger.info(f"Selection did not contain scores with labels: keys = '{keys}'")
for id in ids:
score = self._parsed_mscx[id]
try:
score.detach_labels(key=annotation_key, staff=staff, voice=voice, harmony_layer=harmony_layer, delete=delete)
            except Exception:
score.logger.error(f"Detaching labels failed with the following error:\n{sys.exc_info()[1]}")
self._collect_annotations_objects_references(ids=ids)
def score_metadata(self, view_name: Optional[str] = None) -> pd.DataFrame:
metadata_dfs = {corpus_name: corpus.score_metadata(view_name=view_name) for corpus_name, corpus in self.iter_corpora(view_name=view_name)}
metadata = pd.concat(metadata_dfs.values(), keys=metadata_dfs.keys(), names=['corpus', 'fname'])
return metadata
def metadata(self,
view_name: Optional[str] = None,
choose: Optional[Literal['auto', 'ask']] = None) -> pd.DataFrame:
metadata_dfs = {corpus_name: corpus.metadata(view_name=view_name, choose=choose)
for corpus_name, corpus in self.iter_corpora(view_name=view_name)}
metadata = pd.concat(metadata_dfs.values(), keys=metadata_dfs.keys(), names=['corpus', 'fname'])
return metadata
    def store_parsed_scores(self,
view_name: Optional[str] = None,
only_changed: bool = True,
root_dir: Optional[str] = None,
folder: str = '.',
suffix: str = '',
overwrite: bool = False,
simulate=False) -> Dict[str, List[str]]:
""" Stores all parsed scores under this view as MuseScore 3 files.
Args:
view_name:
only_changed:
By default, only scores that have been modified since parsing are written. Set to False to store
all scores regardless.
root_dir:
folder:
suffix: Suffix to append to the original file name.
overwrite: Pass True to overwrite existing files.
simulate: Set to True if no files are to be written.
Returns:
            {corpus_name -> [paths]} dictionary of the stored files.
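
        Example:
            A minimal sketch (folder name and suffix are hypothetical)::

                p.store_parsed_scores(folder='reviewed', suffix='_reviewed', overwrite=True)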
"""
paths = {}
for corpus_name, corpus in self.iter_corpora(view_name):
paths[corpus_name] = corpus.store_parsed_scores(view_name=view_name,
only_changed=only_changed,
root_dir=root_dir,
folder=folder,
suffix=suffix,
overwrite=overwrite,
simulate=simulate)
return paths
    def parse(self, view_name=None, level=None, parallel=True, only_new=True, labels_cfg={}, cols={}, infer_types=None, **kwargs):
        """ Shorthand for executing :meth:`parse_scores` and :meth:`parse_tsv` in one go.

        Args:
            view_name: Name of the view under which the files are to be parsed. Defaults to the active view.

        All other arguments are passed on to :meth:`parse_scores` and :meth:`parse_tsv`, respectively.
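
        Example:
            A minimal sketch (the directory path is hypothetical)::

                p = Parse('~/corpora')
                p.parse()   # parse scores and TSV files in one go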
"""
self.parse_scores(level=level, parallel=parallel, only_new=only_new, labels_cfg=labels_cfg, view_name=view_name)
self.parse_tsv(view_name=view_name, level=level, cols=cols, infer_types=infer_types, only_new=only_new, **kwargs)
    def parse_mscx(self, *args, **kwargs):
"""Deprecated method. Replaced by :meth:`parse_scores`."""
raise AttributeError(f"Method not in use any more. Use Parse.parse_scores().")
    def parse_scores(self,
level: str = None,
parallel: bool = True,
only_new: bool = True,
labels_cfg: dict = {},
view_name: str = None,
choose: Literal['all', 'auto', 'ask'] = 'all'):
""" Parse MuseScore 3 files (MSCX or MSCZ) and store the resulting read-only Score objects. If they need
to be writeable, e.g. for removing or adding labels, pass ``parallel=False`` which takes longer but prevents
having to re-parse at a later point.

        Args:
            level: Pass a level name such as 'W', 'D', 'I', 'E', 'C' (or their long forms) for which (and above which) you want to see log records.
            parallel:
                Defaults to True, meaning that all CPU cores are used simultaneously to speed up the parsing. This implies
                that the resulting Score objects are read-only and that you might not be able to use the computer
                during parsing. Pass False to parse one score after the other, which takes longer but allows
                making changes to the scores afterwards.
            only_new: By default, scores that have already been parsed are not parsed again. Pass False to re-parse them, too.
            labels_cfg: Configuration dictionary passed on to the parsed scores to determine the output format of label tables.
            view_name: Name of the view under which the scores are to be parsed. Defaults to the active view.
            choose:
                Behaviour if several scores are available for the same piece: parse 'all' of them (default),
                pick one automatically ('auto'), or 'ask' which one to parse.
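
        Example:
            A minimal sketch::

                p.parse_scores(parallel=False)   # parse sequentially so that the scores remain editable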
"""
if level is not None:
self.change_logger_cfg(level=level)
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
corpus.parse_scores(level=level,
parallel=parallel,
only_new=only_new,
labels_cfg=labels_cfg,
view_name=view_name,
choose=choose)
    def parse_tsv(self,
view_name=None,
level=None,
cols={},
infer_types=None,
only_new=True,
choose: Literal['all', 'auto', 'ask'] = 'all',
**kwargs):
""" Parse TSV files (or other value-separated files such as CSV) to be able to do something with them.

        Args:
            view_name: Name of the view under which the files are to be parsed. Defaults to the active view.
            level: Pass a level name such as 'W', 'D', 'I', 'E', 'C' (or their long forms) for which (and above which) you want to see log records.
            cols:
                By default, if a column called ``'label'`` is found, the TSV is treated as an annotation table and turned into
                an Annotations object. Pass one or several column name(s) to treat *them* as label columns instead. If you
                pass ``{}`` or no label column is found, the TSV is parsed as a "normal" table, i.e. a DataFrame.
            infer_types: To recognize one or several custom label type(s), pass ``{name: regEx}``.
            only_new: By default, TSV files that have already been parsed are not parsed again. Pass False to re-parse them, too.
            choose:
                Behaviour if several files are available for the same piece and facet: parse 'all' of them (default),
                pick one automatically ('auto'), or 'ask' which one to parse.
            **kwargs:
                Keyword arguments passed on to the TSV parsing, for instance to update the default dictionaries for ``dtypes``
                and ``converters`` used in :py:func:`load_tsv`.
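
        Example:
            A minimal sketch::

                p.parse_tsv(only_new=False)   # re-parse TSV files that had already been parsed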
"""
if level is not None:
self.change_logger_cfg(level=level)
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
corpus.parse_tsv(view_name=view_name,
cols=cols,
infer_types=infer_types,
only_new=only_new,
choose=choose,
**kwargs)
# def _parse_tsv_from_git_revision(self, tsv_id, revision_specifier):
# """ Takes the ID of an annotation table, and parses the same file's previous version at ``revision_specifier``.
#
# Parameters
# ----------
# tsv_id
# ID of the TSV file containing an annotation table, for which to parse a previous version.
# revision_specifier : :obj:`str`
# String used by git.Repo.commit() to find the desired git revision.
# Can be a long or short SHA, git tag, branch name, or relative specifier such as 'HEAD~1'.
#
# Returns
# -------
# ID
# (key, i) of the newly added annotation table.
# """
# key, i = tsv_id
# corpus_path = self.corpus_paths[key]
# try:
# repo = Repo(corpus_path, search_parent_directories=True)
# except InvalidGitRepositoryError:
# self.logger.error(f"{corpus_path} seems not to be (part of) a git repository.")
# return
# try:
# git_repo = repo.remote("origin").url
# except ValueError:
# git_repo = os.path.basename()
# try:
# commit = repo.commit(revision_specifier)
# commit_sha = commit.hexsha
# short_sha = commit_sha[:7]
# commit_info = f"{short_sha} with message '{commit.message}'"
# except BadName:
# self.logger.error(f"{revision_specifier} does not resolve to a commit for repo {git_repo}.")
# return
# tsv_type = self._tsv_types[tsv_id]
# tsv_path = self.full_paths[key][i]
# rel_path = os.path.relpath(tsv_path, corpus_path)
# new_directory = os.path.join(corpus_path, short_sha)
# new_path = os.path.join(new_directory, self.files[key][i])
# if new_path in self.full_paths[key]:
# existing_i = self.full_paths[key].index(new_path)
# existing_tsv_type = self._tsv_types[(key, existing_i)]
# if tsv_type == existing_tsv_type:
# self.logger.error(f"Had already loaded a {tsv_type} table for commit {commit_info} of repo {git_repo}.")
# return
# if not tsv_type in ('labels', 'expanded'):
# raise NotImplementedError(f"Currently, only annotations are to be loaded from a git revision but {rel_path} is a {tsv_type}.")
# try:
# targetfile = commit.tree / rel_path
# except KeyError:
# # if the file was not found, try and see if at the time of the git revision the folder was still called 'harmonies'
# if tsv_type == 'expanded':
# folder, tsv_name = os.path.split(rel_path)
# if folder != 'harmonies':
# old_rel_path = os.path.join('harmonies', tsv_name)
# try:
# targetfile = commit.tree / old_rel_path
# self.logger.debug(f"{rel_path} did not exist at commit {commit_info}, using {old_rel_path} instead.")
# rel_path = old_rel_path
# except KeyError:
# self.logger.error(f"Neither {rel_path} nor its older version {old_rel_path} existed at commit {commit_info}.")
# return
# else:
# self.logger.error(f"{rel_path} did not exist at commit {commit_info}.")
# return
# self.logger.info(f"Successfully loaded {rel_path} from {commit_info}.")
# try:
# with io.BytesIO(targetfile.data_stream.read()) as f:
# df = load_tsv(f)
# except Exception:
# self.logger.error(f"Parsing {rel_path} @ commit {commit_info} failed with the following error:\n{sys.exc_info()[1]}")
# return
# new_id = self._handle_path(new_path, key, skip_checks=True)
# self._parsed_tsv[new_id] = df
# self._dataframes[tsv_type][new_id] = df
# self._tsv_types[new_id] = tsv_type
# logger_cfg = dict(self.logger_cfg)
# logger_cfg['name'] = self.logger_names[(key, i)]
# if tsv_id in self._annotations:
# anno_obj = self._annotations[tsv_id] # get Annotation object's settings from the existing one
# cols = anno_obj.cols
# infer_types = anno_obj.regex_dict
# else:
# cols = dict(label='label')
# infer_types = None
# self._annotations[new_id] = Annotations(df=df, cols=cols, infer_types=infer_types,
# **logger_cfg)
# self.logger.debug(
# f"{rel_path} successfully parsed from commit {short_sha}.")
# return new_id
#
#
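# The commented-out method above relies on GitPython to read a TSV file as it
# existed at an arbitrary revision. A minimal, stand-alone sketch of that
# technique (hedged; the repository path, file path and revision are made-up
# examples):
#
#   from git import Repo   # already imported at module level
#   import io
#
#   repo = Repo('/path/to/corpus', search_parent_directories=True)
#   commit = repo.commit('HEAD~1')                 # SHA, tag, branch, or relative specifier
#   blob = commit.tree / 'harmonies/piece.tsv'     # path relative to the repository root
#   with io.BytesIO(blob.data_stream.read()) as f:
#       df = load_tsv(f)                           # parse the historical file into a DataFrame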
# def pieces(self, parsed_only=False):
# pieces_dfs = [self[k].pieces(parsed_only=parsed_only) for k in self.keys()]
# result = pd.concat(pieces_dfs, keys=self.keys())
# result.index.names = ['key', 'metadata_row']
# return result
#
# def store_scores(self, keys=None, ids=None, root_dir=None, folder='.', suffix='', overwrite=False, simulate=False):
# """ Stores the parsed MuseScore files in their current state, e.g. after detaching or attaching annotations.
#
# Parameters
# ----------
# keys : :obj:`str` or :obj:`~collections.abc.Collection`, optional
# Key(s) for which to store the parsed MuseScore files. By default, all keys are selected.
# ids : :obj:`~collections.abc.Collection`
# If you pass a collection of IDs, ``keys`` is ignored and only the selected files are stored.
# root_dir : :obj:`str`, optional
# Defaults to None, meaning that the original root directory is used that was added to the Parse object.
# Otherwise, pass a directory to rebuild the original substructure. If ``folder`` is an absolute path,
# ``root_dir`` is ignored.
# folder : :obj:`str`
# Where to store the file. Can be relative to ``root_dir`` or absolute, in which case ``root_dir`` is ignored.
# If ``folder`` is relative, the behaviour depends on whether it starts with a dot ``.`` or not: If it does,
# the folder is created at every end point of the relative tree structure under ``root_dir``. If it doesn't,
# it is created only once, relative to ``root_dir``, and the relative tree structure is built below.
# suffix : :obj:`str`, optional
# Suffix to append to the original file name.
# overwrite : :obj:`bool`, optional
# Pass True to overwrite existing files.
# simulate : :obj:`bool`, optional
# Pass True if no files are to be written.
#
# Returns
# -------
#
# """
# if ids is None:
# ids = [id for id in self._iterids(keys) if id in self._parsed_mscx]
# paths = []
# for key, i in ids:
# new_path = self._store_scores(key=key, i=i, folder=folder, suffix=suffix, root_dir=root_dir, overwrite=overwrite, simulate=simulate)
# if new_path is not None:
# if new_path in paths:
# modus = 'would have' if simulate else 'has'
# self.logger.info(f"The score at {new_path} {modus} been overwritten.")
# else:
# paths.append(new_path)
# if simulate:
# return list(set(paths))
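# The ``folder``/``root_dir`` convention described in the docstring above can be
# illustrated with a small sketch (hedged; the helper below is illustrative only
# and is not ms3's actual ``_calculate_path``, and the paths are made-up examples):
#
#   import os
#
#   def illustrate_target(root_dir, rel_path, folder):
#       """Illustrative only: where a file from ``rel_path`` would be written."""
#       if os.path.isabs(folder):
#           return folder                                                       # ``root_dir`` is ignored
#       if folder.startswith('.'):
#           return os.path.normpath(os.path.join(root_dir, rel_path, folder))   # at every end point
#       return os.path.normpath(os.path.join(root_dir, folder, rel_path))       # once, below ``root_dir``
#
#   illustrate_target('/corpus', 'MS3', './reviewed')   # -> '/corpus/MS3/reviewed'
#   illustrate_target('/corpus', 'MS3', 'reviewed')     # -> '/corpus/reviewed/MS3'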
#
#
#
#
# def _collect_annotations_objects_references(self, keys=None, ids=None):
# """ Updates the dictionary self._annotations with all parsed Scores that have labels attached (or not any more). """
# if ids is None:
# ids = list(self._iterids(keys, only_parsed_mscx=True))
# updated = {}
# for id in ids:
# if id in self._parsed_mscx:
# score = self._parsed_mscx[id]
# if score is not None:
# if 'annotations' in score:
# updated[id] = score.annotations
# elif id in self._annotations:
# del (self._annotations[id])
# else:
# del (self._parsed_mscx[id])
# self._annotations.update(updated)
#
#
# def _iterids(self, keys=None, only_parsed_mscx=False, only_parsed_tsv=False, only_attached_annotations=False, only_detached_annotations=False):
# """Iterator through IDs for a given set of keys.
#
# Parameters
# ----------
# keys
# only_parsed_mscx
# only_attached_annotations
# only_detached_annotations
#
# Yields
# ------
# :obj:`tuple`
# (str, int)
#
# """
# keys = self._treat_key_param(keys)
# for key in sorted(keys):
# for id in make_id_tuples(key, len(self.fnames[key])):
# if only_parsed_mscx or only_attached_annotations or only_detached_annotations:
# if id not in self._parsed_mscx:
# continue
# if only_attached_annotations:
# if 'annotations' in self._parsed_mscx[id]:
# pass
# else:
# continue
# elif only_detached_annotations:
# if self._parsed_mscx[id].has_detached_annotations:
# pass
# else:
# continue
# elif only_parsed_tsv:
# if id in self._parsed_tsv:
# pass
# else:
# continue
#
# yield id
#
# def _iter_subdir_selectors(self, keys=None, ids=None):
# """ Iterate through the specified ids grouped by subdirs.
#
# Yields
# ------
# :obj:`tuple`
# (key: str, subdir: str, ixs: list) tuples. IDs can be created by combining key with each i in ixs.
# The yielded ``ixs`` are typically used as parameter for ``.utils.iter_selection``.
#
# """
# grouped_ids = self._make_grouped_ids(keys, ids)
# for k, ixs in grouped_ids.items():
# subdirs = self.subdirs[k]
# for subdir in sorted(set(iter_selection(subdirs, ixs))):
# yield k, subdir, [i for i in ixs if subdirs[i] == subdir]
#
#
#
#
# def _parse(self, key, i, logger_cfg={}, labels_cfg={}, read_only=False):
# """Performs a single parse and returns the resulting Score object or None."""
# path = self.full_paths[key][i]
# file = self.files[key][i]
# self.logger.debug(f"Attempting to parse {file}")
# try:
# logger_cfg['name'] = self.logger_names[(key, i)]
# score = Score(path, read_only=read_only, labels_cfg=labels_cfg, logger_cfg=logger_cfg, ms=self.ms)
# if score is None:
# self.logger.debug(f"Encountered errors when parsing {file}")
# else:
# self.logger.debug(f"Successfully parsed {file}")
# return score
# except (KeyboardInterrupt, SystemExit):
# self.logger.info("Process aborted.")
# raise
# except:
# self.logger.error(f"Unable to parse {path} due to the following exception:\n" + traceback.format_exc())
# return None
#
#
# def _score_ids(self, keys=None, score_extensions=None, native=True, convertible=True, opposite=False):
# """ Return IDs of all detected scores with particular file extensions, or all others if ``opposite==True``.
#
# Parameters
# ----------
# keys : :obj:`str` or :obj:`collections.abc.Iterable`, optional
# Only get IDs for particular keys.
# score_extensions : :obj:`collections.abc.Collection`, optional
# Get IDs for files with the given extensions (each starting with a dot). If this parameter is defined,
# ``native`` and ``convertible`` are ignored.
# native : :obj:`bool`, optional
# If ``score_extensions`` is not set, ``native=True`` selects all scores that ms3 can parse without using
# a MuseScore 3 executable.
# convertible : :obj:`bool`, optional
# If ``score_extensions`` is not set, ``convertible=True`` selects all scores that ms3 can parse as long as
# a MuseScore 3 executable is defined.
# opposite : :obj:`bool`, optional
# Pass True if you want to get the IDs of all the scores that do NOT have the specified extensions.
#
# Returns
# -------
# :obj:`list`
# A list of IDs.
#
# """
# if score_extensions is None:
# score_extensions = []
# if native:
# score_extensions.extend(Score.native_formats)
# if convertible:
# score_extensions.extend(Score.convertible_formats)
# if opposite:
# return [(k, i) for k, i in self._iterids(keys) if self.fexts[k][i][1:].lower() not in score_extensions]
# return [(k, i) for k, i in self._iterids(keys) if self.fexts[k][i][1:].lower() in score_extensions]
#
#
#
# def _store_scores(self, key, i, folder, suffix='', root_dir=None, overwrite=False, simulate=False):
# """ Creates a MuseScore 3 file from the Score object at the given ID (key, i).
#
# Parameters
# ----------
# key, i : (:obj:`str`, :obj:`int`)
# ID from which to construct the new path and filename.
# root_dir : :obj:`str`, optional
# Defaults to None, meaning that the original root directory is used that was added to the Parse object.
# Otherwise, pass a directory to rebuild the original substructure. If ``folder`` is an absolute path,
# ``root_dir`` is ignored.
# folder : :obj:`str`
# Where to store the file. Can be relative to ``root_dir`` or absolute, in which case ``root_dir`` is ignored.
# If ``folder`` is relative, the behaviour depends on whether it starts with a dot ``.`` or not: If it does,
# the folder is created at every end point of the relative tree structure under ``root_dir``. If it doesn't,
# it is created only once, relative to ``root_dir``, and the relative tree structure is built below.
# suffix : :obj:`str`, optional
# Suffix to append to the original file name.
# overwrite : :obj:`bool`, optional
# Pass True to overwrite existing files.
# simulate : :obj:`bool`, optional
# Pass True if no files are to be written.
#
# Returns
# -------
# :obj:`str`
# Path of the stored file.
#
# """
#
# id = (key, i)
# logger = self.id_logger(id)
# fname = self.fnames[key][i]
#
# if id not in self._parsed_mscx:
# logger.error(f"No Score object found. Call parse_scores() first.")
# return
# path = self._calculate_path(key=key, i=i, root_dir=root_dir, folder=folder)
# if path is None:
# return
#
# fname = fname + suffix + '.mscx'
# file_path = os.path.join(path, fname)
# if os.path.isfile(file_path):
# if simulate:
# if overwrite:
# logger.warning(f"Would have overwritten {file_path}.")
# return
# logger.warning(f"Would have skipped {file_path}.")
# return
# elif not overwrite:
# logger.warning(f"Skipped {file_path}.")
# return
# if simulate:
# logger.debug(f"Would have written score to {file_path}.")
# else:
# os.makedirs(path, exist_ok=True)
# self._parsed_mscx[id].store_scores(file_path)
# logger.debug(f"Score written to {file_path}.")
#
# return file_path
#
#
# def _store_tsv(self, df, key, i, folder, suffix='', root_dir=None, what='DataFrame', simulate=False):
# """ Stores a given DataFrame by constructing path and file name from a loaded file based on the arguments.
#
# Parameters
# ----------
# df : :obj:`pandas.DataFrame`
# DataFrame to store as a TSV.
# key, i : (:obj:`str`, :obj:`int`)
# ID from which to construct the new path and filename.
# folder, root_dir : :obj:`str`
# Parameters passed to :py:meth:`_calculate_path`.
# suffix : :obj:`str`, optional
# Suffix to append to the original file name.
# what : :obj:`str`, optional
# Descriptor, what the DataFrame contains for more informative log message.
# simulate : :obj:`bool`, optional
# Pass True if no files are to be written.
#
# Returns
# -------
# :obj:`str`
# Path of the stored file.
#
# """
# tsv_logger = self.id_logger((key, i))
#
# if df is None:
# tsv_logger.debug(f"No DataFrame for {what}.")
# return
# path = self._calculate_path(key=key, i=i, root_dir=root_dir, folder=folder)
# if path is None:
# return
#
# fname = self.fnames[key][i] + suffix + ".tsv"
# file_path = os.path.join(path, fname)
# if simulate:
# tsv_logger.debug(f"Would have written {what} to {file_path}.")
# else:
# tsv_logger.debug(f"Writing {what} to {file_path}.")
# write_tsv(df, file_path, logger=tsv_logger)
# return file_path
#
#
#
# def _treat_key_param(self, keys):
# if keys is None:
# keys = list(self.full_paths.keys())
# elif isinstance(keys, str):
# keys = [keys]
# return [k for k in sorted(set(keys)) if k in self.files]
#
#
# def _treat_harmony_layer_param(self, harmony_layer):
# if harmony_layer is None:
# return None
# all_types = {str(k): k for k in self.count_labels().keys()}
# if isinstance(harmony_layer, int) or isinstance(harmony_layer, str):
# harmony_layer = [harmony_layer]
# lt = [str(t) for t in harmony_layer]
# def matches_any_type(user_input):
# return any(True for t in all_types if user_input in t)
# def get_matches(user_input):
# return [t for t in all_types if user_input in t]
#
# not_found = [t for t in lt if not matches_any_type(t)]
# if len(not_found) > 0:
# plural = len(not_found) > 1
# plural_s = 's' if plural else ''
# self.logger.warning(
# f"No labels found with {'these' if plural else 'this'} label{plural_s} harmony_layer{plural_s}: {', '.join(not_found)}")
# return [all_types[t] for user_input in lt for t in get_matches(user_input)]
#
# def update_metadata(self, allow_suffix=False):
# """Uses all parsed metadata TSVs to update the information in the corresponding parsed MSCX files and returns
# the IDs of those that have been changed.
#
# Parameters
# ----------
# allow_suffix : :obj:`bool`, optional
# If set to True, this would also update the metadata for currently parsed MuseScore files
# corresponding to the columns 'rel_paths' and 'fnames' + [ANY SUFFIX]. For example,
# the row ('MS3', 'bwv846') would also update the metadata of 'MS3/bwv846_reviewed.mscx'.
#
# Returns
# -------
# :obj:`list`
# IDs of the parsed MuseScore files whose metadata has been updated.
# """
# metadata_dfs = self.metadata_tsv()
# if len(metadata_dfs) > 0:
# metadata = pd.concat(metadata_dfs.values(), keys=metadata_dfs.keys())
# else:
# metadata = self._metadata
# if len(metadata) == 0:
# self.logger.debug("No parsed metadata found.")
# return
# old = metadata
# if old.index.names != ['rel_paths', 'fnames']:
# try:
# old = old.set_index(['rel_paths', 'fnames'])
# except KeyError:
# self.logger.warning(f"Parsed metadata do not contain the columns 'rel_paths' and 'fnames' "
# f"needed to match information on identical files.")
# return []
# new = self.metadata(from_tsv=False).set_index(['rel_paths', 'fnames'])
# excluded_cols = ['ambitus', 'annotated_key', 'KeySig', 'label_count', 'last_mc', 'last_mn', 'musescore',
# 'TimeSig', 'length_qb', 'length_qb_unfolded', 'all_notes_qb', 'n_onsets', 'n_onset_positions']
# old_cols = sorted([c for c in old.columns if c not in excluded_cols and c[:5] != 'staff'])
#
# parsed = old.index.map(lambda i: i in new.index)
# relevant = old.loc[parsed, old_cols]
# updates = defaultdict(dict)
# for i, row in relevant.iterrows():
# new_row = new.loc[i]
# for j, val in row[row.notna()].items():  # .iteritems() is deprecated in recent pandas
# val = str(val)
# if j not in new_row or str(new_row[j]) != val:
# updates[i][j] = val
#
# l = len(updates)
# ids = []
# if l > 0:
# for (rel_path, fname), new_dict in updates.items():
# matches = self.fname2ids(fname=fname, rel_path=rel_path, allow_suffix=allow_suffix)
# match_ids = [id for id in matches.keys() if id in self._parsed_mscx]
# n_files_to_update = len(match_ids)
# if n_files_to_update == 0:
# self.logger.debug(
# f"rel_path={rel_path}, fname={fname} does not correspond to a currently parsed MuseScore file.")
# continue
# for id in match_ids:
# for name, val in new_dict.items():
# self._parsed_mscx[id].mscx.parsed.metatags[name] = val
# self._parsed_mscx[id].mscx.parsed.update_metadata()
# self.id_logger(id).debug(f"Updated with {new_dict}")
# ids.append(id)
#
# self.logger.info(f"{l} files updated.")
# else:
# self.logger.info("Nothing to update.")
# return ids
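# The comparison step above aligns the old (TSV) and new (parsed-score) metadata
# on the ('rel_paths', 'fnames') index and collects the old values that differ.
# A hedged, stand-alone sketch of that step (the values are made-up examples):
#
#   old = pd.DataFrame({'rel_paths': ['MS3'], 'fnames': ['bwv846'],
#                       'composer': ['Bach']}).set_index(['rel_paths', 'fnames'])
#   new = pd.DataFrame({'rel_paths': ['MS3'], 'fnames': ['bwv846'],
#                       'composer': ['J.S. Bach']}).set_index(['rel_paths', 'fnames'])
#   updates = {ix: {col: str(val) for col, val in row.dropna().items()
#                   if str(new.loc[ix, col]) != str(val)}
#              for ix, row in old.iterrows() if ix in new.index}
#   # -> {('MS3', 'bwv846'): {'composer': 'Bach'}}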
#
#
# def __getstate__(self):
# """ Override the method of superclass """
# return self.__dict__
def __getitem__(self, item) -> Corpus:
if isinstance(item, str):
return self.get_corpus(item)
elif isinstance(item, tuple):
if len(item) == 1:
return self.get_corpus(item[0])
if len(item) == 2:
corpus_name, fname_or_ix = item
return self.get_corpus(corpus_name)[fname_or_ix]
corpus_name, *remainder = item
return self.get_corpus(corpus_name)[tuple(remainder)]
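# A hedged indexing sketch based on the method above (corpus and piece names
# are made-up examples):
#
#   p = Parse('~/corpora')
#   corpus = p['my_corpus']              # returns the Corpus object
#   piece = p['my_corpus', 'bwv846']     # equivalent to p['my_corpus']['bwv846']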
[docs] def __iter__(self) -> Iterator[Tuple[str, Corpus]]:
""" Iterate through all (corpus_name, Corpus) tuples, regardless of any Views.
Yields: (corpus_name, Corpus) tuples
"""
yield from self.corpus_objects.items()
[docs] def __repr__(self):
"""Show the :meth:`info` under the active view."""
return self.info(return_str=True)
# def _get_unambiguous_fnames_from_ids(self, score_ids, key):
#
# file_info = [self.id2file_info[id] for id in score_ids]
# score_names = [F.fname for F in file_info]
# score_name_set = set(score_names)
# if len(score_names) == len(score_name_set):
# return dict(zip(score_names, score_ids))
# more_than_one = {name: [] for name, cnt in Counter(score_names).items() if cnt > 1}
# result = {} # fname -> score_id
# for F in file_info:
# if F.fname in more_than_one:
# more_than_one[F.fname].append(F)
# else:
# result[F.fname] = F.id
# for name, files in more_than_one.items():
# choice_between_n = len(files)
# df = pd.DataFrame.from_dict({F.id: dict(subdir=F.subdir, fext=F.fext, subdir_len=len(F.subdir)) for F in files}, orient='index')
# self.logger.debug(f"Trying to disambiguate between these {choice_between_n} with the same fname '{name}':\n{df}")
# shortest_subdir_length = df.subdir_len.min()
# shortest_length_selector = (df.subdir_len == shortest_subdir_length)
# n_have_shortest_length = shortest_length_selector.sum()
# # checking if the shortest path contains only 1 file and pick that
# if n_have_shortest_length == 1:
# id = df.subdir_len.idxmin()
# picked = df.loc[id]
# self.logger.info(f"In order to pick one from the {choice_between_n} scores with fname '{name}', the one with the shortest subdir '{picked.subdir}' was selected.")
# result[name] = id
# continue
# # otherwise, check if there is only a single MSCX or otherwise MSCZ file and pick that
# fexts = df.fext.value_counts()
# if '.mscx' in fexts:
# if fexts['.mscx'] == 1:
# picked = df[df.fext == '.mscx'].iloc[0]
# id = picked.name
# self.logger.info(f"In order to pick one from the {choice_between_n} scores with fname '{name}', the one contained in '{picked.subdir}' was selected because it is the only "
# f"one in MSCX format.")
# result[name] = id
# continue
# elif '.mscz' in fexts and fexts['.mscz'] == 1:
# picked = df[df.fext == '.mscz'].iloc[0]
# id = picked.name
# self.logger.info(
# f"In order to pick one from the {choice_between_n} scores with fname '{name}', the one contained in '{picked.subdir}' was selected because it is the only "
# f"one in MuseScore format.")
# result[name] = id
# continue
# # otherwise, check if the shortest path contains only a single MSCX or MSCZ file as a last resort
# if n_have_shortest_length < choice_between_n:
# df = df[shortest_length_selector]
# self.logger.debug(f"Picking those from the shortest subdir has reduced the choice to {n_have_shortest_length}:\n{df}.")
# else:
# self.logger.warning(f"Unable to pick one of the available scores for fname '{name}', it will be disregarded until disambiguated:\n{df}")
# continue
# fexts = df.fext.value_counts()  # re-count extensions within the reduced selection
# if '.mscx' in fexts and fexts['.mscx'] == 1:
# pick_ext = '.mscx'
# elif '.mscz' in fexts and fexts['.mscz'] == 1:
# pick_ext = '.mscz'
# else:
# self.logger.warning(f"Unable to pick one of the available scores for fname '{name}', it will be disregarded until disambiguated:\n{df}")
# continue
# picked = df[df.fext == pick_ext].iloc[0]
# id = picked.name
# self.logger.info(
# f"In order to pick one from the {choice_between_n} scores with fname '{name}', the '{pick_ext}' one contained in '{picked.subdir}' was selected because it is the only "
# f"one in that format contained in the shortest subdir.")
# result[name] = id
# return result
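# The disambiguation heuristic above (prefer a unique shortest subdir, then a
# unique MSCX file) boils down to a few pandas operations. A hedged,
# stand-alone sketch (the subdir and extension values are made-up examples):
#
#   import pandas as pd
#
#   df = pd.DataFrame({'subdir': ['MS3', 'MS3/outdated'],
#                      'fext': ['.mscx', '.mscx']})
#   df['subdir_len'] = df.subdir.str.len()
#   if (df.subdir_len == df.subdir_len.min()).sum() == 1:
#       picked = df.loc[df.subdir_len.idxmin()]      # unique shortest subdir wins (row 0 here)
#   elif (df.fext == '.mscx').sum() == 1:
#       picked = df[df.fext == '.mscx'].iloc[0]      # otherwise a unique MSCX file wins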
########################################################################################################################
########################################################################################################################
################################################# End of Parse() ########################################################
########################################################################################################################
########################################################################################################################
########################################################################################################################
########################################################################################################################
################################################# End of View() ########################################################
########################################################################################################################
########################################################################################################################
#
# class PieceView(View):
#
# def __init__(self,
# view: View,
# fname: str):
# self.view = view # parent View object
# self.p = view.p
# self.key = view.key
# self.fname = fname
# logger_cfg = self.p.logger_cfg
# logger_cfg['name'] = f"{self.view.logger.name}.{self.fname}"
# super().__init__(subclass='Piece', logger_cfg=logger_cfg) # initialize loggers (super(Parse, self) would fail: Parse is not in PieceView's MRO)
# matches = view.detect_ids_by_fname(parsed_only=True, names=[fname])
# if len(matches) != 1:
# raise ValueError(f"{len(matches)} fnames match {fname} for key {self.key}")
# self.matches = matches[fname]
# self.score_available = 'scores' in self.matches
# self.measures_available = self.score_available or 'measures' in self.matches
#
#
#
# @lru_cache()
# def get_dataframe(self, what: Literal['measures', 'notes', 'rests', 'labels', 'expanded', 'events', 'chords', 'metadata', 'form_labels'],
# unfold: bool = False,
# quarterbeats: bool = False,
# interval_index: bool = False,
# disambiguation: str = 'auto',
# prefer_score: bool = True,
# return_file_info: bool = False) -> pd.DataFrame:
# """ Retrieves one DataFrame for the piece.
#
# Args:
# what: What kind of DataFrame to retrieve.
# unfold: Pass True to unfold repeats.
# quarterbeats:
# interval_index:
# disambiguation: In case several DataFrames are available in :attr:`.matches`, pass its disambiguation string.
# prefer_score: By default, data from parsed scores is preferred to that from parsed TSVs. Pass False to prefer TSVs.
# return_file_info: Pass True if the method should also return a :obj:`namedtuple` with information on the DataFrame
# being returned. It comes with the fields "id", "full_path", "suffix", "fext", "subdir", "i_str" where the
# latter is the ID's second component as a string.
#
# Returns:
# The requested DataFrame if available and, if ``return_file_info`` is set to True, a namedtuple with information about its provenance.
#
# Raises:
# FileNotFoundError: If no DataFrame of the requested type is available
# """
# available = list(self.p._dataframes.keys())
# if what not in available:
# raise ValueError(f"what='{what}' is an invalid argument. Pass one of {available}.")
# if self.score_available and (prefer_score or what not in self.matches):
# file_info = disambiguate(self.matches['scores'], disambiguation=disambiguation)
# score = self.p[file_info.id]
# df = score.mscx.__getattribute__(what)()
# elif what in self.matches:
# file_info = disambiguate(self.matches[what], disambiguation=disambiguation)
# df = self.p[file_info.id]
# else:
# raise FileNotFoundError(f"No {what} available for {self.key} -> {self.fname}")
# if any((unfold, quarterbeats, interval_index)):
# measures = self.get_dataframe('measures', prefer_score=prefer_score)
# df = dfs2quarterbeats([df], measures, unfold=unfold, quarterbeats=quarterbeats,
# interval_index=interval_index, logger=self.logger)[0]
# if return_file_info:
# return df, file_info
# return df
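# A hedged usage sketch for the commented-out ``get_dataframe`` above (the
# piece name and the surrounding View object are made-up examples):
#
#   piece = PieceView(view, fname='bwv846')
#   notes = piece.get_dataframe('notes', unfold=True, quarterbeats=True)
#   labels, file_info = piece.get_dataframe('labels', return_file_info=True)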