Skip to content

Mangle

Modify assay CSV files to simulate poor formatting.

mangle_assays(assays_dir, persons)

Create 'raw' assay files by mangling data of pristine files.

Parameters:

Name Type Description Default
assays_dir Path | str

Directory containing assay CSV files

required
persons AllPersons

People who performed experiments

required

Raises:

Type Description
ValueError

If people data cannot be loaded

Source code in src/snailz/mangle.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def mangle_assays(assays_dir: Path | str, persons: AllPersons) -> None:
    """Create 'raw' assay files by mangling data of pristine files.

    Every file in `assays_dir` whose name ends with `{ORIGINAL}.csv` is read,
    passed through `_mangle_assay`, and written alongside the input with that
    ending replaced by `{MANGLED}.csv`. The pristine files are left untouched.

    Parameters:
        assays_dir: Directory containing assay CSV files
        persons: People who performed experiments

    Raises:
        OSError: If an assay file cannot be read or written
    """
    # NOTE(review): earlier documentation listed ValueError ("people data
    # cannot be loaded"); nothing in this function raises it - confirm whether
    # a caller or helper is expected to.
    staff = {p.ident: p for p in persons.items}
    original_suffix = f"{ORIGINAL}.csv"
    mangled_suffix = f"{MANGLED}.csv"
    for filename in Path(assays_dir).glob(f"*{original_suffix}"):
        # The csv module asks for newline="" so that it, not the text layer,
        # decides how line endings inside and between records are handled.
        with open(filename, "r", newline="") as stream:
            original = list(csv.reader(stream))
        mangled = _mangle_assay(original, staff)
        # Swap only the trailing marker of the file *name*; a plain
        # str.replace over the whole path would also rewrite any directory
        # component that happens to contain the same text.
        stem = filename.name[: -len(original_suffix)]
        output_file = filename.with_name(stem + mangled_suffix)
        with open(output_file, "w", newline="") as stream:
            csv.writer(stream, lineterminator="\n").writerows(mangled)

_mangle_assay(data, staff)

Mangle a single assay file.

Source code in src/snailz/mangle.py
34
35
36
37
38
39
40
def _mangle_assay(data: list[list[str]], staff: dict[str, Person]) -> list[list]:
    """Apply a random subset of the manglers, in random order, to one assay.

    Between zero and all of the available manglers are chosen; each one
    receives the output of the previous one together with `staff`.
    """
    candidates = [_mangle_id, _mangle_indent, _mangle_person]
    how_many = random.randint(0, len(candidates))
    chosen = random.sample(candidates, how_many)
    result = data
    for mangle in chosen:
        result = mangle(result, staff)
    return result

_mangle_id(data, staff)

Wrap the value that follows the "id" label in single quotes so that it reads as a quoted string.

Source code in src/snailz/mangle.py
43
44
45
46
47
48
49
def _mangle_id(data: list[list[str]], staff: dict[str, Person]) -> list[list[str]]:
    """Wrap the value that follows an "id" cell in single quotes.

    For every row containing a cell equal to "id", the cell immediately after
    the first such occurrence is rewritten as `'<value>'`. Rows are modified
    in place and the same `data` list is returned. `staff` is unused; it is
    accepted so that all manglers share one call signature.
    """
    for row in data:
        if "id" not in row:
            continue
        value_at = row.index("id") + 1
        # An "id" cell in the last column has nothing after it to quote;
        # skip it rather than fail with IndexError.
        if value_at < len(row):
            row[value_at] = f"'{row[value_at]}'"
    return data

_mangle_indent(data, staff)

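Indent data portion: rows whose first cell is all digits are shifted one column to the right, and every other row is padded with a trailing empty cell.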

Source code in src/snailz/mangle.py
52
53
54
def _mangle_indent(data: list[list], staff: dict[str, Person]) -> list[list[str]]:
    """Indent data portion.

    Rows whose first cell consists only of digits get an empty cell prepended
    (shifting them one column right); all other non-empty rows get an empty
    cell appended. Empty rows are passed through as empty rows. A new list of
    new rows is returned; `data` itself is not modified. `staff` is unused; it
    is accepted so that all manglers share one call signature.
    """
    mangled = []
    for row in data:
        if not row:
            # csv.reader yields [] for blank lines; there is no first cell to
            # inspect, so keep the blank line as it is.
            mangled.append([])
        elif row[0].isdigit():
            mangled.append([""] + row)
        else:
            mangled.append(row + [""])
    return mangled

_mangle_person(data, staff)

Replace person identifier with name.

Source code in src/snailz/mangle.py
57
58
59
60
61
62
63
64
def _mangle_person(data: list[list], staff: dict[str, Person]) -> list[list[str]]:
    """Replace person identifier with name.

    In every row whose first cell is "by" and that has a second cell, the
    label becomes "performed" and the identifier in the second cell is
    replaced by "<personal> <family>" of the matching entry in `staff`.
    Rows are modified in place and the same `data` list is returned.

    Raises:
        KeyError: If the identifier is not a key of `staff`
    """
    for row in data:
        # Blank rows and a bare "by" label carry no identifier to translate;
        # skip them instead of failing (or half-rewriting the row).
        if len(row) < 2 or row[0] != "by":
            continue
        # Look the person up before touching the row so that an unknown
        # identifier leaves the row unchanged.
        person = staff[row[1]]
        row[0] = "performed"
        row[1] = f"{person.personal} {person.family}"
    return data