Skip to content

Mangle

Modify assay CSV files to simulate poor formatting.

mangle_assays(assays_dir, persons, forced=None)

Create 'raw' assay files by mangling data of pristine files.

Parameters:

Name Type Description Default
assays_dir Path | str

Directory containing assay CSV files

required
persons AllPersons

People who performed experiments

required
forced list[str] | None

Names of the changes to apply (chosen at random if None)

None

Raises:

Type Description
ValueError

If people data cannot be loaded

Source code in src/snailz/mangle.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def mangle_assays(
    assays_dir: Path | str, persons: AllPersons, forced: list[str] | None = None
) -> None:
    """Create 'raw' assay files by mangling data of pristine files.

    Every file in `assays_dir` whose name ends in `{ORIGINAL}.csv` is read,
    mangled, and written to a sibling file whose name ends in
    `{MANGLED}.csv` instead. The pristine files are left untouched.

    Parameters:
        assays_dir: Directory containing assay CSV files
        persons: People who performed experiments; each entry of
            `persons.items` is looked up by its `ident`
        forced: names of changes to apply (select randomly if None)

    Raises:
        KeyError: If `forced` names an unknown change, or an assay refers to
            a person identifier that is not present in `persons`
    """
    staff = {p.ident: p for p in persons.items}
    original_suffix = f"{ORIGINAL}.csv"
    mangled_suffix = f"{MANGLED}.csv"

    for filename in Path(assays_dir).glob(f"*{original_suffix}"):
        # newline="" hands newline handling to the csv module, as its
        # documentation requires for files passed to csv.reader.
        with open(filename, "r", newline="") as stream:
            original = list(csv.reader(stream))

        mangled = _mangle_assay(original, staff, forced)

        # Swap only the trailing suffix of the file *name*.  A plain
        # str.replace() on the whole path would also rewrite any directory
        # component that happens to contain the same text.
        stem = filename.name[: -len(original_suffix)]
        output_file = filename.with_name(stem + mangled_suffix)

        # lineterminator="\n" overrides the csv writer's default "\r\n".
        with open(output_file, "w") as stream:
            csv.writer(stream, lineterminator="\n").writerows(mangled)

_mangle_assay(data, staff, forced)

Mangle a single assay file.

Parameters:

Name Type Description Default
data list[list[str]]

values from CSV file

required
staff dict[str, Person]

people keyed by ID

required
forced list[str] | None

optional list of specified manglings (for testing)

required

Returns:

Type Description
list[list]

Modified copy of data.

Source code in src/snailz/mangle.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def _mangle_assay(
    data: list[list[str]], staff: dict[str, Person], forced: list[str] | None
) -> list[list]:
    """Mangle a single assay file.

    When `forced` is None, between zero and all of the available manglings
    are chosen at random (each at most once) and applied in random order.
    Otherwise the manglings named in `forced` are applied in the order given.

    Parameters:
        data: values from CSV file
        staff: people keyed by ID
        forced: optional list of specified manglings (for testing)

    Returns:
        Modified copy of data; the rows passed in are never altered.

    Raises:
        KeyError: If `forced` contains a name that is not a known mangling.
    """
    available = {
        "id": _mangle_id,
        "indent": _mangle_indent,
        "person": _mangle_person,
    }

    if forced is None:
        # randint is inclusive at both ends, so "no mangling at all" and
        # "every mangling" are both possible outcomes.
        num_mangles = random.randint(0, len(available))
        manglers = random.sample(list(available.values()), num_mangles)
    else:
        manglers = [available[name] for name in forced]

    # Work on a row-level copy so the promise of returning a *copy* holds
    # even if an individual mangler edits rows in place.
    data = [list(row) for row in data]
    for func in manglers:
        data = func(data, staff)
    return data

_mangle_id(data, staff)

Wrap the ID value (the cell following the "id" label) in single quotes so that it reads as a quoted string.

Parameters:

Name Type Description Default
data list[list[str]]

values from CSV file

required
staff dict[str, Person]

people keyed by ID

required

Returns:

Type Description
list[list[str]]

Modified copy of data.

Source code in src/snailz/mangle.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def _mangle_id(data: list[list[str]], staff: dict[str, Person]) -> list[list[str]]:
    """Convert ID field to string.

    Parameters:
        data: values from CSV file
        staff: people keyed by ID

    Returns:
        Modified copy of data.
    """
    for row in data:
        if any(x == "id" for x in row):
            i = row.index("id")
            row[i + 1] = f"'{row[i + 1]}'"
    return data

_mangle_indent(data, staff)

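Indent the data portion: rows that start with a number, and the row that starts with an empty cell followed by "A", gain an empty leading cell; every other row gains an empty trailing cell.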

Parameters:

Name Type Description Default
data list[list]

values from CSV file

required
staff dict[str, Person]

people keyed by ID

required

Returns:

Type Description
list[list[str]]

Modified copy of data.

Source code in src/snailz/mangle.py
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def _mangle_indent(data: list[list], staff: dict[str, Person]) -> list[list[str]]:
    """Indent data portion.

    Parameters:
        data: values from CSV file
        staff: people keyed by ID

    Returns:
        Modified copy of data.
    """
    return [
        ([""] + row)
        if (row[0].isdigit() or (row[0] == "" and row[1] == "A"))
        else (row + [""])
        for row in data
    ]

_mangle_person(data, staff)

Replace the person identifier with the person's full name, and relabel the row from "by" to "performed".

Parameters:

Name Type Description Default
data list[list]

values from CSV file

required
staff dict[str, Person]

people keyed by ID

required

Returns:

Type Description
list[list[str]]

Modified copy of data.

Source code in src/snailz/mangle.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def _mangle_person(data: list[list], staff: dict[str, Person]) -> list[list[str]]:
    """Replace person identifier with name.

    Parameters:
        data: values from CSV file
        staff: people keyed by ID

    Returns:
        Modified copy of data.
    """
    for row in data:
        if row[0] == "by":
            row[0] = "performed"
            person = staff[row[1]]
            row[1] = f"{person.personal} {person.family}"
    return data