Skip to content

Mangle

Modify assay CSV files to simulate poor formatting.

mangle_assay(readings_file, raw_file, persons, forced=None)

Mangle a single assay file.

Parameters:

Name Type Description Default
readings_file Path

clean readings file

required
raw_file Path

file to produce

required
persons list[Person]

staff members

required
forced list[str] | None

optional list of specified manglings (for testing)

None
Source code in src/snailz/mangle.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def mangle_assay(readings_file, raw_file, persons, forced=None):
    """Mangle a single assay file.

    Reads the clean CSV file, applies the selected mangling functions in
    order, and writes the result to `raw_file`.

    Parameters:
        readings_file (Path): clean readings file
        raw_file (Path): file to produce
        persons (list[Person]): staff members
        forced (list[str] | None): optional list of specified manglings
            (for testing); each entry must be one of "id", "indent",
            "missing", or "person". When None, a random number of
            manglings is chosen and applied in random order.

    Raises:
        KeyError: if `forced` contains a name that is not a known mangling.
    """
    available = {
        "id": _mangle_id,
        "indent": _mangle_indent,
        "missing": _mangle_missing,
        "person": _mangle_person,
    }

    if forced is None:
        # Anywhere from zero to all of the manglings, without repeats.
        num_mangles = random.randint(0, len(available))
        manglers = random.sample(list(available.values()), num_mangles)
    else:
        manglers = [available[name] for name in forced]

    # The csv module requires files opened with newline="" so that it, not
    # the text layer, controls line endings (avoids "\r\r\n" on Windows and
    # mishandled newlines embedded in quoted fields).
    with open(readings_file, "r", newline="") as stream:
        data = list(csv.reader(stream))
    for func in manglers:
        data = func(data, persons)
    with open(raw_file, "w", newline="") as stream:
        csv.writer(stream).writerows(data)

_mangle_id(data, persons)

Convert ID field to string.

Source code in src/snailz/mangle.py
37
38
39
40
41
42
43
44
def _mangle_id(data, persons):
    """Convert ID field to string."""

    for row in data:
        if any(x == "id" for x in row):
            i = row.index("id")
            row[i + 1] = f"'{row[i + 1]}'"
    return data

_mangle_indent(data, persons)

Indent data portion.

Source code in src/snailz/mangle.py
47
48
49
50
51
52
53
54
55
def _mangle_indent(data, persons):
    """Indent data portion."""

    return [
        ([""] + row)
        if (row[0].isdigit() or (row[0] == "" and row[1] == "A"))
        else (row + [""])
        for row in data
    ]

_mangle_missing(data, persons)

Remove machine name (alters length of header).

Source code in src/snailz/mangle.py
58
59
60
61
def _mangle_missing(data, persons):
    """Remove machine name (alters length of header)."""

    return [row for row in data if row[0] != "machine"]

_mangle_person(data, persons)

Replace person identifier with name.

Source code in src/snailz/mangle.py
64
65
66
67
68
69
70
71
72
73
74
def _mangle_person(data, persons):
    """Replace person identifier with name."""

    for row in data:
        if row[0] == "person":
            row[0] = "by"
            person_id = row[1]
            matches = [p for p in persons if p.id == person_id]
            assert len(matches) == 1, f"Bad person ID {person_id} during mangling"
            row[1] = f"{matches[0].personal} {matches[0].family}"
    return data