Skip to content

Mangle

Modify assay CSV files to simulate poor formatting.

mangle_assay(readings_file, raw_file, persons, forced=None)

Mangle a single assay file.

Parameters:

Name Type Description Default
readings_file

clean readings file

required
raw_file

file to produce

required
persons

staff members

required
forced

optional list of specified manglings (for testing)

None
Source code in src/snailz/mangle.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def mangle_assay(readings_file, raw_file, persons, forced=None):
    """Mangle a single assay file.

    Reads the clean CSV file, applies the selected mangling functions one
    after another, and writes the result as CSV to the output file.

    Parameters:
        readings_file: clean readings file
        raw_file: file to produce
        persons: staff members
        forced: optional list of specified manglings (for testing); each
            entry must be one of "id", "indent", "missing", or "person"

    Raises:
        KeyError: if `forced` contains a name that is not a known mangling.
    """
    available = {
        "id": _mangle_id,
        "indent": _mangle_indent,
        "missing": _mangle_missing,
        "person": _mangle_person,
    }

    if forced is None:
        # Pick a random number of distinct manglers (possibly none) in
        # random order.
        num_mangles = random.randint(0, len(available))
        manglers = random.sample(list(available.values()), num_mangles)
    else:
        manglers = [available[name] for name in forced]

    # The csv module requires files to be opened with newline="" so that it
    # controls line endings itself; otherwise output gets "\r\r\n" on
    # platforms that translate "\n", and embedded newlines may be altered.
    with open(readings_file, "r", newline="") as stream:
        data = list(csv.reader(stream))
    for func in manglers:
        data = func(data, persons)
    with open(raw_file, "w", newline="") as stream:
        csv.writer(stream).writerows(data)

_mangle_id(data, persons)

Convert ID field to a quoted string (wraps the value after the "id" cell in single quotes).

Parameters:

Name Type Description Default
data

values from CSV file

required
persons

lab staff

required

Returns:

Type Description

Modified copy of data.

Source code in src/snailz/mangle.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def _mangle_id(data, persons):
    """Convert ID field to string.

    Parameters:
        data: values from CSV file
        persons: lab staff

    Returns:
        Modified copy of data.
    """
    for row in data:
        if any(x == "id" for x in row):
            i = row.index("id")
            row[i + 1] = f"'{row[i + 1]}'"
    return data

_mangle_indent(data, persons)

Indent data portion.

Parameters:

Name Type Description Default
data

values from CSV file

required
persons

lab staff

required

Returns:

Type Description

Modified copy of data.

Source code in src/snailz/mangle.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def _mangle_indent(data, persons):
    """Indent data portion.

    Parameters:
        data: values from CSV file
        persons: lab staff

    Returns:
        Modified copy of data.
    """
    return [
        ([""] + row)
        if (row[0].isdigit() or (row[0] == "" and row[1] == "A"))
        else (row + [""])
        for row in data
    ]

_mangle_missing(data, persons)

Remove machine name (alters length of header).

Parameters:

Name Type Description Default
data

values from CSV file

required
persons

lab staff

required

Returns:

Type Description

Modified copy of data.

Source code in src/snailz/mangle.py
72
73
74
75
76
77
78
79
80
81
82
def _mangle_missing(data, persons):
    """Remove machine name (alters length of header).

    Parameters:
        data: values from CSV file
        persons: lab staff

    Returns:
        Modified copy of data.
    """
    return [row for row in data if row[0] != "machine"]

_mangle_person(data, persons)

Replace person identifier with name.

Parameters:

Name Type Description Default
data

values from CSV file

required
persons

lab staff

required

Returns:

Type Description

Modified copy of data.

Source code in src/snailz/mangle.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def _mangle_person(data, persons):
    """Replace person identifier with name.

    Parameters:
        data: values from CSV file
        persons: lab staff

    Returns:
        Modified copy of data.
    """
    for row in data:
        if row[0] == "by":
            row[0] = "performed"
            person_id = staff[row[1]]
            matches = [p for p in persons if p.id == person_id]
            assert len(matches) == 1, f"Bad person ID {person_id} during mangling"
            row[1] = f"{matches[0].personal} {matches[0].family}"
    return data