Skip to content

Assays

Generate snail assays.

AssayParams

Bases: BaseModel

Parameters for assay generation.

  • baseline: Baseline reading value (must be positive)
  • degrade: Rate at which sample responses decrease per day after first day (0-1)
  • delay: Maximum number of days between specimen collection and assay (must be positive)
  • mutant: Mutant reading value (must be positive)
  • noise: Noise level for readings (must be positive)
  • oops: Fractional increase applied to sample readings for one randomly chosen person; readings are multiplied by (1 + oops), so 0 means no adjustment
  • plate_size: Size of assay plate (must be positive)
  • seed: Random seed for reproducibility
Source code in src/snailz/assays.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
class AssayParams(BaseModel):
    """Parameters for assay generation.

    - baseline: Baseline reading value (must be positive)
    - degrade: Rate at which sample responses decrease per day after first day (0-1)
    - delay: Maximum number of days between specimen collection and assay (must be positive)
    - mutant: Mutant reading value (must be positive)
    - noise: Noise level for readings (must be positive)
    - oops: Fractional increase applied to sample readings for one random person;
      readings are multiplied by (1 + oops), so 0 means no adjustment
    - plate_size: Size of assay plate, used as both row and column count
      (must be positive)
    - seed: Random seed for reproducibility

    Fields not listed above are rejected because of the "extra": "forbid" setting.
    """

    # Reading level for sample cells of non-susceptible specimens.
    baseline: float = Field(gt=0)
    # Fraction of the response lost per day, starting with the second day.
    degrade: float = Field(ge=0, le=1)
    # Upper bound (inclusive) on the random collection-to-assay gap in days.
    delay: int = Field(gt=0)
    # Reading level for sample cells of susceptible specimens.
    mutant: float = Field(gt=0)
    # Upper bound of the uniform noise; control cells read between 0 and noise.
    noise: float = Field(gt=0)
    # 0 disables the adjustment; otherwise sample readings scale by (1 + oops).
    oops: float = Field(ge=0)
    # Plates are square: plate_size rows by plate_size columns.
    plate_size: int = Field(gt=0)
    seed: int = Field()

    # Reject unknown keys instead of silently ignoring them.
    model_config = {"extra": "forbid"}

Assay

Bases: BaseModel

A single assay.

  • performed: date assay was performed
  • ident: unique identifier
  • specimen_id: which specimen
  • person_id: who did the assay
  • readings: grid of assay readings
  • treatments: grid of samples or controls
Source code in src/snailz/assays.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
class Assay(BaseModel):
    """One assay plate together with its identifying metadata.

    - performed: date assay was performed
    - ident: unique identifier
    - specimen_id: which specimen
    - person_id: who did the assay
    - readings: grid of assay readings
    - treatments: grid of samples or controls
    """

    performed: date
    ident: str
    specimen_id: str
    person_id: str
    readings: list[list[float]]
    treatments: list[list[str]]

    def to_csv(self, data_type: str) -> str:
        """Render one of this assay's grids as CSV text.

        Parameters:
            data_type: Which grid to emit, either "readings" or "treatments"

        Returns:
            CSV text consisting of four metadata rows followed by the grid:
            id,<assay_id>
            specimen,<specimen_id>
            performed,<performed_date>
            performed_by,<person_id>
            ,A,B,C,...
            1,<data>,<data>,...
            2,<data>,<data>,...
            ...

            The CSV output uses Unix line endings (LF).

        Raises:
            ValueError: If data_type is not "readings" or "treatments"
        """
        # Pick the grid, rejecting anything other than the two known names.
        if data_type == "readings":
            grid = self.readings
        elif data_type == "treatments":
            grid = self.treatments
        else:
            raise ValueError("data_type must be 'readings' or 'treatments'")

        # One letter per column (A, B, C, ...) after an empty corner cell;
        # the column count is taken from the number of rows in the grid.
        letters = [chr(ord("A") + offset) for offset in range(len(grid))]
        header_row = ["", *letters]

        # Metadata rows hold two values, so pad them to the header's width.
        filler = [""] * (len(header_row) - 2)
        metadata = (
            ("id", self.ident),
            ("specimen", self.specimen_id),
            ("performed", self.performed.isoformat()),
            ("performed_by", self.person_id),
        )

        buffer = io.StringIO()
        out = utils.csv_writer(buffer)
        for label, value in metadata:
            out.writerow([label, value, *filler])

        # Grid rows are numbered from 1 in the first column.
        out.writerow(header_row)
        for number, cells in enumerate(grid, start=1):
            out.writerow([number, *cells])
        return buffer.getvalue()

to_csv(data_type)

Return a CSV string representation of the assay data.

Parameters:

Name Type Description Default
data_type str

Type of data to output, either "readings" or "treatments"

required

Returns:

Type Description
str

A CSV-formatted string with the assay data in the format:

    id,<assay_id>
    specimen,<specimen_id>
    performed,<performed_date>
    performed_by,<person_id>
    ,A,B,C,...
    1,<data>,<data>,...
    2,<data>,<data>,...
    ...

The CSV output uses Unix line endings (LF).

Raises:

Type Description
ValueError

If data_type is not "readings" or "treatments"

Source code in src/snailz/assays.py
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
def to_csv(self, data_type: str) -> str:
    """Render one of this assay's grids as CSV text.

    Parameters:
        data_type: Which grid to emit, either "readings" or "treatments"

    Returns:
        CSV text consisting of four metadata rows followed by the grid:
        id,<assay_id>
        specimen,<specimen_id>
        performed,<performed_date>
        performed_by,<person_id>
        ,A,B,C,...
        1,<data>,<data>,...
        2,<data>,<data>,...
        ...

        The CSV output uses Unix line endings (LF).

    Raises:
        ValueError: If data_type is not "readings" or "treatments"
    """
    # Pick the grid, rejecting anything other than the two known names.
    if data_type == "readings":
        grid = self.readings
    elif data_type == "treatments":
        grid = self.treatments
    else:
        raise ValueError("data_type must be 'readings' or 'treatments'")

    # One letter per column (A, B, C, ...) after an empty corner cell;
    # the column count is taken from the number of rows in the grid.
    letters = [chr(ord("A") + offset) for offset in range(len(grid))]
    header_row = ["", *letters]

    # Metadata rows hold two values, so pad them to the header's width.
    filler = [""] * (len(header_row) - 2)
    metadata = (
        ("id", self.ident),
        ("specimen", self.specimen_id),
        ("performed", self.performed.isoformat()),
        ("performed_by", self.person_id),
    )

    buffer = io.StringIO()
    out = utils.csv_writer(buffer)
    for label, value in metadata:
        out.writerow([label, value, *filler])

    # Grid rows are numbered from 1 in the first column.
    out.writerow(header_row)
    for number, cells in enumerate(grid, start=1):
        out.writerow([number, *cells])
    return buffer.getvalue()

AllAssays

Bases: BaseModel

Keep track of generated assays.

  • items: actual assays
  • params: parameters used in generation
Source code in src/snailz/assays.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
class AllAssays(BaseModel):
    """Collection of generated assays plus the settings that produced them.

    - items: actual assays
    - params: parameters used in generation
    """

    items: list[Assay]
    params: AssayParams

    def to_csv(self) -> str:
        """Summarize every assay as one CSV row.

        Returns:
            CSV text with a header row followed by one row per assay, with columns:
            - ident: assay identifier
            - specimen_id: specimen identifier
            - performed: date the assay was performed
            - performed_by: person identifier

            The CSV output uses Unix line endings (LF).
        """
        buffer = io.StringIO()
        out = utils.csv_writer(buffer)
        out.writerow(["ident", "specimen_id", "performed", "performed_by"])
        for entry in self.items:
            # Dates are written in ISO format; grids are not part of the summary.
            summary = [
                entry.ident,
                entry.specimen_id,
                entry.performed.isoformat(),
                entry.person_id,
            ]
            out.writerow(summary)
        return buffer.getvalue()

to_csv()

Return a CSV string representation of the assay summary data.

Returns:

Type Description
str

A CSV-formatted string containing a summary of all assays with fields:

  • ident: assay identifier
  • specimen_id: specimen identifier
  • performed: date the assay was performed
  • performed_by: person identifier

The CSV output uses Unix line endings (LF).

Source code in src/snailz/assays.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def to_csv(self) -> str:
    """Summarize every assay as one CSV row.

    Returns:
        CSV text with a header row followed by one row per assay, with columns:
        - ident: assay identifier
        - specimen_id: specimen identifier
        - performed: date the assay was performed
        - performed_by: person identifier

        The CSV output uses Unix line endings (LF).
    """
    buffer = io.StringIO()
    out = utils.csv_writer(buffer)
    out.writerow(["ident", "specimen_id", "performed", "performed_by"])
    for entry in self.items:
        # Dates are written in ISO format; grids are not part of the summary.
        summary = [
            entry.ident,
            entry.specimen_id,
            entry.performed.isoformat(),
            entry.person_id,
        ]
        out.writerow(summary)
    return buffer.getvalue()

assays_generate(params, people, specimens)

Generate an assay for each specimen.

Parameters:

Name Type Description Default
params AssayParams

AssayParams object containing assay generation parameters

required
people AllPersons

People object with staff members

required
specimens AllSpecimens

Specimens object with individual specimens to generate assays for

required

Returns:

Type Description
AllAssays

Assays object containing generated assays and parameters

Source code in src/snailz/assays.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
def assays_generate(
    params: AssayParams, people: AllPersons, specimens: AllSpecimens
) -> AllAssays:
    """Build one assay plate for every specimen.

    Parameters:
        params: AssayParams object containing assay generation parameters
        people: People object with staff members
        specimens: Specimens object with individual specimens to generate assays for

    Returns:
        Assays object containing generated assays and parameters
    """
    locus = specimens.susceptible_locus
    base = specimens.susceptible_base
    staff = people.individuals
    size = params.plate_size

    id_source = utils.UniqueIdGenerator(
        "assays", lambda: f"{random.randint(0, 999999):06d}"
    )

    # With a non-zero oops factor, one randomly chosen person's sample
    # readings are inflated; otherwise nobody is singled out.
    oops_active = params.oops > 0
    oops_person_id = random.choice(staff).ident if oops_active else None

    assays = []
    for specimen in specimens.individuals:
        # The assay happens 0..delay days (inclusive) after collection.
        performed = specimen.collected_on + timedelta(
            days=random.randint(0, params.delay)
        )
        ident = id_source.next()

        # Each cell is a sample ("S") or a control ("C") with equal probability.
        treatments = [
            [random.choice(["S", "C"]) for _ in range(size)] for _ in range(size)
        ]

        # The first day after collection is free; each later day removes a
        # further `degrade` fraction of the response, bottoming out at zero.
        elapsed = (performed - specimen.collected_on).days
        decay = max(0.0, 1.0 - (params.degrade * max(0, elapsed - 1)))

        # Whoever performs the assay is drawn at random for every plate.
        person_id = random.choice(staff).ident

        # Sample cells read around the mutant level for susceptible specimens
        # (with noise scaled by mutant/baseline) and around baseline otherwise.
        if specimen.genome[locus] == base:
            spread = params.noise * params.mutant / params.baseline
            level = params.mutant * decay
        else:
            spread = params.noise
            level = params.baseline * decay
        inflate = oops_active and person_id == oops_person_id

        readings = []
        for design_row in treatments:
            reading_row = []
            for kind in design_row:
                if kind == "C":
                    # Controls are pure noise, untouched by decay or oops.
                    value = random.uniform(0, params.noise)
                else:
                    value = level + random.uniform(0, spread)
                    if inflate:
                        value = value * (1 + params.oops)
                reading_row.append(round(value, utils.PRECISION))
            readings.append(reading_row)

        assays.append(
            Assay(
                performed=performed,
                ident=ident,
                specimen_id=specimen.ident,
                person_id=person_id,
                readings=readings,
                treatments=treatments,
            )
        )

    return AllAssays(items=assays, params=params)

assays_to_csv(input, output)

Write assays to standard output or files.

Source code in src/snailz/assays.py
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
def assays_to_csv(input: str | Path, output: str | Path | None) -> None:
    """Write assays to standard output or files.

    Parameters:
        input: Location of the saved assay data, passed to utils.load_data
        output: Directory to write CSV files into, or None to print only the
            summary CSV to standard output

    When output is given, the directory must already exist. The summary is
    written to "assays.csv" inside it, and every assay gets two files in the
    ASSAYS_SUBDIR subdirectory (created if missing): "<ident>_design.csv"
    holding the treatments and "<ident>_assay.csv" holding the readings.

    Files are opened with newline="" so the line endings produced by the CSV
    writer are stored unchanged on every platform.
    """
    data = utils.load_data("assays", input, AllAssays)

    # Type casting for the type checker - this tells the type checker
    # that data is an AllAssays instance, but doesn't perform any runtime checks
    data = cast(AllAssays, data)

    # For stdout, only output the summary
    if output is None:
        print(data.to_csv(), end="")
        return

    output_path = Path(output)
    # newline="" disables newline translation; without it, text mode on
    # Windows would rewrite each "\n" as "\r\n".
    with open(output_path / "assays.csv", "w", newline="") as writer:
        writer.write(data.to_csv())

    # Create assays subdirectory
    assays_dir = output_path / ASSAYS_SUBDIR
    assays_dir.mkdir(exist_ok=True)

    # Write individual assay files
    for assay in data.items:
        # Design file
        design_file = assays_dir / f"{assay.ident}_design.csv"
        with open(design_file, "w", newline="") as writer:
            writer.write(assay.to_csv(data_type="treatments"))

        # Readings file
        assay_file = assays_dir / f"{assay.ident}_assay.csv"
        with open(assay_file, "w", newline="") as writer:
            writer.write(assay.to_csv(data_type="readings"))