Skip to content

Assays

Generate snail assays.

AssayParams

Bases: BaseModel

Parameters for assay generation.

  • baseline: Baseline reading value (must be positive)
  • end_date: End date for assay generation
  • mutant: Mutant reading value (must be positive)
  • noise: Noise level for readings (must be positive)
  • plate_size: Size of assay plate (must be positive)
  • seed: Random seed for reproducibility
  • start_date: Start date for assay generation (must not be after end_date)
Source code in src/snailz/assays.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class AssayParams(BaseModel):
    """Settings that control assay generation.

    - baseline: reading value used as the baseline (strictly greater than zero)
    - end_date: last date of the assay generation range
    - mutant: reading value used for mutants (strictly greater than zero)
    - noise: noise level applied to readings (strictly greater than zero)
    - plate_size: size of the assay plate (strictly greater than zero)
    - seed: random seed for reproducibility
    - start_date: first date of the assay generation range (may equal,
      but must not be after, end_date)
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = {"extra": "forbid"}

    baseline: float = Field(gt=0)
    end_date: date
    mutant: float = Field(gt=0)
    noise: float = Field(gt=0)
    plate_size: int = Field(gt=0)
    seed: int
    start_date: date

    @model_validator(mode="after")
    def validate_date_range(self):
        """Reject parameters whose start_date falls after end_date."""
        if self.end_date < self.start_date:
            raise ValueError("start date must be less than or equal to end date")
        return self

validate_date_range()

Validate that start_date is not after end_date.

Source code in src/snailz/assays.py
39
40
41
42
43
44
@model_validator(mode="after")
def validate_date_range(self):
    """Reject parameters whose start_date falls after end_date.

    Returns the model instance unchanged when the range is ordered
    (start_date equal to end_date is accepted).
    """
    if self.end_date < self.start_date:
        raise ValueError("start date must be less than or equal to end date")
    return self

Assay

Bases: BaseModel

A single assay.

  • performed: date assay was performed
  • ident: unique identifier
  • specimen_id: which specimen
  • person_id: who did the assay
  • readings: grid of assay readings
  • treatments: grid of samples or controls
Source code in src/snailz/assays.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
class Assay(BaseModel):
    """A single assay.

    - performed: date assay was performed
    - ident: unique identifier
    - specimen_id: which specimen
    - person_id: who did the assay
    - readings: grid of assay readings
    - treatments: grid of samples or controls
    """

    performed: date
    ident: str
    specimen_id: str
    person_id: str
    readings: list[list[float]]
    treatments: list[list[str]]

    def to_csv(self, data_type: str) -> str:
        """Return a CSV string representation of the assay data.

        Parameters:
            data_type: Type of data to output, either "readings" or "treatments"

        Returns:
            A CSV-formatted string with the assay data in the format:
            id,<assay_id>
            specimen,<specimen_id>
            performed,<performed_date>
            performed_by,<person_id>
            ,A,B,C,...
            1,<data>,<data>,...
            2,<data>,<data>,...
            ...

            Metadata rows are padded with empty cells so that they have as
            many fields as the column-header row. Rows are written through
            utils.csv_writer, which is expected to use Unix line endings (LF).

        Raises:
            ValueError: If data_type is not "readings" or "treatments"
        """
        if data_type not in ["readings", "treatments"]:
            raise ValueError("data_type must be 'readings' or 'treatments'")

        # Get the appropriate data based on data_type
        data = self.readings if data_type == "readings" else self.treatments

        # Generate column headers (A, B, C, etc.) from the widest row rather
        # than from the number of rows: the model does not require the grid
        # to be square, and the header must label columns, not rows.
        num_columns = max((len(row) for row in data), default=0)
        column_headers = [""] + [chr(65 + i) for i in range(num_columns)]

        # Each metadata row already holds two cells (label and value); a
        # non-positive multiplier simply yields no padding.
        padding = [""] * (len(column_headers) - 2)

        # Write metadata rows
        output = io.StringIO()
        writer = utils.csv_writer(output)
        writer.writerow(["id", self.ident] + padding)
        writer.writerow(["specimen", self.specimen_id] + padding)
        writer.writerow(["performed", self.performed.isoformat()] + padding)
        writer.writerow(["performed_by", self.person_id] + padding)

        # Write data rows with 1-based row numbers
        writer.writerow(column_headers)
        for i, row in enumerate(data, 1):
            writer.writerow([i] + row)
        return output.getvalue()

to_csv(data_type)

Return a CSV string representation of the assay data.

Parameters:

Name Type Description Default
data_type str

Type of data to output, either "readings" or "treatments"

required

Returns:

Type Description
str

A CSV-formatted string with the assay data in the format:

    id,<assay_id>
    specimen,<specimen_id>
    performed,<performed_date>
    performed_by,<person_id>
    ,A,B,C,...
    1,<data>,<data>,...
    2,<data>,<data>,...
    ...

The CSV output uses Unix line endings (LF).

Raises:

Type Description
ValueError

If data_type is not "readings" or "treatments"

Source code in src/snailz/assays.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def to_csv(self, data_type: str) -> str:
    """Return a CSV string representation of the assay data.

    Parameters:
        data_type: Type of data to output, either "readings" or "treatments"

    Returns:
        A CSV-formatted string with the assay data in the format:
        id,<assay_id>
        specimen,<specimen_id>
        performed,<performed_date>
        performed_by,<person_id>
        ,A,B,C,...
        1,<data>,<data>,...
        2,<data>,<data>,...
        ...

        Metadata rows are padded with empty cells so that they have as
        many fields as the column-header row. Rows are written through
        utils.csv_writer, which is expected to use Unix line endings (LF).

    Raises:
        ValueError: If data_type is not "readings" or "treatments"
    """
    if data_type not in ["readings", "treatments"]:
        raise ValueError("data_type must be 'readings' or 'treatments'")

    # Get the appropriate data based on data_type
    data = self.readings if data_type == "readings" else self.treatments

    # Generate column headers (A, B, C, etc.) from the widest row rather
    # than from the number of rows: the model does not require the grid
    # to be square, and the header must label columns, not rows.
    num_columns = max((len(row) for row in data), default=0)
    column_headers = [""] + [chr(65 + i) for i in range(num_columns)]

    # Each metadata row already holds two cells (label and value); a
    # non-positive multiplier simply yields no padding.
    padding = [""] * (len(column_headers) - 2)

    # Write metadata rows
    output = io.StringIO()
    writer = utils.csv_writer(output)
    writer.writerow(["id", self.ident] + padding)
    writer.writerow(["specimen", self.specimen_id] + padding)
    writer.writerow(["performed", self.performed.isoformat()] + padding)
    writer.writerow(["performed_by", self.person_id] + padding)

    # Write data rows with 1-based row numbers
    writer.writerow(column_headers)
    for i, row in enumerate(data, 1):
        writer.writerow([i] + row)
    return output.getvalue()

AllAssays

Bases: BaseModel

Keep track of generated assays.

  • items: actual assays
  • params: parameters used in generation
Source code in src/snailz/assays.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
class AllAssays(BaseModel):
    """Collection of generated assays plus the settings that produced them.

    - items: actual assays
    - params: parameters used in generation
    """

    items: list[Assay]
    params: AssayParams

    def to_csv(self) -> str:
        """Summarize every assay as CSV text.

        Returns:
            A CSV-formatted string with a header row followed by one row
            per assay, with the columns:
            - ident: assay identifier
            - specimen_id: specimen identifier
            - performed: date the assay was performed (ISO format)
            - performed_by: person identifier

            Rows are written through utils.csv_writer, which is expected
            to use Unix line endings (LF).
        """
        buffer = io.StringIO()
        out = utils.csv_writer(buffer)

        header = ["ident", "specimen_id", "performed", "performed_by"]
        out.writerow(header)

        for entry in self.items:
            record = [
                entry.ident,
                entry.specimen_id,
                entry.performed.isoformat(),
                entry.person_id,
            ]
            out.writerow(record)

        return buffer.getvalue()

to_csv()

Return a CSV string representation of the assay summary data.

Returns:

Type Description
str

A CSV-formatted string containing a summary of all assays with fields:

  • ident: assay identifier
  • specimen_id: specimen identifier
  • performed: date the assay was performed
  • performed_by: person identifier

The CSV output uses Unix line endings (LF).

Source code in src/snailz/assays.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def to_csv(self) -> str:
    """Summarize every assay as CSV text.

    Returns:
        A CSV-formatted string with a header row followed by one row
        per assay, with the columns:
        - ident: assay identifier
        - specimen_id: specimen identifier
        - performed: date the assay was performed (ISO format)
        - performed_by: person identifier

        Rows are written through utils.csv_writer, which is expected
        to use Unix line endings (LF).
    """
    buffer = io.StringIO()
    out = utils.csv_writer(buffer)

    header = ["ident", "specimen_id", "performed", "performed_by"]
    out.writerow(header)

    for entry in self.items:
        record = [
            entry.ident,
            entry.specimen_id,
            entry.performed.isoformat(),
            entry.person_id,
        ]
        out.writerow(record)

    return buffer.getvalue()

assays_generate(params, specimens, people)

Generate an assay for each specimen.

Parameters:

Name Type Description Default
params AssayParams

AssayParams object containing assay generation parameters

required
specimens AllSpecimens

Specimens object with individual specimens to generate assays for

required
people AllPersons

People object with staff members

required

Returns:

Type Description
AllAssays

Assays object containing generated assays and parameters

Source code in src/snailz/assays.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
def assays_generate(
    params: AssayParams, specimens: AllSpecimens, people: AllPersons
) -> AllAssays:
    """Generate an assay for each specimen.

    For every specimen this picks a random date in the inclusive range
    start_date..end_date, draws a unique six-digit identifier, fills a
    plate_size x plate_size grid with "S" (sample) or "C" (control)
    treatments chosen with equal probability, derives a reading for each
    cell, and assigns a randomly chosen person.

    Parameters:
        params: AssayParams object containing assay generation parameters
        specimens: Specimens object with individual specimens to generate assays for
        people: People object with staff members

    Returns:
        Assays object containing generated assays and parameters
    """
    size = params.plate_size
    span_days = (params.end_date - params.start_date).days + 1
    locus = specimens.susceptible_locus
    base = specimens.susceptible_base

    id_source = utils.UniqueIdGenerator(
        "assays", lambda: f"{random.randint(0, 999999):06d}"
    )

    def make_reading(treatment: str, susceptible: bool) -> float:
        """Draw one reading for a cell, rounded to utils.PRECISION digits."""
        if treatment == "C":
            # Control cells: uniform between 0 and noise.
            value = random.uniform(0, params.noise)
        elif susceptible:
            # Susceptible specimens: mutant value plus noise scaled by the
            # mutant/baseline ratio.
            spread = round(
                params.noise * params.mutant / params.baseline, utils.PRECISION
            )
            value = params.mutant + random.uniform(0, spread)
        else:
            # Non-susceptible specimens: baseline value plus uniform noise.
            value = params.baseline + random.uniform(0, params.noise)
        return round(value, utils.PRECISION)

    generated = []
    for specimen in specimens.individuals:
        offset = random.randint(0, span_days - 1)
        performed = params.start_date + timedelta(days=offset)
        ident = id_source.next()

        # The whole treatment grid is drawn before any reading so that the
        # sequence of random draws stays: date, id, treatments, readings, person.
        treatments = [
            [random.choice(["S", "C"]) for _ in range(size)] for _ in range(size)
        ]

        # A specimen is susceptible when it carries the susceptible base at
        # the susceptible locus.
        susceptible = specimen.genome[locus] == base
        readings = [
            [make_reading(cell, susceptible) for cell in treatment_row]
            for treatment_row in treatments
        ]

        # Randomly select a person to perform the assay
        person = random.choice(people.individuals)

        generated.append(
            Assay(
                performed=performed,
                ident=ident,
                specimen_id=specimen.ident,
                person_id=person.ident,
                readings=readings,
                treatments=treatments,
            )
        )

    return AllAssays(items=generated, params=params)

assays_to_csv(input, output)

Write assays to standard output or files.

Source code in src/snailz/assays.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
def assays_to_csv(input: str, output: str | None) -> None:
    """Write assays to standard output or files.

    Parameters:
        input: Passed to utils.load_data to locate the saved assay data
        output: Directory to write into, or None to print only the summary
            CSV to standard output

    When output is a directory, this writes:
    - <output>/assays.csv: summary of all assays
    - <output>/<ASSAYS_SUBDIR>/<ident>_design.csv: treatments for each assay
    - <output>/<ASSAYS_SUBDIR>/<ident>_assay.csv: readings for each assay

    Files are opened with newline="" so the line endings already present in
    the CSV text are written unchanged (no CRLF translation on Windows), and
    with UTF-8 encoding so the output does not depend on the locale.
    """
    data = utils.load_data("assays", input, AllAssays)

    # Type casting for the type checker - this tells the type checker
    # that data is an AllAssays instance, but doesn't perform any runtime checks
    data = cast(AllAssays, data)

    # For stdout, only output the summary
    if output is None:
        content = data.to_csv()
        print(content, end="")
        return

    output_path = Path(output)
    with open(
        output_path / "assays.csv", "w", newline="", encoding="utf-8"
    ) as writer:
        writer.write(data.to_csv())

    # Create assays subdirectory
    assays_dir = output_path / ASSAYS_SUBDIR
    assays_dir.mkdir(exist_ok=True)

    # Write individual assay files
    for assay in data.items:
        # Design file
        design_file = assays_dir / f"{assay.ident}_design.csv"
        with open(design_file, "w", newline="", encoding="utf-8") as writer:
            writer.write(assay.to_csv(data_type="treatments"))

        # Readings file
        assay_file = assays_dir / f"{assay.ident}_assay.csv"
        with open(assay_file, "w", newline="", encoding="utf-8") as writer:
            writer.write(assay.to_csv(data_type="readings"))