Skip to content

Specimens

Generate specimens.

Specimen

Bases: MinimalSpecimen

A single specimen.

Parameters:

Name Type Description Default
survey_id str

survey identifier

required
species int

species this snail belongs to

required
collected date

date when specimen was collected

required
genome str

bases in genome

required
is_mutant bool

is this specimen a mutant?

False
Source code in src/snailz/specimens.py
15
16
17
18
19
20
21
22
class Specimen(utils.MinimalSpecimen):
    """A single specimen."""

    # Extends utils.MinimalSpecimen with survey, species, collection-date,
    # genome, and mutant-flag fields.
    # NOTE(review): `ident`, `location`, and `mass` are read and written by
    # other code in this module, so they are presumably declared on
    # MinimalSpecimen — confirm there.

    # Identifier of the survey this specimen was collected in.
    survey_id: str = Field(description="survey identifier")
    # Integer species label; _make_specimen only ever flags species 0 as mutant.
    species: int = Field(description="species this snail belongs to")
    # Collection date; to_csv writes it with date.isoformat().
    collected: date = Field(description="date when specimen was collected")
    # Genome as a string of bases, indexed by locus.
    genome: str = Field(description="bases in genome")
    # Defaults to False when not supplied.
    is_mutant: bool = Field(default=False, description="is this specimen a mutant?")

AllSpecimens

Bases: BaseModel

A set of generated specimens.

Parameters:

Name Type Description Default
loci list[list[int]]

locations where mutations can occur

required
references list[str]

unmutated genomes of each species

required
susc_base str

mutant base that induces mass changes

required
susc_locus int

location of mass change mutation

required
items list[Specimen]

list of individual specimens

required
Source code in src/snailz/specimens.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
class AllSpecimens(BaseModel):
    """A set of generated specimens."""

    # One list of mutation loci per species (same order as `references`).
    loci: list[list[int]] = Field(description="locations where mutations can occur")
    # One reference genome per species.
    references: list[str] = Field(description="unmutated genomes of each species")
    # Base that marks a specimen as a mutant when found at `susc_locus`.
    susc_base: str = Field(description="mutant base that induces mass changes")
    # Index into the genome; validated to be non-negative.
    susc_locus: int = Field(ge=0, description="location of mass change mutation")
    items: list[Specimen] = Field(description="list of individual specimens")

    def to_csv(self, full: bool = False) -> str:
        """Return a CSV string representation of the specimen data.

        The basic columns are always written; `full` appends the `mutant`
        and `species` columns after them.

        Parameters:
            full: include mutant and species information

        Returns:
            A CSV-formatted string.
        """
        # Build the shared columns once so the two output shapes cannot drift.
        headers = ["ident", "survey", "x", "y", "collected", "genome", "mass"]
        if full:
            headers.extend(["mutant", "species"])

        def row_for(s) -> list:
            """Produce the values for one specimen, in header order."""
            row = [
                s.ident,
                s.survey_id,
                # Negative coordinates are written as None rather than a number.
                s.location.x if s.location.x >= 0 else None,
                s.location.y if s.location.y >= 0 else None,
                s.collected.isoformat(),
                s.genome,
                s.mass,
            ]
            if full:
                row.extend([s.is_mutant, s.species])
            return row

        return utils.to_csv(self.items, headers, row_for)

    @staticmethod
    def generate(params: SpecimenParams, surveys: AllSurveys) -> "AllSpecimens":
        """Generate a set of specimens.

        Parameters:
            params: specimen generation parameters
            surveys: surveys to generate specimens for

        Returns:
            A set of specimens.
        """

        # One reference genome and one list of mutation loci per species.
        num_species = len(params.prob_species)
        references = [
            model.specimen_reference_genome(params) for _ in range(num_species)
        ]
        loci = [model.mutation_loci(params) for _ in range(num_species)]

        # The susceptible locus is picked from species 0's loci, and the
        # susceptible base is any base other than species 0's reference base
        # at that locus.
        susc_locus = utils.choose_one(loci[0])
        susc_base = utils.choose_one(
            list(set(utils.BASES) - {references[0][susc_locus]})
        )
        gen = utils.unique_id("specimen", _specimen_id_generator)

        # Created empty first because _make_specimen needs the shared
        # genome settings while the items are being built.
        specimens = AllSpecimens(
            references=references,
            loci=loci,
            susc_base=susc_base,
            susc_locus=susc_locus,
            items=[],
        )

        max_pollution = surveys.max_pollution()
        for survey in surveys.items:
            temp = [
                _make_specimen(params, specimens, survey, next(gen))
                for _ in range(model.specimens_num_per_survey(params, survey))
            ]
            # NOTE(review): presumably assigns locations within the survey;
            # specimens start with a placeholder location — confirm in model.
            model.specimens_place(survey, temp)
            # Mass is adjusted only after placement, then rounded.
            for s in temp:
                s.mass = round(
                    model.specimen_adjust_mass(survey, max_pollution, s),
                    utils.PRECISION,
                )
            specimens.items.extend(temp)

        return specimens

to_csv(full=False)

Return a CSV string representation of the specimen data.

Parameters:

Name Type Description Default
full bool

include mutant and species information

False

Returns:

Type Description
str

A CSV-formatted string.

Source code in src/snailz/specimens.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def to_csv(self, full: bool = False) -> str:
    """Return a CSV string representation of the specimen data.

    The basic columns are always written; `full` appends the `mutant`
    and `species` columns after them.

    Parameters:
        full: include mutant and species information

    Returns:
        A CSV-formatted string.
    """
    # Build the shared columns once so the two output shapes cannot drift.
    headers = ["ident", "survey", "x", "y", "collected", "genome", "mass"]
    if full:
        headers.extend(["mutant", "species"])

    def row_for(s) -> list:
        """Produce the values for one specimen, in header order."""
        row = [
            s.ident,
            s.survey_id,
            # Negative coordinates are written as None rather than a number.
            s.location.x if s.location.x >= 0 else None,
            s.location.y if s.location.y >= 0 else None,
            s.collected.isoformat(),
            s.genome,
            s.mass,
        ]
        if full:
            row.extend([s.is_mutant, s.species])
        return row

    return utils.to_csv(self.items, headers, row_for)

generate(params, surveys) staticmethod

Generate a set of specimens.

Parameters:

Name Type Description Default
params SpecimenParams

specimen generation parameters

required
surveys AllSurveys

surveys to generate specimens for

required

Returns:

Type Description
AllSpecimens

A set of specimens.

Source code in src/snailz/specimens.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
@staticmethod
def generate(params: SpecimenParams, surveys: AllSurveys) -> "AllSpecimens":
    """Build the specimens for every survey.

    Parameters:
        params: specimen generation parameters
        surveys: surveys to generate specimens for

    Returns:
        The generated specimens, with the reference genomes, loci, and
        susceptibility settings used to create them.
    """

    # Per-species genome settings: all references first, then all loci.
    species_count = len(params.prob_species)
    references = [
        model.specimen_reference_genome(params) for _ in range(species_count)
    ]
    loci = [model.mutation_loci(params) for _ in range(species_count)]

    # Susceptibility is defined against species 0: pick one of its loci, then
    # pick a base that differs from its reference base at that locus.
    susc_locus = utils.choose_one(loci[0])
    reference_base = references[0][susc_locus]
    candidate_bases = list(set(utils.BASES) - {reference_base})
    susc_base = utils.choose_one(candidate_bases)

    id_source = utils.unique_id("specimen", _specimen_id_generator)

    # Start with no items; _make_specimen reads the shared settings from this.
    result = AllSpecimens(
        loci=loci,
        references=references,
        susc_locus=susc_locus,
        susc_base=susc_base,
        items=[],
    )

    pollution_cap = surveys.max_pollution()
    for survey in surveys.items:
        how_many = model.specimens_num_per_survey(params, survey)
        batch = []
        for _ in range(how_many):
            batch.append(_make_specimen(params, result, survey, next(id_source)))

        model.specimens_place(survey, batch)

        # Masses are adjusted after placement and rounded to utils.PRECISION.
        for specimen in batch:
            adjusted = model.specimen_adjust_mass(survey, pollution_cap, specimen)
            specimen.mass = round(adjusted, utils.PRECISION)

        result.items.extend(batch)

    return result

_make_specimen(params, specimens, survey, ident)

Make a single specimen.

Parameters:

Name Type Description Default
params SpecimenParams

specimen parameters

required
specimens AllSpecimens

set of specimens that supplies the susceptible locus and base

required
survey Survey

survey this specimen is from

required
ident str

specimen identifier

required

Returns:

Type Description
Specimen

A randomly-generated specimen.

Source code in src/snailz/specimens.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
def _make_specimen(
    params: SpecimenParams,
    specimens: AllSpecimens,
    survey: Survey,
    ident: str,
) -> Specimen:
    """Create one specimen for a survey.

    Parameters:
        params: specimen parameters
        specimens: collection supplying the susceptible locus and base
            (also passed on to genome generation)
        survey: survey this specimen is from
        ident: specimen identifier

    Returns:
        A randomly-generated specimen.
    """
    when = model.specimen_collection_date(survey)
    species, genome = model.specimen_genome(params, specimens)

    # Only species 0 can be a mutant; the genome is inspected only in that
    # case because `and` short-circuits.
    in_first_species = species == 0
    mutant = in_first_species and (
        genome[specimens.susc_locus] == specimens.susc_base
    )

    starting_mass = model.specimen_initial_mass(params, species, when, mutant)

    fields = {
        "ident": ident,
        "survey_id": survey.ident,
        "species": species,
        "collected": when,
        "genome": genome,
        "is_mutant": mutant,
        # Every specimen starts at the origin.
        "location": Point(x=0, y=0),
        "mass": starting_mass,
    }
    return Specimen(**fields)

_specimen_id_generator()

Specimen ID generation function.

Returns:

Type Description
str

Candidate ID for a specimen.

Source code in src/snailz/specimens.py
165
166
167
168
169
170
171
def _specimen_id_generator() -> str:
    """Produce a candidate specimen ID.

    Returns:
        Six uppercase ASCII letters drawn at random (repeats allowed).
    """
    id_length = 6
    alphabet = string.ascii_uppercase
    letters = random.choices(alphabet, k=id_length)
    return "".join(letters)