1 from intermine import query
2 from interminebio.iterators import *
# Prefer simplejson when it is installed; otherwise fall back to the
# standard-library json module. Either way the module is bound to the
# name ``json`` for the rest of this file.
try:
    import simplejson as json
except ImportError:
    try:
        import json
    except ImportError:
        # Raise a real exception: raising a plain string is itself a
        # TypeError, which would hide the actual problem from the user.
        raise ImportError("No JSON module found - please install simplejson")
def bed(self, ucsc_compatible=True):
    """
    Get results as BED
    ==================

    Build a BedIterator from this object's service and query. The
    iterator stringifies to the BED results and yields the result
    lines when iterated over. Once iteration has finished, the header
    information can be read with the iterator's header() method.

    @param ucsc_compatible: Passed through unchanged to BedIterator (default: True)

    @return: An iterator over the BED results
    @rtype: BedIterator
    """
    bed_results = BedIterator(self.service, self.query, ucsc_compatible)
    return bed_results
22
24 """
25 Get results as FASTA
26 ====================
27
28 Return a FastaIterator object, which stringifies to the Fasta results,
29 and works as an iterator over the records (not the lines).
30
31 When attempting to get results as FASTA the query may only have a single
32 output column. Errors will be raised otherwise.
33 """
34 return FastaIterator(self.service, self.query)
35
37 """
38 Get results as GFF3
39 ===================
40
41 Return a GFF3Iterator object, which stringifies to the GFF3 results,
42 and works as an iterator over the lines. After iteration the header
43 information is accessible with the iter.header() method
44 """
45 return GFF3Iterator(self.service, self.query)
46
49 """
50 Class for querying InterMine Webservices for Features in Genomic Intervals
51 ==========================================================================
52
53 This module allows you to construct queries that retrieve data about sequences and
54 sequence features in biologically relevant formats, where those features are located
55 overlapping genomic intervals.
56
57 The currently supported formats are UCSC-BED, GFF3, and FASTA.
58
59 These queries may also be used to construct lists with.
60
61 """
62
63
64 LIST_PATH = "/regions/list"
65 BED_PATH = "/regions/bed"
66 FASTA_PATH = "/regions/fasta"
67 GFF3_PATH = "/regions/gff3"
68
def __init__(self, service, organism, feature_types, regions, extension=0, is_interbase=False):
    """
    Constructor
    ===========

    Store the parameters of a region search on the new object.

    >>> s = Service("www.flymine.org/query", "API-KEY")
    >>> org = "D. melanogaster"
    >>> regions = ["2L:14614843..14619614"]
    >>> feature_types = ["Exon", "Intron"]
    >>> q = RegionQuery(s, org, feature_types, regions)

    @param service: The service to connect to.
    @type service: intermine.webservice.Service

    @param organism: The short name of the organism to look within (eg: D. melanogaster)
    @type organism: str

    @param feature_types: The types of features to look for
    @type feature_types: list[str]

    @param regions: The regions to search within, either as chrX:start..end
                    or as tab-delimited chrX, start, end
    @type regions: list[str]

    @param extension: A number of base-pairs to extend each region on either side (default: 0)
    @type extension: int

    @param is_interbase: Whether to interpret the co-ordinates as interbase co-ordinates
    @type is_interbase: boolean

    """
    self.service, self.organism = service, organism

    # Held as sets, so repeated feature types or regions are kept only once.
    self.feature_types = set(feature_types)
    self.regions = set(regions)
    self.extension, self.is_interbase = extension, is_interbase

    # Each instance carries its own copy of RegionQuery's path constants.
    owner = RegionQuery
    self.bed_path = owner.BED_PATH
    self.fasta_path = owner.FASTA_PATH
    self.gff3_path = owner.GFF3_PATH

    # A region query starts out with an empty list of views.
    self.views = []
110
112 return {
113 "organism": self.organism,
114 "featureTypes": list(self.feature_types),
115 "regions": list(self.regions),
116 "extension": self.extension,
117 "isInterbase": self.is_interbase
118 }
119
121 """
122 Returns the query parameters for this request.
123 ==============================================
124
125 This method is a required part of the interface for creating lists.
126
127 @rtype: dict
128 """
129 return {"query": json.dumps(self._get_region_query())}
130
132 """
133 Returns the full url for the list upload service
134 ================================================
135
136 This method is a required part of the interface for creating lists.
137
138 @rtype: str
139 """
140 return self.service.root + RegionQuery.LIST_PATH
141
142 @property
145
148 """
149 Class for querying InterMine Webservices for Sequence based data
150 ================================================================
151
152 This module allows you to construct queries that retrieve data about sequences and
153 sequence features in biologically relevant formats.
154
155 The currently supported formats are UCSC-BED, GFF3, and FASTA.
156
157 """
158
def __init__(self, service_or_query, root=None):
    """
    Constructor
    ===========

    Wrap an existing query, or create a new one on the given service.

    >>> s = Service("www.flymine.org/query")
    >>> bio_query = SequenceQuery(s, "Gene")
    >>> q = s.new_query("Gene").where(s.model.Gene.symbol == ["h", "r", "eve", "zen"])
    >>> bio_query = SequenceQuery(q)

    @param service_or_query: The service to connect to, or a query to wrap.
    @type service_or_query: intermine.webservice.Service or intermine.query.Query

    @param root: The root class of the query. Only used when a new query
                 is created; it is not consulted when a query is wrapped.
    @type root: str

    """
    target = service_or_query
    if isinstance(target, query.Query):
        # Wrapping an existing query: take the service from the query itself.
        self.service = target.service
        self.query = target
    else:
        # Given a service: create a fresh query on it, rooted at ``root``.
        self.service = target
        self.query = query.Query(target.model, target, root=root)

    wrapped = self.query

    # Constraint handling: add_constraint goes straight to the wrapped
    # query, while filter is another name for this object's own where().
    self.add_constraint = wrapped.add_constraint
    self.filter = self.where

    # Serialisation and constraint logic are delegated to the wrapped query.
    self.to_xml = wrapped.to_xml
    self.get_logic = wrapped.get_logic
    self.set_logic = wrapped.set_logic

    # Alternative names for this object's sequence-selection methods.
    self.select_sequence = self.set_sequence
    self.select_sequences = self.add_sequence_feature
    self.add_sequence_features = self.add_sequence_feature
197
199 """
200 Add an arbitrarily long list of sequence features to the query.
201 ===============================================================
202
203 Fasta, GFF3 and BED queries all can read information from SequenceFeatures.
204 For Fasta you are advised to use the set_sequence method instead,
205 as unlike the GFF3 and BED services, the Fasta service can only handle
206 queries with one output column.
207 """
208 for f in features:
209 p = self.query.column(f)._path
210 if p.is_attribute() or not p.get_class().isa("SequenceFeature"):
211 raise ValueError("%s is not a Sequence Feature" % (f))
212 self.query.add_view(str(p) + ".id")
213
214 return self
215
def where(self, *args, **kwargs):
    """
    Constrain the wrapped query and return this object.
    ===================================================

    All positional and keyword arguments are handed on unchanged to the
    where() method of the wrapped query. Whatever that call returns is
    discarded, and this object is returned so that calls can be chained.

    @return: This object
    """
    constrain = self.query.where
    constrain(*args, **kwargs)
    return self
222
224 """
225 Set the sequence column to retrieve.
226 ====================================
227
228 Add a sequence holding object to the query. It can be a SequenceFeature, Protein
229 or Sequence object.
230
231 Fasta queries, which read sequences rather than sequence features,
232 currently only permit one output column.
233 """
234 self.query.views = []
235 p = self.query.column(f)._path
236 if p.is_attribute() or not (p.get_class().isa("SequenceFeature") or
237 p.get_class().isa("Protein") or
238 p.get_class().isa("Sequence")):
239 raise ValueError("%s has no sequence information" % (f))
240 self.query.add_view(str(p) + ".id")
241
242 return self
243