1 from intermine import query
2 from interminebio.iterators import *
3
5 """
6 Class for querying InterMine Webservices for Sequence based data
7 ================================================================
8
9 This module allows you to construct queries that retrieve data about sequences and
10 sequence features in biologically relevant formats.
11
12 The currently supported formats are UCSC-BED, GFF3, and FASTA.
13
14 """
15
17 """
18 Constructor
19 ===========
20
21 @param service: The service to connect to.
22 @type service: intermine.webservice.Service
23
24 @param root: The root class of the query
25 @type root: str
26
27 """
28 self.service = service
29 self.query = query.Query(service.model, service, root=root)
30
31
32 self.add_constraint = self.query.add_constraint
33 self.filter = self.where
34
35 self.to_xml = self.query.to_xml
36
37 self.get_logic = self.query.get_logic
38 self.set_logic = self.query.set_logic
39
40 self.select_sequence = self.set_sequence
41 self.select_sequences = self.add_sequence_feature
42 self.add_sequence_features = self.add_sequence_feature
43
45 """
46 Add an arbitrarily long list of sequence features to the query.
47 ===============================================================
48
49 Fasta, GFF3 and BED queries all can read information from SequenceFeatures.
50 For Fasta you are advised to use the set_sequence method instead,
51 as unlike the GFF3 and BED services, the Fasta service can only handle
52 queries with one output column.
53 """
54 for f in features:
55 p = self.query.column(f)._path
56 if p.is_attribute() or not p.get_class().isa("SequenceFeature"):
57 raise ValueError("%s is not a Sequence Feature" % (f))
58 self.query.add_view(str(p) + ".id")
59
60 return self
61
def where(self, *args, **kwargs):
    """
    Delegate to the where method of the wrapped query
    =================================================

    All positional and keyword arguments are forwarded unchanged to
    C{self.query.where}. Whatever that call returns is discarded.

    @param args: Positional arguments handed on to C{self.query.where}.
    @param kwargs: Keyword arguments handed on to C{self.query.where}.

    @return: This object itself (not the wrapped query), so that calls
             can be chained.
    """
    # The result of the delegated call is intentionally not returned:
    # callers get this wrapper back so they can keep chaining on it.
    self.query.where(*args, **kwargs)
    return self
65
67 """
68 Set the sequence column to retrieve.
69 ====================================
70
71 Add a sequence holding object to the query. It can be a SequenceFeature, Protein
72 or Sequence object.
73
74 Fasta queries, which read sequences rather than sequence features,
75 currently only permit one output column.
76 """
77 self.query.views = []
78 p = self.query.column(f)._path
79 if p.is_attribute() or not (p.get_class().isa("SequenceFeature") or
80 p.get_class().isa("Protein") or
81 p.get_class().isa("Sequence")):
82 raise ValueError("%s has no sequence information" % (f))
83 self.query.add_view(str(p) + ".id")
84
85 return self
86
def bed(self, ucsc_compatible=True):
    """
    Get results as BED
    ==================

    Return a BedIterator object, which stringifies to the BED results,
    and works as an iterator over the lines. After iteration the header
    information is accessible with the iter.header() method.

    @param ucsc_compatible: Passed through unchanged as the third
                            argument to BedIterator. Defaults to True.
                            (Presumably a boolean flag - confirm against
                            BedIterator.)

    @return: A BedIterator constructed from this object's service and
             query.
    """
    return BedIterator(self.service, self.query, ucsc_compatible)
97
99 """
100 Get results as FASTA
101 ====================
102
103 Return a FastaIterator object, which stringifies to the Fasta results,
104 and works as an iterator over the records (not the lines).
105 """
106 return FastaIterator(self.service, self.query)
107
109 """
110 Get results as GFF3
111 ===================
112
113 Return a GFF3Iterator object, which stringifies to the GFF3 results,
114 and works as an iterator over the lines. After iteration the header
115 information is accessible with the iter.header() method
116 """
117 return GFF3Iterator(self.service, self.query)
118