Package interminebio
[hide private]
[frames | no frames]

Source Code for Package interminebio

  1  from intermine import query 
  2  from interminebio.iterators import * 
  3  try: 
  4      import simplejson as json 
  5  except ImportError: 
  6      try: 
  7         import json 
  8      except ImportError: 
  9          raise "No JSON module found - please install simplejson" 
class SequenceDataQuery(object):
    """
    Export helpers shared by the sequence-oriented query classes
    ============================================================

    Every method reads ``self.service`` and ``self.query`` and passes them,
    unchanged, to one of the result iterators. This class does not define
    those two attributes itself.
    """

    def bed(self, ucsc_compatible=True):
        """
        Get results as BED
        ==================

        Build a BedIterator for this query. The iterator stringifies to the
        BED results and iterates over the lines. Once iteration is done the
        header information is available through the iter.header() method.

        @param ucsc_compatible: passed straight through to BedIterator (default: True)

        @rtype: BedIterator
        """
        service, target = self.service, self.query
        return BedIterator(service, target, ucsc_compatible)

    def fasta(self):
        """
        Get results as FASTA
        ====================

        Build a FastaIterator for this query. The iterator stringifies to
        the Fasta results and iterates over the records (not the lines).

        A query fetched as FASTA may only have a single output column;
        errors will be raised otherwise.

        @rtype: FastaIterator
        """
        service, target = self.service, self.query
        return FastaIterator(service, target)

    def gff3(self):
        """
        Get results as GFF3
        ===================

        Build a GFF3Iterator for this query. The iterator stringifies to the
        GFF3 results and iterates over the lines. Once iteration is done the
        header information is available through the iter.header() method.

        @rtype: GFF3Iterator
        """
        service, target = self.service, self.query
        return GFF3Iterator(service, target)

class RegionQuery(SequenceDataQuery):
    """
    Class for querying InterMine Webservices for Features in Genomic Intervals
    ==========================================================================

    This class allows you to construct queries that retrieve data about
    sequences and sequence features in biologically relevant formats, where
    those features are located overlapping genomic intervals.

    The currently supported formats are UCSC-BED, GFF3, and FASTA.

    These queries may also be used to construct lists.
    """

    LIST_PATH = "/regions/list"
    BED_PATH = "/regions/bed"
    FASTA_PATH = "/regions/fasta"
    GFF3_PATH = "/regions/gff3"

    def __init__(self, service, organism, feature_types, regions, extension=0, is_interbase=False):
        """
        Constructor
        ===========

          >>> s = Service("www.flymine.org/query", "API-KEY")
          >>> org = "D. melanogaster"
          >>> regions = ["2L:14614843..14619614"]
          >>> feature_types = ["Exon", "Intron"]
          >>> q = RegionQuery(s, org, feature_types, regions)

        @param service: The service to connect to.
        @type service: intermine.webservice.Service

        @param organism: The short name of the organism to look within (eg: D. melanogaster)
        @type organism: str

        @param feature_types: The types of features to look for. A single
            string is treated as one feature type.
        @type feature_types: list[str]

        @param regions: The regions to search within, in chrX:start..end
            format or as tab-separated chrX, start, end fields. A single
            string is treated as one region.
        @type regions: list(str)

        @param extension: A number of base-pairs to extend each region on either side (default: 0)
        @type extension: int

        @param is_interbase: Whether to interpret the co-ordinates as interbase co-ordinates
        @type is_interbase: boolean
        """
        self.service = service
        self.organism = organism
        # Stored as sets so duplicate entries collapse.
        self.feature_types = self._as_set(feature_types)
        self.regions = self._as_set(regions)
        self.extension = extension
        self.is_interbase = is_interbase
        self.bed_path = RegionQuery.BED_PATH
        self.fasta_path = RegionQuery.FASTA_PATH
        self.gff3_path = RegionQuery.GFF3_PATH
        self.views = []

    @staticmethod
    def _as_set(values):
        """
        Return values as a set, treating a lone string as a single item.

        Without the string check, set("Exon") would yield the individual
        characters rather than the one feature type the caller meant.
        """
        if isinstance(values, str):
            return set([values])
        return set(values)

    def _get_region_query(self):
        """
        Build the dictionary describing this region search.

        The feature types and regions are emitted in sorted order: set
        iteration order for strings can differ between interpreter runs, and
        sorting keeps the serialised request reproducible.

        @rtype: dict
        """
        return {
            "organism": self.organism,
            "featureTypes": sorted(self.feature_types),
            "regions": sorted(self.regions),
            "extension": self.extension,
            "isInterbase": self.is_interbase
        }

    def to_query_params(self):
        """
        Returns the query parameters for this request.
        ==============================================

        This method is a required part of the interface for creating lists.
        The region search is JSON-encoded under the "query" key.

        @rtype: dict
        """
        return {"query": json.dumps(self._get_region_query())}

    def get_list_upload_uri(self):
        """
        Returns the full url for the list upload service
        ================================================

        This method is a required part of the interface for creating lists.
        The url is the service root followed by LIST_PATH.

        @rtype: str
        """
        return self.service.root + RegionQuery.LIST_PATH

    @property
    def query(self):
        """
        This object itself.

        The inherited bed/fasta/gff3 methods pass self.query to the result
        iterators; for a region search that is the RegionQuery instance.
        """
        return self

class SequenceQuery(SequenceDataQuery):
    """
    Class for querying InterMine Webservices for Sequence based data
    ================================================================

    This class allows you to construct queries that retrieve data about
    sequences and sequence features in biologically relevant formats.

    The currently supported formats are UCSC-BED, GFF3, and FASTA.
    """

    # Class names accepted by set_sequence as carrying sequence information.
    _SEQUENCE_CLASSES = ("SequenceFeature", "Protein", "Sequence")

    def __init__(self, service_or_query, root=None):
        """
        Constructor
        ===========

          >>> s = Service("www.flymine.org/query")
          >>> bio_query = SequenceQuery(s, "Gene")
          >>> q = s.new_query("Gene").where(s.model.Gene.symbol == ["h", "r", "eve", "zen"])
          >>> bio_query = SequenceQuery(q)

        @param service_or_query: The service to connect to, or a query to wrap.
        @type service_or_query: intermine.webservice.Service or intermine.query.Query

        @param root: The root class of the query (only used when a service
            is given and a new query has to be created)
        @type root: str
        """
        if isinstance(service_or_query, query.Query):
            # Wrap the existing query and reuse its service.
            self.service = service_or_query.service
            self.query = service_or_query
        else:
            self.service = service_or_query
            self.query = query.Query(self.service.model, self.service, root=root)

        # Set up delegations to the wrapped query
        self.add_constraint = self.query.add_constraint
        self.filter = self.where

        self.to_xml = self.query.to_xml

        self.get_logic = self.query.get_logic
        self.set_logic = self.query.set_logic

        # Aliases for the methods defined on this class
        self.select_sequence = self.set_sequence
        self.select_sequences = self.add_sequence_feature
        self.add_sequence_features = self.add_sequence_feature

    def add_sequence_feature(self, *features):
        """
        Add an arbitrarily long list of sequence features to the query.
        ===============================================================

        Fasta, GFF3 and BED queries all can read information from SequenceFeatures.
        For Fasta you are advised to use the set_sequence method instead,
        as unlike the GFF3 and BED services, the Fasta service can only handle
        queries with one output column.

        All features are validated before any view is added, so a rejected
        feature leaves the query's views untouched.

        @param features: The paths to add; each must refer to a SequenceFeature

        @raise ValueError: if a path is an attribute or is not a SequenceFeature

        @return: self, for chaining
        """
        checked = []
        for f in features:
            p = self.query.column(f)._path
            if p.is_attribute() or not p.get_class().isa("SequenceFeature"):
                # (f,) keeps the formatting correct even if f is a tuple.
                raise ValueError("%s is not a Sequence Feature" % (f,))
            checked.append(p)

        for p in checked:
            self.query.add_view(str(p) + ".id")

        return self

    def where(self, *args, **kwargs):
        """
        Add a constraint to the query, and return self for chaining.

        All arguments are forwarded to the wrapped query's where method.
        """
        self.query.where(*args, **kwargs)
        return self

    def set_sequence(self, f):
        """
        Set the sequence column to retrieve.
        ====================================

        Add a sequence holding object to the query. It can be a SequenceFeature, Protein
        or Sequence object.

        Fasta queries, which read sequences rather than sequence features,
        currently only permit one output column. Any existing views are
        therefore replaced, but only once the new path has been validated.

        @param f: The path of the sequence holding object

        @raise ValueError: if the path is an attribute or has no sequence information

        @return: self, for chaining
        """
        p = self.query.column(f)._path
        # Check is_attribute first: get_class is only consulted for
        # non-attribute paths, as in the original short-circuit condition.
        if p.is_attribute():
            raise ValueError("%s has no sequence information" % (f,))
        cls = p.get_class()
        if not any(cls.isa(name) for name in self._SEQUENCE_CLASSES):
            raise ValueError("%s has no sequence information" % (f,))

        # Only discard the previous views now that the new one is known good.
        self.query.views = []
        self.query.add_view(str(p) + ".id")

        return self
