Source code for dohlee.gene
import mygene
from collections import OrderedDict
# NCBI Taxonomy ID for human (Homo sapiens); passed as the ``species``
# argument to MyGene.info queries so that lookups are restricted to human genes.
HOMO_SAPIENS_SPECIES_ID = 9606
def get_first_item(items):
    """Pick the leading element out of ``items``.

    The argument is first normalised with ``mygene.alwayslist`` and the
    element at index 0 of the normalised value is returned, so callers may
    pass either a collection or a lone value.

    :param list/value items: A list of items or a single value.
    :returns: The first item of items, or the value itself when a single
        value was given.
    """
    normalised = mygene.alwayslist(items)
    return normalised[0]
def ensg2symbol(ensembl_ids):
    """Translate Ensembl gene ids to gene symbols via MyGene.info.

    Hits that carry no ``'symbol'`` field are skipped, and when the same
    query id appears in several hits only the first one is kept.

    :param list ensembl_ids: A list of Ensembl IDs to be converted.
    :returns: A list of symbols in the order their queries were first seen.
        When exactly one symbol was obtained, that symbol itself is returned
        instead of a one-element list.
    """
    client = mygene.MyGeneInfo()
    hits = client.getgenes(
        ensembl_ids,
        fields='symbol',
        species=HOMO_SAPIENS_SPECIES_ID,
    )

    # Insertion-ordered mapping: query id -> symbol of its first usable hit.
    first_symbol_by_query = OrderedDict()
    for hit in hits:
        if 'symbol' not in hit:
            continue
        # setdefault keeps an already stored (earlier) symbol untouched.
        first_symbol_by_query.setdefault(hit['query'], hit['symbol'])

    symbols = list(first_symbol_by_query.values())
    if len(symbols) == 1:
        return symbols[0]
    return symbols
def symbol2ensg(symbols=None):
    """Convert gene symbols into Ensembl gene ids.

    Each symbol is looked up through MyGene.info, restricted to human
    (``HOMO_SAPIENS_SPECIES_ID``). Hits without an ``'ensembl'`` field are
    skipped. When a symbol yields several hits, or a hit lists several
    Ensembl entries, only the first one is used.

    :param list symbols: A list of HGNC symbols to be converted. The value is
        forwarded to ``MyGeneInfo.querymany`` unchanged.
    :returns: A list of Ensembl gene IDs (ENSG IDs) in the order their
        symbols were first seen. When exactly one ID was obtained, that ID
        itself is returned instead of a one-element list.
    """
    mg = mygene.MyGeneInfo()
    query_results = mg.querymany(
        symbols,
        scopes='symbol',
        fields='ensembl.gene',
        # Shared module constant instead of a repeated magic number, so the
        # species filter stays in sync with ensg2symbol.
        species=HOMO_SAPIENS_SPECIES_ID,
    )

    # Insertion-ordered mapping: queried symbol -> Ensembl ID of its first hit.
    best_result = OrderedDict()
    for query_result in query_results:
        if 'ensembl' not in query_result:
            continue
        # 'ensembl' may be a single entry or a list of entries; take the first.
        ensembl_id = get_first_item(query_result['ensembl'])['gene']
        # setdefault leaves the ID from an earlier hit in place.
        best_result.setdefault(query_result['query'], ensembl_id)

    result = list(best_result.values())
    return result[0] if len(result) == 1 else result