Source code for spike.File.mzXML

#!/usr/bin/env python 
# encoding: utf-8

from __future__ import print_function, division
import numpy as np
import xml.dom.minidom
import base64
import struct
import unittest

class mzXML():
    '''
    Draft for reading mzXML files.

    The file is parsed with xml.dom.minidom at construction time and the
    first <peaks>/<scan> elements are decoded by get_params().
    '''

    def __init__(self, filename):
        '''
        Parse `filename` (a path or file object accepted by
        xml.dom.minidom.parse) and extract the scan parameters.
        '''
        self.doc = xml.dom.minidom.parse(filename)
        self.debug = 1  # values > 2 make get_params() print each parameter name
        self.get_params()

    def txt2np(self, txt):
        '''
        from mzXML to numpy
        Float 32, big endian.

        txt : base64 encoded payload (text or bytes)
        Returns a 1D numpy array holding the decoded values.
        '''
        data = base64.b64decode(txt)
        count = len(data) // struct.calcsize('>f')
        # A repeat count in the format is equivalent to repeating 'f' count
        # times, without building a count-character long format string.
        return np.array(struct.unpack('>%df' % count, data))

    def np2txt(self, nump):
        '''
        from numpy to mzXML
        Float 32, big endian.

        nump : sequence of numbers
        Returns the base64 encoding of the packed values, exactly as produced
        by base64.b64encode.
        '''
        binary = struct.pack('>%df' % len(nump), *nump)
        return base64.b64encode(binary)

    def scattr(self, scan, param, kind=None):
        '''
        Scan attributes.

        scan  : list of scan nodes, only the first one is read
        param : name of the attribute to read
        kind  : optional callable (int, float, ...) applied to the raw
                attribute string; when falsy the raw string is returned.
        '''
        param_value = scan[0].getAttribute(param)
        return kind(param_value) if kind else param_value

    def get_params(self):
        '''
        Extract parameters for limits and size.

        Decodes the first <peaks> element into self.spec (even indices) and
        self.mzaxis (odd indices), stores the number of pairs in
        self.scan_size, then copies the attributes listed in self.dic_param
        from the first <scan> element onto the instance.
        Parameters that are missing or cannot be converted are reported on
        stdout and skipped.
        '''
        peaks = self.doc.getElementsByTagName('peaks')
        scan = self.doc.getElementsByTagName('scan')
        # Decode the payload once; it is reused for the data and for the size.
        num = self.txt2np(peaks[0].childNodes[0].toxml())
        # NOTE(review): mzXML usually stores (m/z, intensity) pairs, which
        # would make the even entries the m/z axis - confirm the naming below.
        self.spec = num[0::2]
        self.mzaxis = num[1::2]
        self.scan_size = len(num) // 2  # number of pairs, kept as an integer
        self.dic_param = {'msLevel': int, 'peaksCount': int, 'polarity': None,
                          'scanType': None, 'filterLine': None,
                          'retentionTime': None, 'lowMz': float,
                          'highMz': float, 'basePeakMz': float,
                          'basePeakIntensity': float, 'totIonCurrent': int,
                          'collisionEnergy': int}
        for p in self.dic_param:
            if self.debug > 2:
                print("param ", p)
            try:
                setattr(self, p, self.scattr(scan, p, self.dic_param[p]))
            except (IndexError, ValueError):
                # IndexError: the document has no <scan> element.
                # ValueError: attribute absent (empty string) or not
                # convertible by the requested kind.
                print("parameter {} not found".format(p))

    def save_mzXML(self, x, y, namefile_final):
        '''
        Save data in the new mzXML file.

        x, y           : sequences written interleaved as x0, y0, x1, y1, ...
        namefile_final : path of the file to write

        The content of the first <peaks> element of the parsed document is
        replaced, then the whole document is written to namefile_final.
        '''
        peaks = self.doc.getElementsByTagName('peaks')
        # zip() is a lazy iterator on Python 3 and must be materialised,
        # otherwise numpy wraps the iterator in a 0-d object array.
        txt = self.np2txt(np.array(list(zip(x, y))).ravel())
        if isinstance(txt, bytes):
            # the DOM text node needs text, b64encode returns bytes
            txt = txt.decode('ascii')
        peaks[0].childNodes[0].replaceWholeText(txt)  # Replacing in the mzXML
        with open(namefile_final, 'w') as out:  # closed even if writing fails
            self.doc.writexml(out)  # Saving the mzXML
        print('mzXML saved')
class mzXML_Tests(unittest.TestCase):
    '''Unit tests for the mzXML reader.'''

    def test_simple(self):
        '''
        Load the reference mzXML file and check that every scan parameter
        extracted by get_params() has the expected value.
        '''
        from ..Tests import filename, directory
        mzxml = mzXML(directory()+'/testsmzXML/20131129_V_EV_bC_1499_CID30_r7k_25usc_range410-2000.mzXML')
        dic_param = {'msLevel': 2, 'peaksCount': 35775, 'polarity': "+",
                     'scanType': "Full",
                     'filterLine': "FTMS + p NSI Full ms2 1499.60@cid30.00 [410.00-2000.00]",
                     'retentionTime': "PT0.0366S", 'lowMz': 410.001,
                     'highMz': 2069.29, 'basePeakMz': 1151.69,
                     'basePeakIntensity': 13452.2, 'totIonCurrent': 681064,
                     'collisionEnergy': 30}
        for p in dic_param:
            # the message names the parameter so a failure is easy to locate
            self.assertEqual(getattr(mzxml, p), dic_param[p],
                             "unexpected value for parameter {}".format(p))
# Run the unit tests when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()