Module scan
Module for scanning and extracting data from aplog-generated files.
Expand source code
""" Module for scanning and extracting data from aplog-generated files.
"""
import sys, time, argparse, os
from timeit import default_timer as timer
#from pprint import pprint
import bisect
import numpy as np
from io import BytesIO
import msgpack
__version__ = 'v2.0.3 2021-08-11'#
#````````````````````````````Globals``````````````````````````````````````````
Nano = 0.000000001
TimeFormat_in = '%y%m%d_%H%M%S'
TimeFormat_out = '%y%m%d_%H%M%S'
#````````````````````````````Helper functions`````````````````````````````````
def _printv(msg):
if APScan.Verbosity >= 1:
print(f'DBG_APSV: {msg}')
def _printvv(msg):
if APScan.Verbosity >= 2 :
print(f'DBG_APSVV: {msg}')
def _croppedText(txt, limit=200):
if len(txt) > limit:
txt = txt[:limit]+'...'
return txt
def _seconds2Datetime(ns:int):
from datetime import datetime
dt = datetime.fromtimestamp(ns*Nano)
return dt.strftime('%y%m%d_%H%M%S')
def _timeInterval(startTime, span):
"""returns sections (string) and times (float) of time interval
boundaries"""
ttuple = time.strptime(startTime,TimeFormat_in)
firstDataSection = time.strftime(TimeFormat_out, ttuple)
startTime = time.mktime(ttuple)
endTime = startTime +span
endTime = min(endTime, 4102462799.)# 2099-12-31
ttuple = time.localtime(endTime)
endSection = time.strftime(TimeFormat_out, ttuple)
return firstDataSection, int(startTime/Nano), endSection, int(endTime/Nano)
def _unpacknp(data):
if not isinstance(data,(tuple,list)):
return data
if len(data) != 2:# expect two arrays: times and values
return data
#print( _croppedText(f'unp: {data}'))
unpacked = []
for i,item in enumerate(data):
try:
dtype = item['dtype']
shape = item['shape']
buf = item['bytes']
arr = np.frombuffer(buf, dtype=dtype).reshape(shape)
if i == 0:
arr = arr * Nano # convert timestamps from ns to seconds
unpacked.append(arr)
except Exception as e:
print(f'Exception in iter: {e}')
if i == 0:
print(f'ERR in unpacknp: {e}')
return data
else:
print('not np-packed data')
unpacked.append(data)
#print( _croppedText(f'unpacked: {len(unpacked[0])} of {unpacked[0].dtype}, {len(unpacked[1])} of {unpacked[1].dtype}'))
return unpacked
#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
#````````````````````````````class APScan`````````````````````````````````````
class APScan():
Verbosity = 0
"""Show dedugging messages."""
def __init__(self, fileName):
"""Open logbook fileName, unpack headers, position file to data sections."""
self.logbookName = fileName
try:
self.logbookSize = os.path.getsize(fileName)
except Exception as e:
print(f'ERROR opening file {fileName}: {e}')
sys.exit()
self.logbook = open(fileName,'rb')
# unpack logbook contents and set file position after it
self.unpacker = msgpack.Unpacker(self.logbook, use_list=False
,strict_map_key=False) # use_list=False speeds up ~20%; read_size=100*1024*1024 did not help
self.dirSize = 0
self.directory = []
for contents in self.unpacker:
_printvv(_croppedText(f'Table of contents: {contents}'))
try:
self.dirSize = contents['contents']['size']
except:
print('Warning: Table of contents is missing or wrong')
break
self.directory = contents['data']
break
# unpack two sections after the contents: Abstract and Index
self.position = self.dirSize
self.logbook.seek(self.position)
self.unpacker = msgpack.Unpacker(self.logbook, use_list=False
,strict_map_key=False) # use_list=False speeds up ~20%
nSections = 0
for section in self.unpacker:
#print(f'section:{nSections}')
nSections += 1
if nSections == 1:# section: Abstract
_printvv(f'Abstract@{self.logbook.tell()}: {section}')
self.abstract = section['abstract']
self.compression = self.abstract.get('compression')
if self.compression is None:
continue
if self.compression != 'None':
module = __import__(self.compression)
self.decompress = module.decompress
continue
if nSections == 2:# section: Index
#_printvv(f'Index@{self.logbook.tell()}: {section}')
par2key = section['index']
#self.key2par = {value:key for key,value in self.par2key.items()}
self.key2par = par2key
_printvv(f'Index@{self.logbook.tell()}: {self.key2par}')
break
def get_headers(self):
"""Returns dict of header sections: Directory, Abstract, Index"""
return {'Directory':self.directory, 'Abstract':self.abstract
, 'Index':self.key2par}
def extract_objects(self, span=0., items=[], startTime=None
, bufSize=128*1024*1024):
"""
Returns correlated dict of times and values of the logged items during
the selected time interval.
**span**: Time interval for data extraction in seconds. If 0, then
the data will be extracted starting from the startTime and
ending at the end of the logbook.
**items**: List of integer indexes of items to extract. The map of
indexes to Control System parameters could be obtained using
get_headers()['Index'].
**startTime**: String for selecting start of the extraction interval.
Format: YYMMDD_HHMMSS. If None then extraction starts from the
beginning.
**bufSize**: Size of the bytesIO buffer. If file size is smaller than
the bufSize, then the whole file will be read into the buffer.
Otherwise each section will be read from the file sequentially.
Note: Python 3's read() for binary files uses a very
effective buffering scheme, so a very large bufSize
has almost no effect on performance."""
extracted = {}
parameterStatistics = {}
endPosition = self.logbookSize
readerBufferSize = bufSize
# create empty map for return
if len(items) == 0: # enable handling of all items
#items = self.key2par.keys()
items = [i for i in range(len(self.key2par))]
#for key,par in self.key2par.items():
for key,par in enumerate(self.key2par):
if key not in parameterStatistics:
#print(f'add to stat[{len(parameterStatistics)+1}]: {key}')
parameterStatistics[key] = 0
if par not in extracted and key in items:
_printvv(f'add extracted[{len(extracted)+1}]: {par}')
extracted[key] = {'par':par, 'times':[], 'values':[]}
if len(self.directory) == 0:
print('ERROR. Directory is missing')
sys.exit()
# determine a part of the logbook for extraction
keys = list(self.directory.keys())
if startTime is None:
firstTStamp = keys[0]
startTime = _seconds2Datetime(firstTStamp)
firstDataSection, startTStamp, endSection, endTStamp\
= _timeInterval(startTime, span)
_printv(f'start,end:{firstDataSection, int(startTStamp*Nano), endSection, int(endTStamp*Nano)}')
# position logbook to first data section
lk = len(keys)
bt = timer()
# find the nearest key using bisect, which is fast (~10 us)
startSection_idx = bisect.bisect_left(keys, startTStamp)
#print(f'nidx: {startSection_idx,startTStamp,endTStamp}')
startSectionTStamp = keys[startSection_idx]
if startSectionTStamp > startTStamp:
startSection_idx -= 1
startSectionTStamp = keys[max(startSection_idx,0)]
endTStamp = startTStamp + span/Nano
nearest_idx = min(bisect.bisect_left(keys, endTStamp),lk-1)
lastSectionTStamp = keys[nearest_idx]
if lastSectionTStamp < endTStamp:
lastSectionTStamp = keys[min(nearest_idx+1,lk-1)]
self.position = self.directory[startSectionTStamp]
endPosition = self.directory[lastSectionTStamp]
_printvv(f'first dsection {self.position}')
_printvv(f'last dsection {endPosition}')
self.logbook.seek(self.position)
_printvv(f'logbook@{self.logbook.tell()}, offset={self.dirSize}')
# Try to read required sections into a buffer. If successful, then
# the streamReader for unpacker will be this buffer, otherwise
# it will be the logbook file.
toRead = endPosition - self.logbook.tell()
if toRead < readerBufferSize:
ts = timer()
rbuf = self.logbook.read(toRead)
ts1 = timer()
dt1 = round(ts1 - ts,6)
streamReader = BytesIO(rbuf)
dt2 = round(timer() - ts1,6)
print(f'Read {round(toRead/1e6,3)}MB in {dt1}s, adopted in {dt2}s')
else:
print((f'Read size {round(toRead/1e6,1)}MB >'
f' {round(readerBufferSize/1e6,1)}MB'
', processing it sequentially'))
streamReader = self.logbook
# re-create the Unpacker to re-position it in the logbook
self.unpacker = msgpack.Unpacker(streamReader, use_list=False
,strict_map_key=False) #use_list=False speeds up 20%
# loop over sections in the logbook
nSections = 0
if APScan.Verbosity >= 1:
sectionTime = [0.]*3
startTStampNS = startTStamp
endTStampNS = endTStamp
print(f'sts,ets:{startTStampNS,endTStampNS}')
extractionTime = 0.
perfMonTime = 0.
timerTotal = timer()
for section in self.unpacker:
extractionTStart = timer()
nSections += 1
# data sections
_printv(f'Data Section: {nSections+startSection_idx}')
if nSections%60 == 0:
dt = time.time() - extractionTime
_printv((f'Data sections: {nSections}'
f', elapsed time: {round(dt,4)}'))#, paragraphs/s: {nParagraphs//dt}'))
try:# handle compressed data
if self.compression not in (None, 'None'):
ts = timer()
decompressed = self.decompress(section)
if APScan.Verbosity >= 1:
sectionTime[0] += timer() - ts
ts = timer()
section = msgpack.unpackb(decompressed
,strict_map_key=False)#ISSUE: strict_map_key does not work here
if APScan.Verbosity >= 1:
sectionTime[1] += timer() - ts
except Exception as e:
print(f'WARNING: wrong section {nSections}: {str(section)[:75]}...: {e}')
break
_printv(f"Data section {nSections}: {section['tstart']}")
# iterate over parameters
ts = timer()
try:
# the following loop takes 90% time
for parIndex, tsValsNP in section['pars'].items():
if not parIndex in items:
continue
tstamps, values = _unpacknp(tsValsNP)
# trim array if needed
if tstamps[0] < startTStampNS:
first = bisect.bisect_left(tstamps, startTStampNS)
tstamps = tstamps[first:]
values = values[first:]
try:
if tstamps[-1] > endTStampNS:
last = bisect.bisect_left(tstamps, endTStampNS)
tstamps = tstamps[:last]
values = values[:last]
except: pass
if APScan.Verbosity >= 2:
print( _croppedText(f'times{parIndex}[{len(tstamps)}]: {tstamps}'))
try: vshape = f'of numpy arrays {values.dtype,values.shape}'
except: vshape = ''
print(f'vals{parIndex}[{len(values)}] {vshape}:')
print( _croppedText(f'{values}'))
#`````````Concatenation of parameter lists.``````````````
# Using numpy.concatenate turned to be very slow.
# The best performance is using list.extend()
extracted[parIndex]['times'].extend(list(tstamps))
ts2 = timer()
extracted[parIndex]['values'].extend(list(values))
perfMonTime += timer() - ts2
#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
n = len(extracted[parIndex]['times'])
_printvv(f"par{parIndex}[{n}]")
parameterStatistics[parIndex] = n
except Exception as e:
print(f'WARNING: in concatenation: {e}')
dts = timer() - ts
if APScan.Verbosity >= 1:
sectionTime[2] += dts
extractionTime += timer() - extractionTStart
if APScan.Verbosity >= 1:
print(f'SectionTime: {[round(i/nSections,6) for i in sectionTime]}')
print(f'Deserialized from {self.logbookName}: {nSections} sections')
print(f'Sets/Parameter: {parameterStatistics}')
ttime = timer()-timerTotal
mbps = (f' {round(toRead/1e6/extractionTime,1)} MB/s'
f', including disk: {round(ttime,3)} s, {round(toRead/1e6/ttime,1)} MB/s')
print(f'Processing time: {round(extractionTime,3)} s, {mbps}')
print(f'Spent {round(perfMonTime/extractionTime*100,1)}% in the monitored code.')
return extracted
Classes
class APScan (fileName)
-
Open logbook fileName, unpack headers, position file to data sections.
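A minimal sketch of opening a logbook and inspecting its headers; the import path and file name are assumptions, not confirmed by this page:

from scan import APScan

aps = APScan('mylogbook.aplog')   # hypothetical aplog-generated file
headers = aps.get_headers()       # {'Directory': ..., 'Abstract': ..., 'Index': ...}
print(headers['Abstract'])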
Class variables
var Verbosity
-
Show debugging messages.
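Diagnostic printing can be enabled by raising this class attribute, e.g.:

APScan.Verbosity = 1   # 1: progress messages (DBG_APSV), 2: very verbose (DBG_APSVV)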
Methods
def extract_objects(self, span=0.0, items=[], startTime=None, bufSize=134217728)
-
Returns a correlated dict of times and values of the logged items during the selected time interval.
span: Time interval for data extraction in seconds. If 0, then the data will be extracted starting from the startTime and ending at the end of the logbook.
items: List of integer indexes of items to extract. The map of indexes to Control System parameters can be obtained using get_headers()['Index'].
startTime: String selecting the start of the extraction interval. Format: YYMMDD_HHMMSS. If None, extraction starts from the beginning.
bufSize: Size of the BytesIO buffer. If the file size is smaller than bufSize, the whole file will be read into the buffer; otherwise each section will be read from the file sequentially. Note: Python 3's read() for binary files uses a very effective buffering scheme, so a very large bufSize has almost no effect on performance.
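A hedged usage sketch; the file name, item indexes, and start time below are placeholders, and the import path is assumed from this page:

from scan import APScan

aps = APScan('mylogbook.aplog')   # hypothetical aplog-generated file
data = aps.extract_objects(span=60., items=[0, 1], startTime='210811_120000')
for key, entry in data.items():
    print(entry['par'], len(entry['times']), 'samples')

Each entry of the returned dict has the form {'par': parameter_name, 'times': [...], 'values': [...]}; the times are in seconds (nanosecond timestamps are scaled by 1e-9 during unpacking).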
def get_headers(self)
-
Returns dict of header sections: Directory, Abstract, Index
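For example, the Index header can be used to map item indexes to parameter names before calling extract_objects (sketch, assuming aps is an APScan instance as above):

for idx, par in enumerate(aps.get_headers()['Index']):
    print(idx, par)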