Module nais_processor

Source code
import numpy as np
from matplotlib import colors
import matplotlib.pyplot as plt
from matplotlib.ticker import LogLocator
from datetime import date, datetime, timedelta
import matplotlib.dates as dts
import pandas as pd
import os
import locale
import warnings
import yaml
import re
import sys
from dateutil.parser import parse
from tinydb import TinyDB, Query
from tinydb.operations import add
import time
import json
import aerosol_functions as af

__pdoc__ = {
    'tubeloss': False,
    'average_mob': False,
    'average_dp': False,
    'find_diagnostic_names': False,
    'process_data': False,
    'get_environmental_data': False,
    'choose_particle_polarity': False,
    'bring_to_sealevel': False,
    'correct_inlet_losses': False,
    'wagner_ion_mode_correction': False,
    'clean_elem_noise': False,
    'clean_corona_ions': False,
    'add_flags': False,
}


# Geometric means of the final diameter and mobility bins (diameters in nm, mobilities in cm2 V-1 s-1)
dp_ion = np.array([7.86360416e-10, 9.08232168e-10, 1.04902018e-09, 1.21167006e-09,
1.39958930e-09, 1.61672083e-09, 1.86762862e-09, 2.15759741e-09,
2.49274932e-09, 2.88018000e-09, 3.32811839e-09, 3.84611427e-09,
4.44525917e-09, 5.13844742e-09, 5.94068566e-09, 6.86946146e-09,
7.94518431e-09, 9.19171623e-09, 1.06370142e-08, 1.23139134e-08,
1.42610904e-08, 1.65242568e-08, 1.91576555e-08, 2.22259544e-08,
2.58066722e-08, 2.99933244e-08, 3.48995548e-08, 4.06646353e-08])*1e9

dp_par = np.array([7.498942093324539870e-01,8.659643233600640144e-01,
9.999999999999980016e-01,1.154781984689456031e+00,1.333521432163321974e+00,
1.539926526059490097e+00,1.778279410038920094e+00,2.053525026457140079e+00,
2.371373705661659947e+00,2.738419634264360081e+00,3.162277660168379967e+00,
3.651741272548380213e+00,4.216965034285819591e+00,4.869675251658620141e+00,
5.623413251903479626e+00,6.493816315762099833e+00,7.498942093324560076e+00,
8.659643233600640144e+00,1.000000000000000000e+01,1.154781984689457985e+01,
1.333521432163323972e+01,1.539926526059490008e+01,1.778279410038922137e+01,
2.053525026457139901e+01,2.371373705661660125e+01,2.738419634264360170e+01,
3.162277660168379967e+01,3.651741272548380124e+01,4.216965034285819769e+01])

mob_ion = np.array([3.162277660168379937e-04,2.371373705661659990e-04,
1.778279410038920258e-04,1.333521432163320159e-04,1.000000000000000048e-04,
7.498942093324559917e-05,5.623413251903490022e-05,4.216965034285820205e-05,
3.162277660168380208e-05,2.371373705661660125e-05,1.778279410038919852e-05,
1.333521432163319990e-05,1.000000000000000082e-05,7.498942093324561442e-06,
5.623413251903490361e-06,4.216965034285830030e-06,3.162277660168380038e-06,
2.371373705661659871e-06,1.778279410038920148e-06,1.333521432163330027e-06,
1.000000000000000167e-06,7.498942093324570124e-07,5.623413251903499890e-07,
4.216965034285829924e-07,3.162277660168379721e-07,2.371373705661660136e-07,
1.778279410038920042e-07,1.333521432163329868e-07])*1e4

mob_ion_geomeans=np.array([2.73841963e-04, 2.05352503e-04, 1.53992653e-04, 1.15478198e-04,
8.65964323e-05, 6.49381632e-05, 4.86967525e-05, 3.65174127e-05,
2.73841963e-05, 2.05352503e-05, 1.53992653e-05, 1.15478198e-05,
8.65964323e-06, 6.49381632e-06, 4.86967525e-06, 3.65174127e-06,
2.73841963e-06, 2.05352503e-06, 1.53992653e-06, 1.15478198e-06,
8.65964323e-07, 6.49381632e-07, 4.86967525e-07, 3.65174127e-07,
2.73841963e-07, 2.05352503e-07, 1.53992653e-07])*1e4

dp_par_geomeans=np.array([0.80584219,  0.93057204,  1.07460783,  1.24093776,  1.43301257,
1.6548171 ,  1.91095297,  2.20673407,  2.54829675,  2.94272718,
3.39820833,  3.92418976,  4.53158364,  5.23299115,  6.0429639 ,
6.97830585,  8.05842188,  9.30572041, 10.74607828, 12.40937761,
14.3301257 , 16.548171  , 19.10952975, 22.06734069, 25.48296748,
29.42727176, 33.98208329, 39.24189758])

dlogmob_ion=np.array([0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125,
0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125,
0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125,
0.125])

dlogdp_ion = np.array([0.06257524, 0.0625811 , 0.06259375, 0.06260838, 0.06262533,
0.06264495, 0.06266769, 0.06269404, 0.06272461, 0.06276008,
0.06280128, 0.06284916, 0.06290487, 0.06296974, 0.06304539,
0.0631337 , 0.06323696, 0.06335788, 0.06349974, 0.0636665 ,
0.06386292, 0.06409481, 0.06436924, 0.06469482, 0.06508209,
0.06554394, 0.06609614, 0.06639699])

dlogdp_par=np.array([0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625,
0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625,
0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625,
0.0625, 0.0625, 0.0625, 0.0625, 0.0625])

filename_formats = [
["%Y-%m-%d.ions.nds","%Y-%m-%d.particles.nds","%Y-%m-%d.log"],
["%Y%m%d-block-ions.spectra","%Y%m%d-block-particles.spectra","%Y%m%d-block.records"],
["%Y%m%d-block-ions.spectra","%Y%m%d-block-particles.spectra","%Y%m%d-block.diagnostics"]]

possible_sampleflow_names1 = [
"sampleflow",
"Flowaer"]

possible_sampleflow_names2 = [
"pos_sampleflow.mean",
"neg_sampleflow.mean",
"pos_sampleflow",
"neg_sampleflow"
]

possible_temperature_names = [
"temperature.mean",
"temperature",
"temp"]

possible_pressure_names = [
"baro.mean",
"baro"]

# electrometer size ranges for different inverters (for the purpose of cleaning out electrometer noise):
ions_pos_v14_lrnd={"0": [7.16444775804687e-10, 1.0700473216535486e-09], "1": [8.766005865635541e-10, 1.2912139078236106e-09], "2":
 [1.0233784015731513e-09, 1.494607599390042e-09], "3": [1.167004143869059e-09, 1.6953050539978397e-09], "4":
 [1.3171140158277396e-09, 1.9129953633709412e-09], "5": [1.5010400712091295e-09, 2.196448726880819e-09], "6":
 [1.7374102549917467e-09, 2.5397495919423145e-09], "7": [1.9987455846433743e-09, 2.909827835540643e-09], "8":
 [2.308391221999045e-09, 3.399628318414748e-09], "9": [2.743654460662328e-09, 4.064377206025429e-09], "10":
 [3.235105579106799e-09, 4.708980203333236e-09], "11": [3.657786198230896e-09, 5.320149444473465e-09], "12":
 [4.40743242351629e-09, 7.235758783698336e-09], "13": [6.341141170615947e-09, 1.0173608443214825e-08], "14":
 [8.61139257420043e-09, 1.2899482426374689e-08], "15": [1.0474248763637253e-08, 1.556121847426194e-08], "16":
 [1.2937549036927316e-08, 2.07105715562843e-08], "17": [1.778277482687919e-08, 2.8143993934982054e-08], "18":
 [2.3703396062208645e-08, 3.638374245002531e-08], "19": [2.9466514534877575e-08, 4.3834357529742775e-08], "20":
 [3.3648711881521194e-08, 4.601489588650497e-08]}

particles_pos_v14_lrnd_elm25_chv={"0": [7.084021223198885e-10, 1.1041550022447838e-09], "1": [8.824680153980799e-10, 1.3381985992962885e-09], "2":
 [1.027268571461551e-09, 1.5535807431315568e-09], "3": [1.1710814400196452e-09, 1.7778453394121642e-09], "4":
 [1.3364125967253046e-09, 2.0521125317491067e-09], "5": [1.5165756393379476e-09, 2.364104261278049e-09], "6":
 [1.7469782917199996e-09, 2.7691235104649e-09], "7": [2.0030670784526445e-09, 3.1804078983335615e-09], "8":
 [2.3555174234652947e-09, 3.889745378517969e-09], "9": [2.8998890919509277e-09, 4.85643554134041e-09], "10":
 [3.4535426802312536e-09, 5.802269084443331e-09], "11": [3.952828282143858e-09, 6.706155173416103e-09], "12":
 [4.680496931685137e-09, 8.345157694215357e-09], "13": [5.908440036792274e-09, 1.0914143399425853e-08], "14":
 [7.25270144756779e-09, 1.3424351306635164e-08], "15": [8.481012704476841e-09, 1.5854436158750896e-08], "16":
 [9.92828580185976e-09, 1.8875874031548135e-08], "17": [1.2480795668183397e-08, 2.5489807672756865e-08], "18":
 [1.618264753193867e-08, 3.382694961210274e-08], "19": [1.9259686684447952e-08, 4.1933601744775866e-08], "20":
 [2.22037089383357e-08, 4.865271942977691e-08], "21": [2.4978007194778478e-08, 5.377912883862792e-08], "22":
 [2.7446273522212072e-08, 5.731891713329477e-08], "23": [2.966486504673139e-08, 6.006253448103395e-08], "24": 
 [3.1538173585926114e-08, 6.213974119411533e-08]}

particles_neg_v14_lrnd={"0": [7.071203017729596e-10, 1.1005013232430319e-09], "1": [8.797742415579472e-10, 1.3341534055269684e-09], "2":
 [1.0249360413125393e-09, 1.5500231709924995e-09], "3": [1.1663524905165027e-09, 1.76746761608056e-09], "4":
 [1.3247911530621861e-09, 2.0313528986656205e-09], "5": [1.4993872332769961e-09, 2.3322201548214762e-09], "6":
 [1.7211922995795243e-09, 2.7237415257553574e-09], "7": [1.9709779821521803e-09, 3.127660894213944e-09], "8":
 [2.2719521313715783e-09, 3.683906828365737e-09], "9": [2.6910604870370505e-09, 4.432425549741674e-09], "10":
 [3.1273717566860052e-09, 5.176267101656894e-09], "11": [3.5214138488359515e-09, 5.88784346872649e-09], "12":
 [4.344519392306214e-09, 8.075601998567308e-09], "13": [6.055647568060633e-09, 1.15707771780943e-08], "14":
 [7.891354399664643e-09, 1.4822317894774355e-08], "15": [9.488534907698969e-09, 1.7987926934224552e-08], "16": 
 [1.1972289469619363e-08, 2.4352144139698158e-08], "17": [1.6205665582181165e-08, 3.529177218334523e-08], "18": 
 [2.1726991804436577e-08, 4.890078120743212e-08], "19": [2.747153199034406e-08, 5.80468336800726e-08], "20": 
 [3.181625187151363e-08, 6.282803902888346e-08]}

ions_pos_v141_lrnd_elm25_chv={"0": [7.174408260504354e-10, 1.073624396936229e-09], "1": [8.795427161035404e-10, 1.2949773919916534e-09], "2":
 [1.0258582665799353e-09, 1.4979919549108165e-09], "3": [1.1719327677994699e-09, 1.70372171508208e-09], "4":
 [1.32799146465657e-09, 1.932594430907855e-09], "5": [1.520240256748169e-09, 2.2261429400706174e-09], "6":
 [1.7628304747501982e-09, 2.581861661002808e-09], "7": [2.0323715276272585e-09, 2.9598100485518586e-09], "8":
 [2.3816068293602794e-09, 3.5773872919384414e-09], "9": [2.962208652767496e-09, 4.432897017241381e-09], "10":
 [3.5762132418655334e-09, 5.239831433099603e-09], "11": [4.133555700629352e-09, 6.039237962304223e-09], "12": 
 [4.882640365333869e-09, 7.457408037033863e-09], "13": [6.2783199783114545e-09, 9.575956296394927e-09], "14": 
 [7.904204442058769e-09, 1.1694806122760362e-08], "15": [9.352755529575812e-09, 1.3732975020325302e-08], "16": 
 [1.0981897607602298e-08, 1.6276182350230556e-08], "17": [1.3500089105006028e-08, 2.1603165023592403e-08], "18": 
 [1.8022095523973226e-08, 2.708852306358357e-08], "19": [2.156970936611481e-08, 3.1921441726778743e-08], "20": 
 [2.4519917336200788e-08, 3.609220688998671e-08], "21": [2.7168015322179712e-08, 3.993591879861599e-08], "22": 
 [2.9639336081649807e-08, 4.339405159440388e-08], "23": [3.19559928473482e-08, 4.538489884003738e-08], "24":
 [3.362603005585794e-08, 4.572616914940904e-08]}

ions_pos_v141_hrnd_elm25_chv={"0": [7.177816842294488e-10, 1.072221766346416e-09], "1": [8.784324649240066e-10, 1.2952331371673022e-09], "2":
 [1.0282272593935292e-09, 1.496448130128779e-09], "3": [1.1709728034437686e-09, 1.70109575761716e-09], "4":
 [1.3300366642732846e-09, 1.9341044305153346e-09], "5": [1.521422514332051e-09, 2.220832669677381e-09], "6":
 [1.7632530836302399e-09, 2.584244551737274e-09], "7": [2.0351677980258766e-09, 2.955213035032816e-09], "8": 
 [2.379947213073259e-09, 3.5807287457548474e-09], "9": [2.9614387929489157e-09, 4.428794241338281e-09], "10": 
 [3.5808241708120252e-09, 5.247041226910477e-09], "11": [4.136034551200579e-09, 6.026277543624166e-09], "12": 
 [4.890752691340854e-09, 7.457985468634934e-09], "13": [6.2730994769101586e-09, 9.589732077862633e-09], "14":
 [7.893847904275436e-09, 1.1687225512360978e-08], "15": [9.345340174460752e-09, 1.3762043152233714e-08], "16":
 [1.0978493586549243e-08, 1.6262760201045273e-08], "17": [1.3526934344462764e-08, 2.157926406305932e-08], "18":
 [1.8045823361694697e-08, 2.711029182088656e-08], "19": [2.1550025677740064e-08, 3.1861088201059746e-08], "20": 
 [2.453350988841541e-08, 3.611626721404304e-08], "21": [2.7196984694271958e-08, 3.995278834492865e-08], "22": 
 [2.9641573529840143e-08, 4.3318065851782826e-08], "23": [3.187419245775203e-08, 4.5478238497900765e-08], "24":
 [3.3521538858575206e-08, 4.6046488264314935e-08]}

particles_pos_v14_lrnd={"0": [7.071203017729596e-10, 1.1005013232430319e-09], "1": [8.797742415579472e-10, 1.3341534055269684e-09], "2":
 [1.0249360413125393e-09, 1.5500231709924995e-09], "3": [1.1663524905165027e-09, 1.76746761608056e-09], "4":
 [1.3247911530621861e-09, 2.0313528986656205e-09], "5": [1.4993872332769961e-09, 2.3322201548214762e-09], "6":
 [1.7211922995795243e-09, 2.7237415257553574e-09], "7": [1.9709779821521803e-09, 3.127660894213944e-09], "8": 
 [2.2719521313715783e-09, 3.683906828365737e-09], "9": [2.6910604870370505e-09, 4.432425549741674e-09], "10": 
 [3.1273717566860052e-09, 5.176267101656894e-09], "11": [3.5214138488359515e-09, 5.88784346872649e-09], "12": 
 [4.344519392306214e-09, 8.075601998567308e-09], "13": [6.055647568060633e-09, 1.15707771780943e-08], "14": 
 [7.891354399664643e-09, 1.4822317894774355e-08], "15": [9.488534907698969e-09, 1.7987926934224552e-08], "16": 
 [1.1972289469619363e-08, 2.4352144139698158e-08], "17": [1.6205665582181165e-08, 3.529177218334523e-08], "18": 
 [2.1726991804436577e-08, 4.890078120743212e-08], "19": [2.747153199034406e-08, 5.80468336800726e-08], "20": 
 [3.181625187151363e-08, 6.282803902888346e-08]}

ions_neg_v141_lrnd_elm25_chv={"0": [7.162722829189713e-10, 1.069396256829605e-09], "1": [8.777038868584165e-10, 1.2946446240931807e-09], "2":
 [1.02759999667648e-09, 1.5009614340173589e-09], "3": [1.1745671577354954e-09, 1.7069296262514892e-09], "4":
 [1.330366866209285e-09, 1.936923662090671e-09], "5": [1.523569941517978e-09, 2.230584153641438e-09], "6":
 [1.763456724402997e-09, 2.5797080071606806e-09], "7": [2.0282822638164162e-09, 2.9544966841050904e-09], "8":
 [2.3814411392836232e-09, 3.585069910708154e-09], "9": [2.975412233075357e-09, 4.4586459608966074e-09], "10":
 [3.5950536764791088e-09, 5.264604651129057e-09], "11": [4.1451727671809455e-09, 6.0533707850852946e-09], "12":
 [4.908070335058664e-09, 7.554663305133556e-09], "13": [6.370251631603248e-09, 9.708647141925531e-09], "14":
 [7.99768693004782e-09, 1.1807392092933995e-08], "15": [9.421664599359865e-09, 1.3820815597006762e-08], "16":
 [1.1077372179456622e-08, 1.6484128380953205e-08], "17": [1.3672218205379754e-08, 2.166074698335169e-08], "18":
 [1.8004147704745442e-08, 2.705025422013022e-08], "19": [2.1526595943708422e-08, 3.186462197910958e-08], "20":
 [2.4479448411058165e-08, 3.602967602639687e-08], "21": [2.7167075804775312e-08, 3.995596608326538e-08], "22":
 [2.9709429581149162e-08, 4.347660778516122e-08], "23": [3.2017621669510734e-08, 4.541693553734151e-08], "24":
 [3.3681779306509456e-08, 4.5708942542429593e-08]}

ions_neg_v141_hrnd_elm25_chv={"0": [7.166088691273807e-10, 1.0680529042034586e-09], "1": [8.766077238618173e-10, 1.2948822433267513e-09], "2":
 [1.0299408040688298e-09, 1.499348034087609e-09], "3": [1.1734785792847775e-09, 1.7044494278972901e-09], "4":
 [1.3325133850369467e-09, 1.9382490836139435e-09], "5": [1.5246381007528337e-09, 2.225245739179207e-09], "6":
 [1.7639085256336676e-09, 2.5820656631195802e-09], "7": [2.031125366865139e-09, 2.9500866762907164e-09], "8":
 [2.3797421811002037e-09, 3.588570659248709e-09], "9": [2.974497863791539e-09, 4.454598568481367e-09], "10":
 [3.600685696291343e-09, 5.270248740213011e-09], "11": [4.147441219601736e-09, 6.040242224249017e-09], "12":
 [4.91677135667135e-09, 7.553001655458096e-09], "13": [6.368408114819548e-09, 9.720375884730652e-09], "14":
 [7.983853657878074e-09, 1.1805425434050108e-08], "15": [9.415625209974426e-09, 1.385125858049306e-08], "16":
 [1.1073898998765917e-08, 1.646956079843046e-08], "17": [1.3699649532490082e-08, 2.163235038140565e-08], "18":
 [1.802716724985027e-08, 2.7073230406563897e-08], "19": [2.150965694231408e-08, 3.18014983817787e-08], "20":
 [2.4492014342932346e-08, 3.605523207826252e-08], "21": [2.7196391232036758e-08, 3.9971877339499414e-08], "22":
 [2.9708672750890287e-08, 4.340340668907393e-08], "23": [3.1934336215501455e-08, 4.5516606356023787e-08], "24":
 [3.357606684507602e-08, 4.604620001912706e-08]}

particles_neg_v14_hrnd_elm25_chv={"0": [7.064042348613744e-10, 1.098448642629657e-09], "1": [8.804166704055082e-10, 1.3382314628239056e-09], "2":
 [1.029929687471369e-09, 1.5587130355258417e-09], "3": [1.1715316139497021e-09, 1.7855361801274118e-09], "4":
 [1.3375770105761571e-09, 2.0569498417960264e-09], "5": [1.5213418055613532e-09, 2.3650454557473994e-09], "6":
 [1.7508747954828767e-09, 2.7596483400473384e-09], "7": [2.0016660559184965e-09, 3.1683106443504784e-09], "8":
 [2.3537662142103147e-09, 3.892364103276864e-09], "9": [2.908132768324034e-09, 4.87724949455506e-09], "10":
 [3.4647588825814346e-09, 5.819021297995083e-09], "11": [3.9538012917605546e-09, 6.718817658651353e-09], "12":
 [4.699417872175317e-09, 8.46741114160911e-09], "13": [5.980472218281243e-09, 1.1075320194303405e-08], "14":
 [7.32788314110871e-09, 1.356178222960962e-08], "15": [8.546658915188655e-09, 1.5975204027458133e-08], "16":
 [1.003513710889909e-08, 1.9121314649296197e-08], "17": [1.2584497621235887e-08, 2.5586370462254533e-08], "18":
 [1.6193366252358348e-08, 3.375225311508183e-08], "19": [1.9296366093983868e-08, 4.135787670654157e-08], "20":
 [2.2205359456932222e-08, 4.775609758877974e-08], "21": [2.4949980719630763e-08, 5.296183331803598e-08], "22":
 [2.74591102013903e-08, 5.6781631708117125e-08], "23": [2.9663083522354196e-08, 5.960548252960392e-08], "24":
 [3.1563036449021557e-08, 6.184741780170188e-08]}

particles_pos_v14_hrnd_elm25_chv={"0": [7.078936573300082e-10, 1.1028124202267342e-09], "1": [8.820689074711288e-10, 1.338578480376556e-09], "2":
 [1.0282838004812897e-09, 1.5555733231186909e-09], "3": [1.1690201376888472e-09, 1.78149953583638e-09], "4":
 [1.3348577366613476e-09, 2.052450596702526e-09], "5": [1.5181851404773485e-09, 2.3598931859583523e-09], "6":
 [1.7509905816449534e-09, 2.7622818843531693e-09], "7": [2.005225058134512e-09, 3.173616142257642e-09], "8":
 [2.3525975248633675e-09, 3.883241010415373e-09], "9": [2.894779176315381e-09, 4.847916178622021e-09], "10":
 [3.4477052157384657e-09, 5.792051680298515e-09], "11": [3.944104923721565e-09, 6.701786044525521e-09], "12":
 [4.666951603466218e-09, 8.360587273279385e-09], "13": [5.8997089753584225e-09, 1.0919012148501147e-08], "14":
 [7.252706673752877e-09, 1.342019953209076e-08], "15": [8.486965748259635e-09, 1.586474588608095e-08], "16":
 [9.936765651010928e-09, 1.8874421710969515e-08], "17": [1.246739709406105e-08, 2.5483091135053706e-08], "18":
 [1.6208024471095697e-08, 3.3797206216952925e-08], "19": [1.9338854709118882e-08, 4.146124436417098e-08], "20":
 [2.2243072357824654e-08, 4.783315990811425e-08], "21": [2.4952099445624026e-08, 5.296484079233324e-08], "22":
 [2.7389130112705036e-08, 5.667341170880334e-08], "23": [2.9602350590226694e-08, 5.953977095698898e-08], "24":
 [3.149305285177872e-08, 6.176077534814207e-08]}

ions_neg_v14_lrnd={"0": [7.16444775804687e-10, 1.0700473216535486e-09], "1": [8.766005865635541e-10, 1.2912139078236106e-09], "2":
 [1.0233784015731513e-09, 1.494607599390042e-09], "3": [1.167004143869059e-09, 1.6953050539978397e-09], "4":
 [1.3171140158277396e-09, 1.9129953633709412e-09], "5": [1.5010400712091295e-09, 2.196448726880819e-09], "6":
 [1.7374102549917467e-09, 2.5397495919423145e-09], "7": [1.9987455846433743e-09, 2.909827835540643e-09], "8":
 [2.308391221999045e-09, 3.399628318414748e-09], "9": [2.743654460662328e-09, 4.064377206025429e-09], "10":
 [3.235105579106799e-09, 4.708980203333236e-09], "11": [3.657786198230896e-09, 5.320149444473465e-09], "12":
 [4.40743242351629e-09, 7.235758783698336e-09], "13": [6.341141170615947e-09, 1.0173608443214825e-08], "14":
 [8.61139257420043e-09, 1.2899482426374689e-08], "15": [1.0474248763637253e-08, 1.556121847426194e-08], "16":
 [1.2937549036927316e-08, 2.07105715562843e-08], "17": [1.778277482687919e-08, 2.8143993934982054e-08], "18":
 [2.3703396062208645e-08, 3.638374245002531e-08], "19": [2.9466514534877575e-08, 4.3834357529742775e-08], "20":
 [3.3648711881521194e-08, 4.601489588650497e-08]}

particles_neg_v14_lrnd_elm25_chv={"0": [7.069362615274498e-10, 1.0997213171324974e-09], "1": [8.808566990541904e-10, 1.3378548681525188e-09], "2":
 [1.0289107593642929e-09, 1.5567373372470626e-09], "3": [1.1735882698862885e-09, 1.7819165288950059e-09], "4":
 [1.33907446265167e-09, 2.0566964315737684e-09], "5": [1.5196769831618589e-09, 2.3693061857773854e-09], "6":
 [1.746857574391727e-09, 2.766483303572841e-09], "7": [1.9994652400593872e-09, 3.175124915389092e-09], "8":
 [2.356723607259348e-09, 3.898924433703989e-09], "9": [2.9132704671892387e-09, 4.885896237782877e-09], "10":
 [3.4706415007361796e-09, 5.829183030947836e-09], "11": [3.962593522847839e-09, 6.722985797091222e-09], "12":
 [4.7129989830246345e-09, 8.451324582668515e-09], "13": [5.988747492408655e-09, 1.1071809141751731e-08], "14":
 [7.327476301230385e-09, 1.3565350436407152e-08], "15": [8.540412648877852e-09, 1.5964663591630477e-08], "16":
 [1.0026869681645872e-08, 1.9124820128830518e-08], "17": [1.2598231836692178e-08, 2.5591353853168848e-08], "18":
 [1.6168464020950793e-08, 3.3780310510756356e-08], "19": [1.9217207258451423e-08, 4.1823547718041306e-08], "20":
 [2.2165154711387387e-08, 4.85721513696297e-08], "21": [2.4975870919608738e-08, 5.3775910543120685e-08], "22":
 [2.7516844715923574e-08, 5.7423919806126056e-08], "23": [2.9725447211466474e-08, 6.012495231939605e-08], "24":
 [3.160714147308623e-08, 6.222035590925216e-08]}

# Define standard conditions
temp_ref = 273.15 # K, 0C
pres_ref = 101325.0 # Pa, 1atm

def make_config_template(fn):
    """
    Make a configuration file template

    Parameters
    ----------

    fn : str
        full path to configuration file

        For example `/home/user/config.yml`

    Notes
    -----

    The default values are used to calculate the corrections
    when the relevant data is not available in the diagnostics,
    for example due to a missing or broken sensor.

    """
    with open(fn,"w") as f:
        f.write("measurement_location: # Name of the measurement site\n")
        f.write("data_folder: # Full paths to raw data folders\n")
        f.write("- # Data folder 1\n")
        f.write("- # Data folder 2, and so on...\n")
        f.write("processed_folder: # Full path to folder where procesed data is saved\n")
        f.write("database_file: # Full path to database file (will be created on first run) \n")
        f.write("start_date: # Format: yyyy-mm-dd\n")
        f.write("end_date: # Format: yyyy-mm-dd or '' for current day\n")
        f.write("inlet_length: # length of inlet in meters\n")
        f.write("do_inlet_loss_correction: # true or false\n")
        f.write("convert_to_standard_conditions: # true or false\n")
        f.write("do_wagner_ion_mode_correction: # true or false\n")
        f.write("remove_corona_ions: # true or false\n")
        f.write("remove_noisy_electrometers: # true or false\n")
        f.write("inverter_name: # hires_25, lores_25, lores_21 or '' (needed for noise removal, '' if noise not removed)\n")
        f.write("allow_reprocess: # true or false")
        f.write("choose_better_particle_polarity: # true or false")
        f.write("use_default_values: # true or false")
        f.write("default_temperature: # temperature in K used in corrections as fallback")
        f.write("default_pressure: # pressure in Pa used in corrections as fallback")
        f.write("default_flowrate: # flow rate in lpm used in corrections as fallback")
        f.write("include_flags: # include flags to the data file and make a separete flag file for each day, true or false")

def tubeloss(dpp,pflow,plength,temp,press):
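    # Penetration efficiency through a straight tube due to diffusional
    # losses, using the Gormley & Kennedy laminar-flow expressions.
    # rmuu is the dimensionless deposition parameter pi*D*L/Q.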
    DPP,TEMP = np.meshgrid(dpp,temp)
    DPP,PRESS = np.meshgrid(dpp,press)
    DPP,PFLOW = np.meshgrid(dpp,pflow)
    rmuu = np.pi*af.particle_diffusivity(DPP,TEMP,PRESS)*plength/PFLOW
    pene = np.nan*np.ones(rmuu.shape)
    cond1=rmuu<0.02
    cond2=rmuu>=0.02
    pene[cond1] = 1. - 2.56*rmuu[cond1]**(2./3.) + 1.2*rmuu[cond1]+0.177*rmuu[cond1]**(4./3.)
    pene[cond2] = 0.819*np.exp(-3.657*rmuu[cond2]) + 0.097*np.exp(-22.3*rmuu[cond2]) + 0.032*np.exp(-57.0*rmuu[cond2])
    return pene

def read_file(fn,ftype):
    """
    Read NAIS raw data file into a pandas.DataFrame

    Parameters
    ----------

    fn : str
        Raw data filename with path

    ftype : str
        `"spectra"` (inverted size/mobility distribution) or
        `"records"` (diagnostic data and electrometer currents)

    Returns
    -------

    pandas.DataFrame
        Contents of the file

    pandas.DataFrame
        Explanations of flags, returned only if `ftype="records"`
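
    Examples
    --------

    A minimal sketch with hypothetical file paths::

        spectra_df = read_file("/data/20220101-block-ions.spectra", "spectra")
        records_df, flag_info = read_file("/data/20220101-block.records", "records")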

    """

    with open(fn,'r') as f:

        header_found = False
        data_matrix = []
        flag_explanations = []
        lines = f.read().splitlines()
        
        for line in lines:

             # Skip empty and comments
             if (len(line)==0):
                 continue

             # Collect a list of flags and skip comments
             if line[:6]=="# flag":
                 # parse the line
                 diagnostic_comment_yaml = yaml.safe_load(line[7:].rstrip('\r\n'))
                 flag_name = list(diagnostic_comment_yaml.keys())[0]
                 flag_message = diagnostic_comment_yaml[flag_name]["message"]
                 flag_explanations.append([flag_name,flag_message])
             elif (line[0]=='#'):
                 continue
             else:
                 pass

             # Test if it is a header
             if (header_found==False):
                 if "opmode" in line:
                     delimiter = re.search('(.)opmode',line).group(1)
                     header = line.split(delimiter)
                     number_of_columns = len(header)
                     header_found = True
                     continue
                 else:
                     continue 
             else:
                 data_line = line.split(delimiter)
                 
                 if ((len(data_line)==number_of_columns) & ("opmode" not in data_line)):
                     data_matrix.append(data_line)
                 continue

    if len(data_matrix)==0:
        return None

    else:
        # Convert anything that can be converted to float and the rest is coerced to NaNs
        df = pd.DataFrame(columns = header, data = data_matrix)
        df_flags = pd.DataFrame(columns=["Flag","Message"], data = flag_explanations)

        # records: start_time, end_time, opmode, data..., flags
        # spectra: start_time, end_time, opmode, data...
        if ftype=="records":
            df.iloc[:,3:-1] = df.iloc[:,3:-1].apply(pd.to_numeric, errors='coerce').astype(float)
        if ftype=="spectra":
            df.iloc[:,3:] = df.iloc[:,3:].apply(pd.to_numeric, errors='coerce').astype(float)

        # Establish begin_time (first column) as index
        df = df.set_index(df.columns[0])
        df.index = pd.to_datetime(df.index)

        # if there is no tz information set the timezone to UTC
        df.index = [t.tz_localize('UTC') if (t.tzinfo is None) else t for t in df.index]
        
        if ftype=="records":
            return df, df_flags
        if ftype=="spectra":
            return df

def average_mob(y,h):
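    # Re-bin the inverter's mobility columns onto the standard mob_ion grid:
    # for each target bin take the median of the columns that fall inside it
    # (mean for the last, lowest-mobility bin).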
    data = pd.DataFrame([])
    
    for i in range(0,len(mob_ion_geomeans)):
        if i==0:
            y_block = y.iloc[:,h>mob_ion_geomeans[i]]
        else:
            y_block = y.iloc[:,((h>mob_ion_geomeans[i]) & (h<=mob_ion_geomeans[i-1]))]

        data[i] = y_block.median(axis=1)

    y_block = y.iloc[:,h<=mob_ion_geomeans[i]]
    data[i+1] = y_block.mean(axis=1)

    return data

def average_dp(y,h):
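    # Re-bin the inverter's diameter columns onto the standard dp_par grid
    # using dp_par_geomeans as bin boundaries: median within each bin,
    # mean for the last (largest-diameter) bin.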
    data = pd.DataFrame([])

    for i in range(0,len(dp_par_geomeans)):
        if i==0:
            y_block = y.iloc[:,h<dp_par_geomeans[i]]
        else:
            y_block = y.iloc[:,((h<dp_par_geomeans[i]) & (h>=dp_par_geomeans[i-1]))]

        data[i] = y_block.median(axis=1)

    y_block = y.iloc[:,h>=dp_par_geomeans[i]]
    data[i+1] = y_block.mean(axis=1)

    return data

def find_diagnostic_names(diag_params):
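    # Identify which diagnostic record columns hold the temperature,
    # pressure and sample flow readings. The flow may come from a single
    # sensor or from two sensors (one per polarity) that are summed later.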

    sampleflow_name=None
    temperature_name=None
    pressure_name=None

    for temp_name in possible_temperature_names:
         if temp_name in diag_params:
             temperature_name = temp_name
             break

    for pres_name in possible_pressure_names:
        if pres_name in diag_params:
            pressure_name = pres_name
            break

    # try single flow sensor
    for flow_name in possible_sampleflow_names1:
        if flow_name in diag_params:
            sampleflow_name = flow_name
            break

    if sampleflow_name is None:
        # try two flow sensors
        sf_name = []
        for flow_name in possible_sampleflow_names2:
            if flow_name in diag_params:
                sf_name.append(flow_name)
        if len(sf_name)==2:
            sampleflow_name=sf_name

    return temperature_name, pressure_name, sampleflow_name

def process_data(df,mode):
    """
    Convert an inverted spectra DataFrame into negative and positive
    polarity number size distributions on the standard size grid
    """

    if (df is None):
        return None, None

    elif not df.index.to_series().is_monotonic_increasing:
        return None, None

    else:
        df_columns = df.columns
        df_inverter_reso = int((len(df_columns)-2)/4)

        neg_df = df.iloc[:,2:2+df_inverter_reso]
        pos_df = df.iloc[:,2+2*df_inverter_reso:2+3*df_inverter_reso]

        if mode=="ions":
            mob_ion_inv = np.array([float(re.findall(r"[-+]?\d*\.\d+|\d+",y)[0])
                                    for y in df_columns[2:2+df_inverter_reso]])

            neg_df = average_mob(neg_df,mob_ion_inv)
            pos_df = average_mob(pos_df,mob_ion_inv)

            # Convert to number size distributions
            neg_df = neg_df * dlogmob_ion / dlogdp_ion
            pos_df = pos_df * dlogmob_ion / dlogdp_ion

        if mode=="particles":
            dp_par_inv = 2.0*np.array([float(re.findall(r"[-+]?\d*\.\d+|\d+",y)[0])
                                       for y in df_columns[2:2+df_inverter_reso]])
        
            neg_df = average_dp(neg_df,dp_par_inv)
            pos_df = average_dp(pos_df,dp_par_inv)

        # Construct the headers
        if mode=="ions":
            df_header = dp_ion*1e-9
        if mode=="particles":
            df_header = dp_par*1e-9

        negdf = pd.DataFrame(columns=df_header, index=df.index, data=neg_df.values)
        posdf = pd.DataFrame(columns=df_header, index=df.index, data=pos_df.values)

        negdf.index.name = "Time"
        posdf.index.name= "Time"

        if negdf.isna().all().all():
            negdf = None
        if posdf.isna().all().all():
            posdf = None

        return negdf, posdf

def get_environmental_data(
    df,
    rec,
    mode,
    use_default_values,
    default_pressure,
    default_temperature,
    default_flowrate):
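    # Build temperature (K), pressure (Pa) and sample flow (L/min) time
    # series matched to the data timestamps. If a reading is missing and
    # use_default_values is enabled, the configured defaults are used instead.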

    if ((rec is None) or (df is None)):
        return None,None,None

    else:        
        # Extract the records that match the mode
        if mode=="ions":
            df_rec = rec[rec.opmode=='ions']
        if mode=="particles":
            df_rec = rec[rec.opmode=='particles']

        if not df_rec.index.to_series().is_monotonic_increasing:
            return None,None,None
        
        df_rec = df_rec.reindex(df.index,method="nearest")

        # Check that the relevant diagnostic data is found
        t_name,p_name,sf_name = find_diagnostic_names(list(df_rec))

        if t_name is not None:
            t_df = 273.15 + pd.DataFrame(df_rec[t_name].astype(float))
            # Values may be missing: e.g. sensor is broken
            if (t_df.isna().all().all() and use_default_values):
                t_df = pd.DataFrame(index = df.index)
                t_df[0] = default_temperature
        elif use_default_values:
            t_df = pd.DataFrame(index = df.index)
            t_df[0] = default_temperature
        else:
            t_df = None

        if p_name is not None:
            p_df = 100.0 * pd.DataFrame(df_rec[p_name].astype(float))
            if (p_df.isna().all().all() and use_default_values):
                p_df = pd.DataFrame(index = df.index)
                p_df[0] = default_pressure
        elif use_default_values:
            p_df = pd.DataFrame(index = df.index)
            p_df[0] = default_pressure
        else:
            p_df = None

        if sf_name is not None:
            if len(sf_name)==2:
                flow_df = pd.DataFrame(df_rec[sf_name].sum(axis=1,min_count=2).astype(float))
            else:
                flow_df = pd.DataFrame(df_rec[sf_name].astype(float))
            # Test if the sampleflow is in cm3/s (old models) or
            # l/min and if necessary convert to l/min
            if (np.nanmedian(flow_df)>300):
                flow_df = (flow_df/1000.0) * 60.0
            else:
                pass
            if (flow_df.isna().all().all() and use_default_values):
                flow_df = pd.DataFrame(index = df.index)
                flow_df[0] = default_flowrate
        elif use_default_values:
            flow_df = pd.DataFrame(index = df.index)
            flow_df[0] = default_flowrate
        else:
            flow_df = None

        # Sanity check the values
        if t_df is not None:
            t_df = t_df.where(((t_df>=223.)&(t_df<=353.)),np.nan)
        
        if p_df is not None:
            p_df = p_df.where(((p_df>=37000.)&(p_df<=121000.)),np.nan)
        
        if flow_df is not None:
            flow_df = flow_df.where(((flow_df>=48.)&(flow_df<=65.)),np.nan)
     
        return t_df, p_df, flow_df

def bring_to_sealevel(
    df,
    t_df,
    p_df):
    """
    Notes
    -----

    The NAIS maintains a constant volumetric flow rate. However, air
    expands and compresses depending on pressure and temperature,
    changing the number of particles per unit volume. In order to
    compare concentrations, they are transformed to standard conditions.
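
    The correction applied here multiplies each concentration by
    (pres_ref * T) / (temp_ref * p), where T and p are the measured
    temperature and pressure.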
    """

    if ((df is None) or (t_df is None) or (p_df is None)):
        return None
    else:
        stp_corr_df = (pres_ref*t_df.values)/(temp_ref*p_df.values)
        df = stp_corr_df * df
        
        return df

def correct_inlet_losses(
    df,
    mode,
    pipe_length,
    t_df,
    p_df,
    flow_df):

    if ((df is None) or (t_df is None) or (p_df is None) or (flow_df is None)):
        return None

    # Diffusion loss correction
    if mode=="ions":
        throughput = tubeloss(dp_ion*1e-9,flow_df.values*1.667e-5,pipe_length,t_df.values,p_df.values)
    if mode=="particles":
        throughput = tubeloss(dp_par*1e-9,flow_df.values*1.667e-5,pipe_length,t_df.values,p_df.values)
    
    df = df / throughput

    return df
 
def wagner_ion_mode_correction(df):
    if df is None:
        return None
    else:
        roberts_corr = 0.713*dp_ion**0.120
        df = df / roberts_corr
     
        return df

def add_flags(
    df,
    rec,
    mode):

    if ((rec is None) or (df is None)):
        return None

    else:        
        # Extract the records that match the mode
        if mode=="ions":
            df_rec = rec[rec.opmode=='ions']
        if mode=="particles":
            df_rec = rec[rec.opmode=='particles']

        if not df_rec.index.to_series().is_monotonic_increasing:
            return None
        
        df_rec = df_rec.reindex(df.index,method="nearest")

        # Read the flags column from records and add it to 
        # the final data as the first column
        df.insert(0,"Flags",df_rec["flags"])

        return df


def clean_elem_noise(
    df,
    rec,
    mode,
    polarity,
    inverter_name):
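    # Remove periods affected by noisy electrometers. Noise is estimated
    # both from the electrometer standard deviations in the records and
    # from the variability of the inverted data; data is removed only
    # where the two indicators agree.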

    if ((df is None) or (rec is None)):
        return None

    if inverter_name =="hires_25":
        if mode=="ions":
            if polarity=="neg":
                elm2dp = ions_neg_v141_hrnd_elm25_chv
            if polarity=="pos":
                elm2dp = ions_pos_v141_hrnd_elm25_chv
        if mode=="particles":
            if polarity=="neg":
                elm2dp = particles_neg_v14_hrnd_elm25_chv
            if polarity=="pos":
                elm2dp = particles_pos_v14_hrnd_elm25_chv
    elif inverter_name == "lores_25":
        if mode=="ions":
            if polarity=="neg":
                elm2dp = ions_neg_v141_lrnd_elm25_chv
            if polarity=="pos":
                elm2dp = ions_pos_v141_lrnd_elm25_chv
        if mode=="particles":
            if polarity=="neg":
                elm2dp = particles_neg_v14_lrnd_elm25_chv
            if polarity=="pos": 
                elm2dp = particles_pos_v14_lrnd_elm25_chv
    elif inverter_name == "lores_21":
        if mode=="ions":
            if polarity=="neg":
                elm2dp = ions_neg_v14_lrnd
            if polarity=="pos":
                elm2dp = ions_pos_v14_lrnd
        if mode=="particles":
            if polarity=="neg":
                elm2dp = particles_neg_v14_lrnd
            if polarity=="pos": 
                elm2dp = particles_pos_v14_lrnd
    else:
        return df

    # Extract the records that match the mode
    if mode=="ions":
        df_rec = rec[rec.opmode=='ions']
    if mode=="particles":
        df_rec = rec[rec.opmode=='particles']

    df_rec = df_rec.reindex(df.index,method="nearest")

    elm2dp = {int(k):v for k,v in elm2dp.items()}
    number_of_elms = len(elm2dp)

    # Rolling time windows
    reso_in_seconds = (df.index[1]-df.index[0]).seconds
    small_window = int((10.*60.)/(reso_in_seconds))          # 10 minutes
    medium_window = int((4.*60.*60.)/(reso_in_seconds))      # 4 hours
    large_window = int((12.*60.*60.)/(reso_in_seconds))      # 12 hours

    # NOISE LEVEL FROM THE RECORDS
    if polarity == "neg":
        df_std = df_rec.iloc[:,2+2*number_of_elms:2+3*number_of_elms]
    if polarity == "pos":
        df_std = df_rec.iloc[:,2+3*number_of_elms:2+4*number_of_elms]
    else:
        return None

    # Set index to electrometer number
    elm_header = np.arange(0,number_of_elms).astype(int)
    df_std.columns = elm_header

    # Calculate noise level at each diameter
    df_std2 = df.copy()

    for d in df.columns.values:
        elms = []
        for elm in df_std.columns.values:
            if ((d >= elm2dp[elm][0]) & (d <= elm2dp[elm][1])):
                elms.append(elm)
        df_std2[d] = df_std[elms].mean(axis=1).values
    
    # Apply medium window to get rid of small fluctuations in electrometer noise
    df_std2 = df_std2.rolling(medium_window, min_periods=int((medium_window+1.)/2.), center=True).median()      
    
    # Get the median noise
    median_std2 = np.nanmedian(df_std2)

    # Then find where the noise is more than N times median 
    N = 500
    df_std3 = df_std2.where((df_std2>N*median_std2), np.nan)
    # NOISE LEVEL FROM THE INVERTED DATA

    # Calculate standard deviation in 10 min segments
    df2 = df.rolling(small_window, min_periods=int((small_window+1.)/2.), center=True).std()

    # In a bigger window (12 hours) calculate the 75th quantile of the standard deviations
    # (semi)continuous noise causes higher values compared to normal and rare sudden changes in conc
    df2 = df2.rolling(large_window, min_periods=int((large_window+1.)/2.), center=True).quantile(0.75)

    # find where the noise is more than M times the median
    M = 7
    threshold = M*np.nanmedian(df2)
    
    df3 = df2.where(df2 > threshold, np.nan)

    # REMOVE DATA FROM WHERE THE ELECTROMETER NOISE AND THE INVERTED DATA NOISE AGREE
    df = df[df3.isna() & df_std3.isna()]
    
    return df


def clean_corona_ions(
    df,
    rec,
    mode):
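    # Estimate the smallest particle diameter affected by charger (corona)
    # ions from the largest jump in the size-bin medians between 1.5 and
    # 5 nm, and discard all smaller size bins.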

    if ((df is None) or (rec is None)):
        return None

    # Only consider likely limit range
    lower = 1.5e-9
    upper = 5.0e-9
    c = (lower <= df.columns.values) & (upper >= df.columns.values)
    df2 = df.loc[:, c]
 
    # Find maximum difference between size bin medians
    corona_lim = df2.columns.values[df2.median().diff().abs().argmax()]
 
    # Set values below corona ion limit to NaNs
    df.iloc[:,df.columns.values<=corona_lim]=np.nan

    return df

def choose_particle_polarity(negpar,pospar):

    if ((negpar is None) & (pospar is None)):
        return None
    elif ((negpar is None) & (pospar is not None)):
        return "pos"
    elif ((pospar is None) & (negpar is not None)):
        return "neg"
    else:
        pass

    # Calculate number concentration between 2-3 nm
    # and determine the better polarity based on that
    neg_conc = af.calc_conc(negpar,2e-9,3e-9)
    pos_conc = af.calc_conc(pospar,2e-9,3e-9)

    neg_med = float(neg_conc.median())
    pos_med = float(pos_conc.median())

    if (np.isnan(neg_med) & np.isnan(pos_med)):
        return None
    elif np.isnan(neg_med):
        return "pos"
    elif np.isnan(pos_med):
        return "neg"
    elif (pos_med<neg_med):
        return "pos"
    else:
        return "neg"

def nais_processor(config_file):
    """ Processes NAIS data

    Parameters
    ----------

    config_file : str
        full path to configuration file

    """

    with open(config_file,'r') as stream:
        config = yaml.safe_load(stream)
        load_path = config['data_folder']
        save_path = config['processed_folder']
        start_date = config['start_date']
        database = config['database_file']
        location = config['measurement_location']
        end_date = config['end_date']
        allow_reprocess = config["allow_reprocess"]
        pipelength = config['inlet_length']
        do_inlet_loss_correction = config['do_inlet_loss_correction']
        convert_to_standard_conditions = config['convert_to_standard_conditions']
        do_wagner_ion_mode_correction = config["do_wagner_ion_mode_correction"]
        choose_better_particle_polarity=config["choose_better_particle_polarity"]
        remove_noisy_electrometers = config["remove_noisy_electrometers"]
        remove_corona_ions = config["remove_corona_ions"]
        inverter_name = config["inverter_name"]
        use_default_values = config["use_default_values"]
        default_temperature = config["default_temperature"]
        default_pressure = config["default_pressure"]
        default_flowrate = config["default_flowrate"]
        include_flags = config["include_flags"]

    assert isinstance(start_date,date)
    assert (end_date=='' or isinstance(end_date,date))
    assert os.path.exists(save_path)
    assert all([os.path.exists(x) for x in load_path])
    assert isinstance(allow_reprocess,bool)
    assert isinstance(remove_corona_ions,bool)
    assert isinstance(remove_noisy_electrometers,bool)
    assert isinstance(convert_to_standard_conditions,bool)
    assert isinstance(do_wagner_ion_mode_correction,bool)
    assert isinstance(do_inlet_loss_correction,bool)
    assert isinstance(choose_better_particle_polarity,bool)
    assert ((inverter_name=="hires_25") | (inverter_name=="lores_25") | (inverter_name=="lores_21") | (inverter_name==''))
    assert (isinstance(pipelength,(float, int)) & (not isinstance(pipelength,bool)))
    assert isinstance(use_default_values,bool)
    assert ((isinstance(default_temperature,(float, int)) & (not isinstance(default_temperature,bool))) | (default_temperature==''))
    assert ((isinstance(default_pressure,(float, int)) & (not isinstance(default_pressure,bool))) |  (default_pressure==''))
    assert ((isinstance(default_flowrate,(float, int)) & (not isinstance(default_flowrate,bool))) | (default_flowrate==''))
    assert isinstance(include_flags,bool)

    end_date = date.today() if end_date=='' else end_date

    db = TinyDB(database)
    check = Query()

    start_dt = pd.to_datetime(start_date)
    end_dt = pd.to_datetime(end_date)

    start_date_str = start_dt.strftime("%Y%m%d")
    end_date_str = end_dt.strftime("%Y%m%d")

    # list existing dates based on whether a diagnostic file was found
    list_of_existing_dates = [x["timestamp"] for x in db.search(check.diagnostics.exists())]

    if len(list_of_existing_dates)==0:
        print("building database...")
        list_of_datetimes = pd.date_range(start=start_date_str, end=end_date_str)
    else:
        last_existing_date = sorted(list_of_existing_dates)[-1]
        list_of_datetimes = pd.date_range(start=last_existing_date, end=end_date_str)
    
    # Add unprocessed datafiles to the database
    for x in list_of_datetimes:
        if (x.strftime("%Y%m%d") in list_of_existing_dates):
            continue
        else:
            files_found=False
            for z in load_path:
                for y in filename_formats:

                    ion_fn = os.path.join(z,x.strftime(y[0]))
                    particle_fn = os.path.join(z,x.strftime(y[1]))
                    diagnostic_fn = os.path.join(z,x.strftime(y[2]))

                    if ( (os.path.exists(ion_fn) | # ions
                         os.path.exists(particle_fn)) & # particles
                         os.path.exists(diagnostic_fn) # diagnostics
                       ):

                        dtstr = x.strftime("%Y%m%d")

                        db.insert(
                            {"timestamp":dtstr,
                            "diagnostics":diagnostic_fn}
                            )

                        if os.path.exists(ion_fn):
                            db.update(
                                {"ions":ion_fn},
                                check.timestamp==dtstr)

                        if os.path.exists(particle_fn):
                            db.update(
                                {"particles":particle_fn},
                                check.timestamp==dtstr)

                        files_found=True
                        break

                if files_found:
                    break

    # From the database find the last day with processed data
    processed_days = db.search(
        check.processed_neg_ion_file.exists() |
        check.processed_pos_ion_file.exists() |
        check.processed_neg_particle_file.exists() |
        check.processed_pos_particle_file.exists())

    if len(processed_days)!=0:
        last_day=np.max([datetime.strptime(x["timestamp"],"%Y%m%d") for x in processed_days]).strftime("%Y%m%d")
    else:
        last_day=end_date_str

    if allow_reprocess:
        iterator1 = iter(db.search(
         (check.diagnostics.exists() &
          (check.ions.exists() |
          check.particles.exists()) &
          (check.timestamp>=start_date_str) &
          (check.timestamp<=end_date_str))))
    else:
        iterator1 = iter(db.search(
            (check.diagnostics.exists() &
             (check.ions.exists() |
             check.particles.exists()) &
             (check.timestamp>=last_day) &
             (check.timestamp>=start_date_str) &
             (check.timestamp<=end_date_str))
            ))

    for x in iterator1:

        print("processing %s (%s)" % (x["timestamp"],location))

        ions_exist=bool(db.search(
            check.ions.exists() &
            (check.timestamp==x["timestamp"])))

        particles_exist=bool(db.search(
            check.particles.exists() &
            (check.timestamp==x["timestamp"])))

        records,flags = read_file(x["diagnostics"],"records")

        if include_flags:
            my_save_path_flags = os.path.join(save_path,"NAIS"+x["timestamp"]+".flags")
            flags.to_csv(my_save_path_flags,index=False)

        # ions
        if ions_exist:

            ions = read_file(x["ions"],"spectra")

            negion_datamatrix,posion_datamatrix = process_data(ions,"ions")

            if (convert_to_standard_conditions or do_inlet_loss_correction):
                temperature_ion_df,pressure_ion_df,flowrate_ion_df = get_environmental_data(
                    negion_datamatrix,
                    records,
                    "ions",
                    use_default_values,
                    default_pressure,
                    default_temperature,
                    default_flowrate)

            if convert_to_standard_conditions:
                 negion_datamatrix = bring_to_sealevel(negion_datamatrix,temperature_ion_df,pressure_ion_df)
                 posion_datamatrix = bring_to_sealevel(posion_datamatrix,temperature_ion_df,pressure_ion_df)

            if do_inlet_loss_correction:
                 negion_datamatrix = correct_inlet_losses(
                      negion_datamatrix,
                      "ions",
                      pipelength,
                      temperature_ion_df,
                      pressure_ion_df,
                      flowrate_ion_df)
                 posion_datamatrix = correct_inlet_losses(
                      posion_datamatrix,
                      "ions",
                      pipelength,
                      temperature_ion_df,
                      pressure_ion_df,
                      flowrate_ion_df)

            if do_wagner_ion_mode_correction:
                negion_datamatrix = wagner_ion_mode_correction(negion_datamatrix)
                posion_datamatrix = wagner_ion_mode_correction(posion_datamatrix)
 
            if remove_noisy_electrometers: 
                negion_datamatrix = clean_elem_noise(
                       negion_datamatrix,
                       records,
                       "ions",
                       "neg",
                       inverter_name)
 
                posion_datamatrix = clean_elem_noise(
                       posion_datamatrix,
                       records,
                       "ions",
                       "pos",
                       inverter_name)
            
            if (negion_datamatrix is not None):

                if include_flags:
                    negion_datamatrix = add_flags(negion_datamatrix,records,"ions")
                    
                my_save_path_neg=os.path.join(save_path,"NAISn"+x["timestamp"]+"nds.sum")
                negion_datamatrix.to_csv(my_save_path_neg)

                db.update({"processed_neg_ion_file": my_save_path_neg},
                    check.timestamp==x["timestamp"])

            if (posion_datamatrix is not None):

                if include_flags:
                    posion_datamatrix = add_flags(posion_datamatrix,records,"ions")

                my_save_path_pos = os.path.join(save_path,"NAISp"+x["timestamp"]+"nds.sum")
                posion_datamatrix.to_csv(my_save_path_pos)

                db.update({"processed_pos_ion_file": my_save_path_pos},
                    check.timestamp==x["timestamp"])

        # particles
        if particles_exist:

            particles = read_file(x["particles"], "spectra")

            negpar_datamatrix,pospar_datamatrix = process_data(particles,"particles")

            if (convert_to_standard_conditions or do_inlet_loss_correction):
                temperature_particle_df,pressure_particle_df,flowrate_particle_df = get_environmental_data(
                    negpar_datamatrix,
                    records,
                    "particles",
                    use_default_values,
                    default_pressure,
                    default_temperature,
                    default_flowrate)

            if convert_to_standard_conditions:
                 negpar_datamatrix = bring_to_sealevel(negpar_datamatrix,temperature_particle_df,pressure_particle_df)
                 pospar_datamatrix = bring_to_sealevel(pospar_datamatrix,temperature_particle_df,pressure_particle_df)

            if do_inlet_loss_correction:
                 negpar_datamatrix = correct_inlet_losses(
                      negpar_datamatrix,
                      "particles",
                      pipelength,
                      temperature_particle_df,
                      pressure_particle_df,
                      flowrate_particle_df)
                 pospar_datamatrix = correct_inlet_losses(
                      pospar_datamatrix,
                      "particles",
                      pipelength,
                      temperature_particle_df,
                      pressure_particle_df,
                      flowrate_particle_df)

            if remove_noisy_electrometers:
                negpar_datamatrix = clean_elem_noise(
                       negpar_datamatrix,
                       records,
                       "particles",
                       "neg",
                       inverter_name)
 
                pospar_datamatrix = clean_elem_noise(
                       pospar_datamatrix,
                       records,
                       "particles",
                       "pos",
                       inverter_name)
 
            if choose_better_particle_polarity:
                better_polarity = choose_particle_polarity(
                        negpar_datamatrix,
                        pospar_datamatrix)
            else:
                better_polarity = None
 
            if remove_corona_ions:
                negpar_datamatrix = clean_corona_ions(
                    negpar_datamatrix,
                    records,
                    "particles")
 
                pospar_datamatrix = clean_corona_ions(
                    pospar_datamatrix,
                    records,
                    "particles")

            if ((negpar_datamatrix is not None) & 
               ((choose_better_particle_polarity==True) & (better_polarity=="neg") |
               (choose_better_particle_polarity==False))):
                
                if include_flags:
                    negpar_datamatrix = add_flags(negpar_datamatrix,records,"particles")

                my_save_path_neg=os.path.join(save_path,"NAISn"+x["timestamp"]+"np.sum")
                negpar_datamatrix.to_csv(my_save_path_neg)

                db.update({"processed_neg_particle_file": my_save_path_neg},
                    check.timestamp==x["timestamp"])

            if ((pospar_datamatrix is not None) &
               ((choose_better_particle_polarity==True) & (better_polarity=="pos") |
               (choose_better_particle_polarity==False))): 

                if include_flags:
                    pospar_datamatrix = add_flags(pospar_datamatrix,records,"particles")

                my_save_path_pos=os.path.join(save_path,"NAISp"+x["timestamp"]+"np.sum")
                pospar_datamatrix.to_csv(my_save_path_pos)

                db.update({"processed_pos_particle_file": my_save_path_pos},
                    check.timestamp==x["timestamp"])

    print("Done!")


def combine_databases(database_list, combined_database):
    """Combine JSON databases

    If the measurement setup changes one may have to use multiple configuration files
    which results in multiple databases. With this function you can combine the databases
    into a single database after processing.

    Parameters
    ----------

    database_list : list of str
        Full paths to the databases that should be combined

        The first database should contain the earliest data, the second
        database the second earliest, and so on

    combined_database : str
        full path to combined database
    
    """

    DB = {}
    i = 0

    for database in database_list:

        with open(database) as fid:
            database_json = json.load(fid)

        for key in database_json["_default"]:
            DB[i] = database_json["_default"][key]
            i=i+1

    with open(combined_database, "w") as f:
        json.dump({"_default":DB},f)

Functions

def combine_databases(database_list, combined_database)

Combine JSON databases

If the measurement setup changes one may have to use multiple configuration files which results in multiple databases. With this function you can combine the databases into a single database after processing.

Parameters

database_list : list of str

Full paths to the databases that should be combined

The first database should contain the earliest data, the second database the second earliest, and so on

combined_database : str

Full path to the combined database
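
A minimal usage sketch (the database paths are hypothetical):

    from nais_processor import combine_databases

    combine_databases(
        ["/home/user/nais_db_2019.json", "/home/user/nais_db_2020.json"],
        "/home/user/nais_db_combined.json")
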
def make_config_template(fn)

Make a configuration file template

Parameters

fn : str

full path to configuration file

For example /home/user/config.yml

Notes

The default values are used to calculate the corrections when the relevant data is not available in the diagnostics, for example due to a missing or broken sensor.
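
A minimal usage sketch (the path and values are hypothetical); after writing the template, fill in the fields before running the processor:

    from nais_processor import make_config_template

    make_config_template("/home/user/config.yml")

A filled-in configuration might then begin like this, with the remaining fields completed in the same way:

    measurement_location: Hyytiala
    data_folder:
    - /data/nais/raw
    processed_folder: /data/nais/processed
    database_file: /home/user/nais_db.json
    start_date: 2022-01-01
    end_date: ''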

Expand source code
def make_config_template(fn):
    """
    Make a configuration file template

    Parameters
    ----------

    fn : str
        full path to configuration file

        For example `/home/user/config.yml`

    Notes
    -----

    The default values are used to calculate the corrections
    in case the data is not available in the diagnostic data,
    for example due to a missing or broken sensor.

    """
    with open(fn,"w") as f:
        f.write("measurement_location: # Name of the measurement site\n")
        f.write("data_folder: # Full paths to raw data folders\n")
        f.write("- # Data folder 1\n")
        f.write("- # Data folder 2, and so on...\n")
        f.write("processed_folder: # Full path to folder where procesed data is saved\n")
        f.write("database_file: # Full path to database file (will be created on first run) \n")
        f.write("start_date: # Format: yyyy-mm-dd\n")
        f.write("end_date: # Format: yyyy-mm-dd or '' for current day\n")
        f.write("inlet_length: # length of inlet in meters\n")
        f.write("do_inlet_loss_correction: # true or false\n")
        f.write("convert_to_standard_conditions: # true or false\n")
        f.write("do_wagner_ion_mode_correction: # true or false\n")
        f.write("remove_corona_ions: # true or false\n")
        f.write("remove_noisy_electrometers: # true or false\n")
        f.write("inverter_name: # hires_25, lores_25, lores_21 or '' (needed for noise removal, '' if noise not removed)\n")
        f.write("allow_reprocess: # true or false")
        f.write("choose_better_particle_polarity: # true or false")
        f.write("use_default_values: # true or false")
        f.write("default_temperature: # temperature in K used in corrections as fallback")
        f.write("default_pressure: # pressure in Pa used in corrections as fallback")
        f.write("default_flowrate: # flow rate in lpm used in corrections as fallback")
        f.write("include_flags: # include flags to the data file and make a separete flag file for each day, true or false")
def nais_processor(config_file)

Processes NAIS data

Parameters

config_file : str
full path to configuration file
Expand source code
def nais_processor(config_file):
    """ Processes NAIS data

    Parameters
    ----------

    config_file : str
        full path to configuration file

    """

    with open(config_file,'r') as stream:
        config = yaml.safe_load(stream)
        load_path = config['data_folder']
        save_path = config['processed_folder']
        start_date = config['start_date']
        database = config['database_file']
        location = config['measurement_location']
        end_date = config['end_date']
        allow_reprocess = config["allow_reprocess"]
        pipelength = config['inlet_length']
        do_inlet_loss_correction = config['do_inlet_loss_correction']
        convert_to_standard_conditions = config['convert_to_standard_conditions']
        do_wagner_ion_mode_correction = config["do_wagner_ion_mode_correction"]
        choose_better_particle_polarity=config["choose_better_particle_polarity"]
        remove_noisy_electrometers = config["remove_noisy_electrometers"]
        remove_corona_ions = config["remove_corona_ions"]
        inverter_name = config["inverter_name"]
        use_default_values = config["use_default_values"]
        default_temperature = config["default_temperature"]
        default_pressure = config["default_pressure"]
        default_flowrate = config["default_flowrate"]
        include_flags = config["include_flags"]

    db = TinyDB(database)
    check = Query()

    assert isinstance(start_date,date)
    assert (end_date=='' or isinstance(end_date,date))
    assert os.path.exists(save_path)
    assert all([os.path.exists(x) for x in load_path])
    assert isinstance(allow_reprocess,bool)
    assert isinstance(remove_corona_ions,bool)
    assert isinstance(remove_noisy_electrometers,bool)
    assert isinstance(convert_to_standard_conditions,bool)
    assert isinstance(do_wagner_ion_mode_correction,bool)
    assert isinstance(do_inlet_loss_correction,bool)
    assert isinstance(choose_better_particle_polarity,bool)
    assert ((inverter_name=="hires_25") | (inverter_name=="lores_25") | (inverter_name=="lores_21") | (inverter_name==''))
    assert (isinstance(pipelength,(float, int)) & (not isinstance(pipelength,bool)))
    assert isinstance(use_default_values,bool)
    assert ((isinstance(default_temperature,(float, int)) & (not isinstance(default_temperature,bool))) | (default_temperature==''))
    assert ((isinstance(default_pressure,(float, int)) & (not isinstance(default_pressure,bool))) |  (default_pressure==''))
    assert ((isinstance(default_flowrate,(float, int)) & (not isinstance(default_flowrate,bool))) | (default_flowrate==''))
    assert isinstance(include_flags,bool)

    end_date = date.today() if end_date=='' else end_date

    start_dt = pd.to_datetime(start_date)
    end_dt = pd.to_datetime(end_date)

    start_date_str = start_dt.strftime("%Y%m%d")
    end_date_str = end_dt.strftime("%Y%m%d")

    # list existing dates based on if diagnostic file was found
    list_of_existing_dates = [x["timestamp"] for x in db.search(check.diagnostics.exists())]

    if len(list_of_existing_dates)==0:
        print("building database...")
        list_of_datetimes = pd.date_range(start=start_date_str, end=end_date_str)
    else:
        last_existing_date = sorted(list_of_existing_dates)[-1]
        list_of_datetimes = pd.date_range(start=last_existing_date, end=end_date_str)
    
    # Add unprocessed datafiles to the database
    for x in list_of_datetimes:
        if (x.strftime("%Y%m%d") in list_of_existing_dates):
            continue
        else:
            files_found=False
            for z in load_path:
                for y in filename_formats:

                    ion_fn = os.path.join(z,x.strftime(y[0]))
                    particle_fn = os.path.join(z,x.strftime(y[1]))
                    diagnostic_fn = os.path.join(z,x.strftime(y[2]))

                    if ( (os.path.exists(ion_fn) | # ions
                         os.path.exists(particle_fn)) & # particles
                         os.path.exists(diagnostic_fn) # diagnostics
                       ):

                        dtstr = x.strftime("%Y%m%d")

                        db.insert(
                            {"timestamp":dtstr,
                            "diagnostics":diagnostic_fn}
                            )

                        if os.path.exists(ion_fn):
                            db.update(
                                {"ions":ion_fn},
                                check.timestamp==dtstr)

                        if os.path.exists(particle_fn):
                            db.update(
                                {"particles":particle_fn},
                                check.timestamp==dtstr)

                        files_found=True
                        break

                if files_found:
                    break

    # From the database find the last day with processed data
    processed_days = db.search(
        check.processed_neg_ion_file.exists() |
        check.processed_pos_ion_file.exists() |
        check.processed_neg_particle_file.exists() |
        check.processed_pos_particle_file.exists())

    if len(processed_days)!=0:
        last_day=np.max([datetime.strptime(x["timestamp"],"%Y%m%d") for x in processed_days]).strftime("%Y%m%d")
    else:
        last_day=end_date_str

    if allow_reprocess:
        iterator1 = iter(db.search(
         (check.diagnostics.exists() &
          (check.ions.exists() |
          check.particles.exists()) &
          (check.timestamp>=start_date_str) &
          (check.timestamp<=end_date_str))))
    else:
        iterator1 = iter(db.search(
            (check.diagnostics.exists() &
             (check.ions.exists() |
             check.particles.exists()) &
             (check.timestamp>=last_day) &
             (check.timestamp>=start_date_str) &
             (check.timestamp<=end_date_str))
            ))

    for x in iterator1:

        print("processing %s (%s)" % (x["timestamp"],location))

        ions_exist=bool(db.search(
            check.ions.exists() &
            (check.timestamp==x["timestamp"])))

        particles_exist=bool(db.search(
            check.particles.exists() &
            (check.timestamp==x["timestamp"])))

        records,flags = read_file(x["diagnostics"],"records")

        if include_flags:
            my_save_path_flags = os.path.join(save_path,"NAIS"+x["timestamp"]+".flags")
            flags.to_csv(my_save_path_flags,index=False)

        # ions
        if ions_exist:

            ions = read_file(x["ions"],"spectra")

            negion_datamatrix,posion_datamatrix = process_data(ions,"ions")

            if (convert_to_standard_conditions or do_inlet_loss_correction):
                temperature_ion_df,pressure_ion_df,flowrate_ion_df = get_environmental_data(
                    negion_datamatrix,
                    records,
                    "ions",
                    use_default_values,
                    default_pressure,
                    default_temperature,
                    default_flowrate)

            if convert_to_standard_conditions:
                 negion_datamatrix = bring_to_sealevel(negion_datamatrix,temperature_ion_df,pressure_ion_df)
                 posion_datamatrix = bring_to_sealevel(posion_datamatrix,temperature_ion_df,pressure_ion_df)

            if do_inlet_loss_correction:
                 negion_datamatrix = correct_inlet_losses(
                      negion_datamatrix,
                      "ions",
                      pipelength,
                      temperature_ion_df,
                      pressure_ion_df,
                      flowrate_ion_df)
                 posion_datamatrix = correct_inlet_losses(
                      posion_datamatrix,
                      "ions",
                      pipelength,
                      temperature_ion_df,
                      pressure_ion_df,
                      flowrate_ion_df)

            if do_wagner_ion_mode_correction:
                negion_datamatrix = wagner_ion_mode_correction(negion_datamatrix)
                posion_datamatrix = wagner_ion_mode_correction(posion_datamatrix)
 
            if remove_noisy_electrometers: 
                negion_datamatrix = clean_elem_noise(
                       negion_datamatrix,
                       records,
                       "ions",
                       "neg",
                       inverter_name)
 
                posion_datamatrix = clean_elem_noise(
                       posion_datamatrix,
                       records,
                       "ions",
                       "pos",
                       inverter_name)
            
            if (negion_datamatrix is not None):

                if include_flags:
                    negion_datamatrix = add_flags(negion_datamatrix,records,"ions")
                    
                my_save_path_neg=os.path.join(save_path,"NAISn"+x["timestamp"]+"nds.sum")
                negion_datamatrix.to_csv(my_save_path_neg)

                db.update({"processed_neg_ion_file": my_save_path_neg},
                    check.timestamp==x["timestamp"])

            if (posion_datamatrix is not None):

                if include_flags:
                    posion_datamatrix = add_flags(posion_datamatrix,records,"ions")

                my_save_path_pos = os.path.join(save_path,"NAISp"+x["timestamp"]+"nds.sum")
                posion_datamatrix.to_csv(my_save_path_pos)

                db.update({"processed_pos_ion_file": my_save_path_pos},
                    check.timestamp==x["timestamp"])

        # particles
        if particles_exist:

            particles = read_file(x["particles"], "spectra")

            negpar_datamatrix,pospar_datamatrix = process_data(particles,"particles")

            if (convert_to_standard_conditions or do_inlet_loss_correction):
                temperature_particle_df,pressure_particle_df,flowrate_particle_df = get_environmental_data(
                    negpar_datamatrix,
                    records,
                    "particles",
                    use_default_values,
                    default_pressure,
                    default_temperature,
                    default_flowrate)

            if convert_to_standard_conditions:
                 negpar_datamatrix = bring_to_sealevel(negpar_datamatrix,temperature_particle_df,pressure_particle_df)
                 pospar_datamatrix = bring_to_sealevel(pospar_datamatrix,temperature_particle_df,pressure_particle_df)

            if do_inlet_loss_correction:
                 negpar_datamatrix = correct_inlet_losses(
                      negpar_datamatrix,
                      "particles",
                      pipelength,
                      temperature_particle_df,
                      pressure_particle_df,
                      flowrate_particle_df)
                 pospar_datamatrix = correct_inlet_losses(
                      pospar_datamatrix,
                      "particles",
                      pipelength,
                      temperature_particle_df,
                      pressure_particle_df,
                      flowrate_particle_df)

            if remove_noisy_electrometers:
                negpar_datamatrix = clean_elem_noise(
                       negpar_datamatrix,
                       records,
                       "particles",
                       "neg",
                       inverter_name)
 
                pospar_datamatrix = clean_elem_noise(
                       pospar_datamatrix,
                       records,
                       "particles",
                       "pos",
                       inverter_name)
 
            if choose_better_particle_polarity:
                better_polarity = choose_particle_polarity(
                        negpar_datamatrix,
                        pospar_datamatrix)
            else:
                better_polarity = None
 
            if remove_corona_ions:
                negpar_datamatrix = clean_corona_ions(
                    negpar_datamatrix,
                    records,
                    "particles")
 
                pospar_datamatrix = clean_corona_ions(
                    pospar_datamatrix,
                    records,
                    "particles")

            if ((negpar_datamatrix is not None) & 
               ((choose_better_particle_polarity==True) & (better_polarity=="neg") |
               (choose_better_particle_polarity==False))):
                
                if include_flags:
                    negpar_datamatrix = add_flags(negpar_datamatrix,records,"particles")

                my_save_path_neg=os.path.join(save_path,"NAISn"+x["timestamp"]+"np.sum")
                negpar_datamatrix.to_csv(my_save_path_neg)

                db.update({"processed_neg_particle_file": my_save_path_neg},
                    check.timestamp==x["timestamp"])

            if ((pospar_datamatrix is not None) &
               ((choose_better_particle_polarity==True) & (better_polarity=="pos") |
               (choose_better_particle_polarity==False))): 

                if include_flags:
                    pospar_datamatrix = add_flags(pospar_datamatrix,records,"particles")

                my_save_path_pos=os.path.join(save_path,"NAISp"+x["timestamp"]+"np.sum")
                pospar_datamatrix.to_csv(my_save_path_pos)

                db.update({"processed_pos_particle_file": my_save_path_pos},
                    check.timestamp==x["timestamp"])

    print("Done!")
def read_file(fn, ftype)

Read NAIS raw data file into a pandas.DataFrame

Parameters

fn : str
Raw data filename with path
ftype : str
"spectra" (inverted size/mobility distribution) or "records" (diagnostic data and electrometer currents)

Returns

pandas.DataFrame
Contents of the file
pandas.DataFrame
Explanations of flags, returned only if ftype="records"
Expand source code
def read_file(fn,ftype):
    """
    Read NAIS raw data file into a pandas.DataFrame

    Parameters
    ----------

    fn : str
        Raw data filename with path

    ftype : str
        `"spectra"` (inverted size/mobility distribution) or
        `"records"` (diagnostic data and electrometer currents)

    Returns
    -------

    pandas.DataFrame
        Contents of the file

    pandas.DataFrame
        Explanations of flags, returned only if `ftype="records"`

    """

    with open(fn,'r') as f:

        header_found = False
        data_matrix = []
        flag_explanations = []
        lines = f.read().splitlines()
        
        for line in lines:

             # Skip empty and comments
             if (len(line)==0):
                 continue

             # Collect a list of flags and skip comments
             if line[:6]=="# flag":
                 # parse the line
                 diagnostic_comment_yaml = yaml.safe_load(line[7:].rstrip('\r\n'))
                 flag_name = list(diagnostic_comment_yaml.keys())[0]
                 flag_message = diagnostic_comment_yaml[flag_name]["message"]
                 flag_explanations.append([flag_name,flag_message])
             elif (line[0]=='#'):
                 continue
             else:
                 pass

             # Test if it is a header
             if (header_found==False):
                 if "opmode" in line:
                     delimiter = re.search('(.)opmode',line).group(1)
                     header = line.split(delimiter)
                     number_of_columns = len(header)
                     header_found = True
                     continue
                 else:
                     continue 
             else:
                 data_line = line.split(delimiter)
                 
                 if ((len(data_line)==number_of_columns) & ("opmode" not in data_line)):
                     data_matrix.append(data_line)
                 continue

    if len(data_matrix)==0:
        return None

    else:
        # Convert anything that can be converted to float and the rest is coerced to NaNs
        df = pd.DataFrame(columns = header, data = data_matrix)
        df_flags = pd.DataFrame(columns=["Flag","Message"], data = flag_explanations)

        # records: start_time, end_time, opmode, data..., flags
        # spectra: start_time, end_time, opmode, data...
        if ftype=="records":
            df.iloc[:,3:-1] = df.iloc[:,3:-1].apply(pd.to_numeric, errors='coerce').astype(float)
        if ftype=="spectra":
            df.iloc[:,3:] = df.iloc[:,3:].apply(pd.to_numeric, errors='coerce').astype(float)

        # Establish begin_time (first column) as index
        df = df.set_index(df.columns[0])
        df.index = pd.to_datetime(df.index)

        # if there is no tz information set the timezone to UTC
        df.index = [t.tz_localize('UTC') if (t.tzinfo is None) else t for t in df.index]
        
        if ftype=="records":
            return df, df_flags
        if ftype=="spectra":
            return df
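
A minimal sketch of calling read_file directly (the file names are hypothetical placeholders for NAIS raw data files):

from nais_processor import read_file

# Inverted spectra: returns a single DataFrame, or None if the file contains no data rows
ions = read_file("/data/nais/ions_20210101.spectra", "spectra")

# Diagnostics: returns (records, flag_explanations), or None if the file contains no data rows
result = read_file("/data/nais/diagnostics_20210101.records", "records")
if result is not None:
    records, flag_explanations = result
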