CifFile.CifFile_module module
# To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import try: from cStringIO import StringIO except ImportError: from io import StringIO # Python 2,3 compatibility try: from urllib import urlopen # for arbitrary opening from urlparse import urlparse, urljoin except: from urllib.request import urlopen from urllib.parse import urlparse, urljoin # The unicode type does not exist in Python3 as the str type # encompasses unicode. PyCIFRW tests for 'unicode' would fail # Suggestions for a better approach welcome. if isinstance(u"abc",str): #Python3 unicode = str __copyright = """ PYCIFRW License Agreement (Python License, Version 2) ----------------------------------------------------- 1. This LICENSE AGREEMENT is between the Australian Nuclear Science and Technology Organisation ("ANSTO"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("PyCIFRW") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use PyCIFRW alone or in any derivative version, provided, however, that this License Agreement and ANSTO's notice of copyright, i.e., "Copyright (c) 2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates PyCIFRW or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to PyCIFRW. 4. ANSTO is making PyCIFRW available to Licensee on an "AS IS" basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between ANSTO and Licensee. This License Agreement does not grant permission to use ANSTO trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using PyCIFRW, Licensee agrees to be bound by the terms and conditions of this License Agreement. """ import re,sys from . import StarFile from .StarFile import StarList #put in global scope for exec statement try: import numpy #put in global scope for exec statement from .drel import drel_runtime #put in global scope for exec statement except ImportError: pass #will fail when using dictionaries for calcs from copy import copy #must be in global scope for exec statement def track_recursion(in_this_func): """Keep an eye on a function call to make sure that the key argument hasn't been seen before""" def wrapper(*args,**kwargs): key_arg = args[1] if key_arg in wrapper.called_list: print('Recursion watch: %s already called %d times' % (key_arg,wrapper.called_list.count(key_arg))) raise CifRecursionError( key_arg,wrapper.called_list[:]) #failure if len(wrapper.called_list) == 0: #first time wrapper.stored_use_defaults = kwargs.get("allow_defaults",False) print('All recursive calls will set allow_defaults to ' + repr(wrapper.stored_use_defaults)) else: kwargs["allow_defaults"] = wrapper.stored_use_defaults wrapper.called_list.append(key_arg) print('Recursion watch: call stack: ' + repr(wrapper.called_list)) try: result = in_this_func(*args,**kwargs) except StarFile.StarDerivationError as s: if len(wrapper.called_list) == 1: #no more raise StarFile.StarDerivationFailure(wrapper.called_list[0]) else: raise finally: wrapper.called_list.pop() if len(wrapper.called_list) == 0: wrapper.stored_used_defaults = 'error' return result wrapper.called_list = [] return wrapper class CifBlock(StarFile.StarBlock): """ A class to hold a single block of a CIF file. A `CifBlock` object can be treated as a Python dictionary, in particular, individual items can be accessed using square brackets e.g. `b['_a_dataname']`. All other Python dictionary methods are also available (e.g. `keys()`, `values()`). Looped datanames will return a list of values. ## Initialisation When provided, `data` should be another `CifBlock` whose contents will be copied to this block. * if `strict` is set, maximum name lengths will be enforced * `maxoutlength` is the maximum length for output lines * `wraplength` is the ideal length to make output lines * When set, `overwrite` allows the values of datanames to be changed (otherwise an error is raised). * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using the syntax `a[_dataname] = [1,2,3,4]`. This should now be done by calling `CreateLoop` after setting the dataitem value. """ def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs): """When provided, `data` should be another CifBlock whose contents will be copied to this block. * if `strict` is set, maximum name lengths will be enforced * `maxoutlength` is the maximum length for output lines * `wraplength` is the ideal length to make output lines * When set, `overwrite` allows the values of datanames to be changed (otherwise an error is raised). * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using the syntax `a[_dataname] = [1,2,3,4]`. This should now be done by calling `CreateLoop` after setting the dataitem value. """ if strict: maxnamelength=75 else: maxnamelength=-1 super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs) self.dictionary = None #DDL dictionary referring to this block self.compat_mode = compat_mode #old-style behaviour of setitem def RemoveCifItem(self,itemname): """Remove `itemname` from the CifBlock""" self.RemoveItem(itemname) def __setitem__(self,key,value): self.AddItem(key,value) # for backwards compatibility make a single-element loop if self.compat_mode: if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList): # single element loop self.CreateLoop([key]) def copy(self): newblock = super(CifBlock,self).copy() return self.copy.im_class(newblock) #catch inheritance def AddCifItem(self,data): """ *DEPRECATED*. Use `AddItem` instead.""" # we accept only tuples, strings and lists!! if not (isinstance(data[0],(unicode,tuple,list,str))): raise TypeError('Cif datanames are either a string, tuple or list') # we catch single item loops as well... if isinstance(data[0],(unicode,str)): self.AddSingleCifItem(data[0],list(data[1])) if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList): # a single element loop self.CreateLoop([data[0]]) return # otherwise, we loop over the datanames keyvals = zip(data[0][0],[list(a) for a in data[1][0]]) [self.AddSingleCifItem(a,b) for a,b in keyvals] # and create the loop self.CreateLoop(data[0][0]) def AddSingleCifItem(self,key,value): """*Deprecated*. Use `AddItem` instead""" """Add a single data item. If it is part of a loop, a separate call should be made""" self.AddItem(key,value) def loopnames(self): return [self.loops[a] for a in self.loops] class CifFile(StarFile.StarFile): def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs): super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs) self.strict = strict self.header_comment = \ """ ########################################################################## # Crystallographic Information Format file # Produced by PyCifRW module # # This is a CIF file. CIF has been adopted by the International # Union of Crystallography as the standard for data archiving and # transmission. # # For information on this file format, follow the CIF links at # http://www.iucr.org ########################################################################## """ class CifError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nCif Format error: '+ self.value class ValidCifError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nCif Validity error: ' + self.value class CifRecursionError(Exception): def __init__(self,key_value,call_stack): self.key_value = key_value self.call_stack = call_stack def __str__(self): return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack)) class DicBlock(StarFile.StarBlock): """A definition block within a dictionary, which allows imports to be transparently followed""" def __init__(self,*args,**kwargs): super(DicBlock,self).__init__(*args,**kwargs) self._import_cache = {} def __getitem__(self,dataname): value = None if super(DicBlock,self).has_key("_import.get") and self._import_cache: value = self.follow_import(super(DicBlock,self).__getitem__("_import.get"),dataname) try: final_value = super(DicBlock,self).__getitem__(dataname) except KeyError: #not there final_value = value if final_value is None: raise KeyError("%s not found" % dataname) return final_value def has_key(self,key): try: self[key] except KeyError: return False return True def add_dict_cache(self,name,cached): """Add a loaded dictionary to this block's cache""" self._import_cache[name]=cached def follow_import(self,import_info,dataname): """Find the dataname values from the imported dictionary. `import_info` is a list of import locations""" latest_value = None for import_ref in import_info: file_loc = import_ref["file"] if file_loc not in self._import_cache: raise ValueError("Dictionary for import %s not found" % file_loc) import_from = self._import_cache[file_loc] miss = import_ref.get('miss','Exit') target_key = import_ref["save"] try: import_target = import_from[target_key] except KeyError: if miss == 'Exit': raise CifError('Import frame %s not found in %s' % (target_key,file_loc)) else: continue # now import appropriately mode = import_ref.get("mode",'Contents').lower() if mode == "contents": #only this is used at this level latest_value = import_target.get(dataname,latest_value) return latest_value class CifDic(StarFile.StarFile): """Create a Cif Dictionary object from the provided source, which can be a filename/URL or a CifFile. Optional arguments (relevant to DDLm only): * do_minimum (Boolean): Do not set up the dREL system for auto-calculation or perform imports. This implies do_imports=False and do_dREL=False * do_imports = No/Full/Contents/All: If not 'No', intepret _import.get statements for Full mode/Contents mode/Both respectively. See also option 'heavy' * do_dREL = True/False: Parse and convert all dREL methods to Python. Implies do_imports=All * heavy = True/False: (Experimental). If True, importation overwrites definitions. If False, attributes are resolved dynamically. """ def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True, grammar='auto',heavy=True,**kwargs): self.do_minimum = do_minimum if do_minimum: do_imports = 'No' do_dREL = False if do_dREL: do_imports = 'All' if heavy == 'Light' and do_imports not in ('contents','No'): raise(ValueError,"Light imports only available for mode 'contents'") self.template_cache = {} #for DDLm imports self.ddlm_functions = {} #for DDLm functions self.switch_numpy(False) #no Numpy arrays returned super(CifDic,self).__init__(datasource=dic,grammar=grammar,blocktype=DicBlock,**kwargs) self.standard = 'Dic' #for correct output order self.scoping = 'dictionary' (self.dicname,self.diclang) = self.dic_determine() print('%s is a %s dictionary' % (self.dicname,self.diclang)) self.scopes_mandatory = {} self.scopes_naughty = {} # rename and expand out definitions using "_name" in DDL dictionaries if self.diclang == "DDL1": self.DDL1_normalise() #this removes any non-definition entries self.create_def_block_table() #From now on, [] uses definition_id if self.diclang == "DDL1": self.ddl1_cat_load() elif self.diclang == "DDL2": self.DDL2_normalise() #iron out some DDL2 tricky bits elif self.diclang == "DDLm": self.scoping = 'dictionary' #expose all save frames if do_imports is not 'No': self.obtain_imports(import_mode=do_imports,heavy=heavy)#recursively calls this routine self.create_alias_table() self.create_cat_obj_table() self.create_cat_key_table() if do_dREL: print('Doing full dictionary initialisation') self.initialise_drel() self.add_category_info(full=do_dREL) # initialise type information self.typedic={} self.primdic = {} #typecode<->primitive type translation self.add_type_info() self.install_validation_functions() def dic_determine(self): if "on_this_dictionary" in self: self.master_block = super(CifDic,self).__getitem__("on_this_dictionary") self.def_id_spec = "_name" self.cat_id_spec = "_category.id" #we add this ourselves self.type_spec = "_type" self.enum_spec = "_enumeration" self.cat_spec = "_category" self.esd_spec = "_type_conditions" self.must_loop_spec = "_list" self.must_exist_spec = "_list_mandatory" self.list_ref_spec = "_list_reference" self.key_spec = "_list_mandatory" self.unique_spec = "_list_uniqueness" self.child_spec = "_list_link_child" self.parent_spec = "_list_link_parent" self.related_func = "_related_function" self.related_item = "_related_item" self.primitive_type = "_type" self.dep_spec = "xxx" self.cat_list = [] #to save searching all the time name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"] version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"] return (name+version,"DDL1") elif len(self.get_roots()) == 1: # DDL2/DDLm self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0]) # now change to dictionary scoping self.scoping = 'dictionary' name = self.master_block["_dictionary.title"] version = self.master_block["_dictionary.version"] if self.master_block.has_key("_dictionary.class"): #DDLm self.enum_spec = '_enumeration_set.state' self.key_spec = '_category.key_id' self.must_exist_spec = None self.cat_spec = '_name.category_id' self.primitive_type = '_type.contents' self.cat_id_spec = "_definition.id" self.def_id_spec = "_definition.id" return(name+version,"DDLm") else: #DDL2 self.cat_id_spec = "_category.id" self.def_id_spec = "_item.name" self.key_spec = "_category_mandatory.name" self.type_spec = "_item_type.code" self.enum_spec = "_item_enumeration.value" self.esd_spec = "_item_type_conditions.code" self.cat_spec = "_item.category_id" self.loop_spec = "there_is_no_loop_spec!" self.must_loop_spec = "xxx" self.must_exist_spec = "_item.mandatory_code" self.child_spec = "_item_linked.child_name" self.parent_spec = "_item_linked.parent_name" self.related_func = "_item_related.function_code" self.related_item = "_item_related.related_name" self.unique_spec = "_category_key.name" self.list_ref_spec = "xxx" self.primitive_type = "_type" self.dep_spec = "_item_dependent.dependent_name" return (name+version,"DDL2") else: raise CifError("Unable to determine dictionary DDL version") def DDL1_normalise(self): # switch off block name collision checks self.standard = None # add default type information in DDL2 style # initial types and constructs base_types = ["char","numb","null"] prim_types = base_types[:] base_constructs = [".*", '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.', "\"\" "] for key,value in self.items(): newnames = [key] #keep by default if "_name" in value: real_name = value["_name"] if isinstance(real_name,list): #looped values for looped_name in real_name: new_value = value.copy() new_value["_name"] = looped_name #only looped name self[looped_name] = new_value newnames = real_name else: self[real_name] = value newnames = [real_name] # delete the old one if key not in newnames: del self[key] # loop again to normalise the contents of each definition for key,value in self.items(): #unlock the block save_overwrite = value.overwrite value.overwrite = True # deal with a missing _list, _type_conditions if "_list" not in value: value["_list"] = 'no' if "_type_conditions" not in value: value["_type_conditions"] = 'none' # deal with enumeration ranges if "_enumeration_range" in value: max,min = self.getmaxmin(value["_enumeration_range"]) if min == ".": self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min)))) elif max == ".": self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min)))) else: self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min)))) #add any type construct information if "_type_construct" in value: base_types.append(value["_name"]+"_type") #ie dataname_type base_constructs.append(value["_type_construct"]+"$") prim_types.append(value["_type"]) #keep a record value["_type"] = base_types[-1] #the new type name #make categories conform with ddl2 #note that we must remove everything from the last underscore if value.get("_category",None) == "category_overview": last_under = value["_name"].rindex("_") catid = value["_name"][1:last_under] value["_category.id"] = catid #remove square bracks if catid not in self.cat_list: self.cat_list.append(catid) value.overwrite = save_overwrite # we now add any missing categories before filling in the rest of the # information for key,value in self.items(): #print('processing ddl1 definition %s' % key) if "_category" in self[key]: if self[key]["_category"] not in self.cat_list: # rogue category, add it in newcat = self[key]["_category"] fake_name = "_" + newcat + "_[]" newcatdata = CifBlock() newcatdata["_category"] = "category_overview" newcatdata["_category.id"] = newcat newcatdata["_type"] = "null" self[fake_name] = newcatdata self.cat_list.append(newcat) # write out the type information in DDL2 style self.master_block.AddLoopItem(( ("_item_type_list.code","_item_type_list.construct", "_item_type_list.primitive_code"), (base_types,base_constructs,prim_types) )) def ddl1_cat_load(self): deflist = self.keys() #slight optimization cat_mand_dic = {} cat_unique_dic = {} # a function to extract any necessary information from each definition def get_cat_info(single_def): if self[single_def].get(self.must_exist_spec)=='yes': thiscat = self[single_def]["_category"] curval = cat_mand_dic.get(thiscat,[]) curval.append(single_def) cat_mand_dic[thiscat] = curval # now the unique items... # cif_core.dic throws us a curly one: the value of list_uniqueness is # not the same as the defined item for publ_body_label, so we have # to collect both together. We assume a non-listed entry, which # is true for all current (May 2005) ddl1 dictionaries. if self[single_def].get(self.unique_spec,None)!=None: thiscat = self[single_def]["_category"] new_unique = self[single_def][self.unique_spec] uis = cat_unique_dic.get(thiscat,[]) if single_def not in uis: uis.append(single_def) if new_unique not in uis: uis.append(new_unique) cat_unique_dic[thiscat] = uis [get_cat_info(a) for a in deflist] # apply the above function for cat in cat_mand_dic.keys(): self[cat]["_category_mandatory.name"] = cat_mand_dic[cat] for cat in cat_unique_dic.keys(): self[cat]["_category_key.name"] = cat_unique_dic[cat] def create_pcloop(self,definition): old_children = self[definition].get('_item_linked.child_name',[]) old_parents = self[definition].get('_item_linked.parent_name',[]) if isinstance(old_children,unicode): old_children = [old_children] if isinstance(old_parents,unicode): old_parents = [old_parents] if (len(old_children)==0 and len(old_parents)==0) or \ (len(old_children) > 1 and len(old_parents)>1): return if len(old_children)==0: old_children = [definition]*len(old_parents) if len(old_parents)==0: old_parents = [definition]*len(old_children) newloop = CifLoopBlock(dimension=1) newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) newloop.AddLoopItem(('_item_linked.child_name',old_children)) try: del self[definition]['_item_linked.parent_name'] del self[definition]['_item_linked.child_name'] except KeyError: pass self[definition].insert_loop(newloop) def DDL2_normalise(self): listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys()) # now filter out all the single element lists! dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs) for item_def in dodgy_defs: # print("DDL2 norm: processing %s" % item_def) thisdef = self[item_def] packet_no = thisdef['_item.name'].index(item_def) realcat = thisdef['_item.category_id'][packet_no] realmand = thisdef['_item.mandatory_code'][packet_no] # first add in all the missing categories # we don't replace the entry in the list corresponding to the # current item, as that would wipe out the information we want for child_no in range(len(thisdef['_item.name'])): if child_no == packet_no: continue child_name = thisdef['_item.name'][child_no] child_cat = thisdef['_item.category_id'][child_no] child_mand = thisdef['_item.mandatory_code'][child_no] if child_name not in self: self[child_name] = CifBlock() self[child_name]['_item.name'] = child_name self[child_name]['_item.category_id'] = child_cat self[child_name]['_item.mandatory_code'] = child_mand self[item_def]['_item.name'] = item_def self[item_def]['_item.category_id'] = realcat self[item_def]['_item.mandatory_code'] = realmand target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \ '_item_linked.parent_name' in self[a]] # now dodgy_defs contains all definition blocks with more than one child/parent link for item_def in dodgy_defs: self.create_pcloop(item_def) #regularise appearance for item_def in dodgy_defs: print('Processing %s' % item_def) thisdef = self[item_def] child_list = thisdef['_item_linked.child_name'] parents = thisdef['_item_linked.parent_name'] # for each parent, find the list of children. family = list(zip(parents,child_list)) notmychildren = family #We aim to remove non-children # Loop over the parents, relocating as necessary while len(notmychildren): # get all children of first entry mychildren = [a for a in family if a[0]==notmychildren[0][0]] print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren))) for parent,child in mychildren: #parent is the same for all # Make sure that we simply add in the new entry for the child, not replace it, # otherwise we might spoil the child entry loop structure try: childloop = self[child].GetLoop('_item_linked.parent_name') except KeyError: print('Creating new parent entry %s for definition %s' % (parent,child)) self[child]['_item_linked.parent_name'] = [parent] childloop = self[child].GetLoop('_item_linked.parent_name') childloop.AddLoopItem(('_item_linked.child_name',[child])) continue else: # A parent loop already exists and so will a child loop due to the # call to create_pcloop above pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child] goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent] if len(goodpars)>0: #no need to add it print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child)) continue print('Adding %s to %s entry' % (parent,child)) newpacket = childloop.GetPacket(0) #essentially a copy, I hope setattr(newpacket,'_item_linked.child_name',child) setattr(newpacket,'_item_linked.parent_name',parent) childloop.AddPacket(newpacket) # # Make sure the parent also points to the children. We get # the current entry, then add our # new values if they are not there already # parent_name = mychildren[0][0] old_children = self[parent_name].get('_item_linked.child_name',[]) old_parents = self[parent_name].get('_item_linked.parent_name',[]) oldfamily = zip(old_parents,old_children) newfamily = [] print('Old parents -> %s' % repr(old_parents)) for jj, childname in mychildren: alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname] if len(alreadythere)>0: continue 'Adding new child %s to parent definition at %s' % (childname,parent_name) old_children.append(childname) old_parents.append(parent_name) # Now output the loop, blowing away previous definitions. If there is something # else in this category, we are destroying it. newloop = CifLoopBlock(dimension=1) newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) newloop.AddLoopItem(('_item_linked.child_name',old_children)) del self[parent_name]['_item_linked.parent_name'] del self[parent_name]['_item_linked.child_name'] self[parent_name].insert_loop(newloop) print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name'])) # now make a new,smaller list notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]] # now flatten any single element lists single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs) for flat_def in single_defs: flat_keys = self[flat_def].GetLoop('_item.name').keys() for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0] # now deal with the multiple lists # next we do aliases all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')] for aliased in all_aliases: my_aliases = listify(self[aliased]['_item_aliases.alias_name']) for alias in my_aliases: self[alias] = self[aliased].copy() #we are going to delete stuff... del self[alias]["_item_aliases.alias_name"] def ddlm_parse_valid(self): if "_dictionary_valid.application" not in self.master_block: return for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"): scope = getattr(scope_pack,"_dictionary_valid.application") valid_info = getattr(scope_pack,"_dictionary_valid.attributes") if scope[1] == "Mandatory": self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info) elif scope[1] == "Prohibited": self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info) def obtain_imports(self,import_mode,heavy=False): """Collate import information""" self._import_dics = [] import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]]) print('Import mode %s applied to following frames' % import_mode) print(str([a[0] for a in import_frames])) if import_mode != 'All': for i in range(len(import_frames)): import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents').lower() == import_mode.lower()]) print('Importing following frames in mode %s' % import_mode) print(str(import_frames)) #resolve all references for parent_block,import_list in import_frames: for import_ref in import_list: file_loc = import_ref["file"] full_uri = self.resolve_path(file_loc) if full_uri not in self.template_cache: dic_as_cif = CifFile(full_uri,grammar=self.grammar) self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,heavy=heavy,do_dREL=False) #this will recurse internal imports print('Added %s to cached dictionaries' % full_uri) import_from = self.template_cache[full_uri] dupl = import_ref.get('dupl','Exit') miss = import_ref.get('miss','Exit') target_key = import_ref["save"] try: import_target = import_from[target_key] except KeyError: if miss == 'Exit': raise CifError('Import frame %s not found in %s' % (target_key,full_uri)) else: continue # now import appropriately mode = import_ref.get("mode",'Contents').lower() if target_key in self and mode=='full': #so blockname will be duplicated if dupl == 'Exit': raise CifError('Import frame %s already in dictionary' % target_key) elif dupl == 'Ignore': continue if heavy: self.ddlm_import(parent_block,import_from,import_target,target_key,mode) else: self.ddlm_import_light(parent_block,import_from,import_target,target_key,file_loc,mode) def ddlm_import(self,parent_block,import_from,import_target,target_key,mode='All'): """Import other dictionaries in place""" if mode == 'contents': #merge attributes only self[parent_block].merge(import_target) elif mode =="full": # Do the syntactic merge syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting from_cat_head = import_target['_name.object_id'] child_frames = import_from.ddlm_all_children(from_cat_head) # Check for Head merging Head if self[parent_block].get('_definition.class','Datum')=='Head' and \ import_target.get('_definition.class','Datum')=='Head': head_to_head = True else: head_to_head = False child_frames.remove(from_cat_head) # As we are in syntax land, we call the CifFile methods child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames]) child_blocks = super(CifDic,import_from).makebc(child_blocks) # Prune out any datablocks that have identical definitions from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()]) double_defs = list([b for b in from_defs.items() if self.has_key(b[1])]) print('Definitions for %s superseded' % repr(double_defs)) for b in double_defs: del child_blocks[b[0]] super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head) # print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames), mode,len(self))) # Now the semantic merge # First expand our definition <-> blockname tree self.create_def_block_table() merging_cat = self[parent_block]['_name.object_id'] #new parent if head_to_head: child_frames = self.ddlm_immediate_children(from_cat_head) #old children #the new parent is the importing category for all old children for f in child_frames: self[f].overwrite = True self[f]['_name.category_id'] = merging_cat self[f].overwrite = False # remove the old head del self[from_cat_head] print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat)) else: #imported category is only child from_frame = import_from[target_key]['_definition.id'] #so we can find it child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0] self[child_frame]['_name.category_id'] = merging_cat print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat)) # it will never happen again... del self[parent_block]["_import.get"] def resolve_path(self,file_loc): url_comps = urlparse(file_loc) if url_comps[0]: return file_loc #already full URI new_url = urljoin(self.my_uri,file_loc) #print("Transformed %s to %s for import " % (file_loc,new_url)) return new_url def ddlm_import_light(self,parent_block,import_from,import_target,target_key,file_loc,mode='All'): """Register the imported dictionaries but do not alter any definitions. `parent_block` contains the id of the block that is importing. `import_target` is the block that should be imported. `import_from` is the CifFile that contains the definitions.""" if mode == 'contents': #merge attributes only self[parent_block].add_dict_cache(file_loc,import_from) elif mode =="full": # Check for Head merging Head if self[parent_block].get('_definition.class','Datum')=='Head' and \ import_target.get('_definition.class','Datum')=='Head': head_to_head = True else: head_to_head = False # Figure out the actual definition ID head_id = import_target["_definition.id"] # Adjust parent information merging_cat = self[parent_block]['_name.object_id'] from_cat_head = import_target['_name.object_id'] if not head_to_head: # imported category is only child import_target["_name.category_id"]=merging_cat self._import_dics = [(import_from,head_id)]+self._import_dics #prepend def lookup_imports(self,key): """Check the list of imported dictionaries for this definition""" for one_dic,head_def in self._import_dics: from_cat_head = one_dic[head_def]['_name.object_id'] possible_keys = one_dic.ddlm_all_children(from_cat_head) if key in possible_keys: return one_dic[key] raise KeyError("%s not found in import dictionaries" % key) def create_def_block_table(self): """ Create an internal table matching definition to block id """ proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()] # now get the actual ids instead of blocks proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table]) # remove non-definitions if self.diclang != "DDL1": top_blocks = list([a[0].lower() for a in self.get_roots()]) else: top_blocks = ["on_this_dictionary"] # catch dodgy duplicates uniques = set([a[0] for a in proto_table]) if len(uniques)<len(proto_table): def_names = list([a[0] for a in proto_table]) dodgy = [a for a in def_names if def_names.count(a)>1] raise CifError('Duplicate definitions in dictionary:' + repr(dodgy)) self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks]) def __getitem__(self,key): """Access a datablock by definition id, after the lookup has been created""" try: return super(CifDic,self).__getitem__(self.block_id_table[key.lower()]) except AttributeError: #block_id_table not present yet return super(CifDic,self).__getitem__(key) except KeyError: # key is missing try: # print(Definition for %s not found, reverting to CifFile' % key) return super(CifDic,self).__getitem__(key) except KeyError: # try imports return self.lookup_imports(key) def __setitem__(self,key,value): """Add a new definition block""" super(CifDic,self).__setitem__(key,value) try: self.block_id_table[value['_definition.id']]=key except AttributeError: #does not exist yet pass def NewBlock(self,*args,**kwargs): newname = super(CifDic,self).NewBlock(*args,**kwargs) try: self.block_id_table[self[newname]['_definition.id']]=newname except AttributeError: #no block_id table pass def __delitem__(self,key): """Remove a definition""" try: super(CifDic,self).__delitem__(self.block_id_table[key.lower()]) del self.block_id_table[key.lower()] except (AttributeError,KeyError): #block_id_table not present yet super(CifDic,self).__delitem__(key) return # fix other datastructures # cat_obj table def keys(self): """Return all definitions""" try: return self.block_id_table.keys() except AttributeError: return super(CifDic,self).keys() def has_key(self,key): return key in self def __contains__(self,key): try: return key.lower() in self.block_id_table except AttributeError: return super(CifDic,self).__contains__(key) def items(self): """Return (key,value) pairs""" return list([(a,self[a]) for a in self.keys()]) def unlock(self): """Allow overwriting of all definitions in this collection""" for a in self.keys(): self[a].overwrite=True def lock(self): """Disallow changes in definitions""" for a in self.keys(): self[a].overwrite=False def rename(self,oldname,newname,blockname_as_well=True): """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True, change the underlying blockname too.""" if blockname_as_well: super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname) self.block_id_table[newname.lower()]=newname if oldname.lower() in self.block_id_table: #not removed del self.block_id_table[oldname.lower()] else: self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()] del self.block_id_table[oldname.lower()] return def get_root_category(self): """Get the single 'Head' category of this dictionary""" root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head'] if len(root_cats)>1 or len(root_cats)==0: raise CifError("Cannot determine a unique Head category, got" % repr(root_cats)) return root_cats[0] def ddlm_immediate_children(self,catname): """Return a list of datanames for the immediate children of catname. These are semantic children (i.e. based on _name.category_id), not structural children as in the case of StarFile.get_immediate_children""" straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()] return list(straight_children) def ddlm_all_children(self,catname): """Return a list of all children, including the `catname`""" all_children = self.ddlm_immediate_children(catname) cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category'] for c in cat_children: all_children.remove(c) all_children += self.ddlm_all_children(c) return all_children + [catname] def is_semantic_child(self,parent,maybe_child): """Return true if `maybe_child` is a child of `parent`""" all_children = self.ddlm_all_children(parent) return maybe_child in all_children def ddlm_danglers(self): """Return a list of definitions that do not have a category defined for them, or are children of an unattached category""" top_block = self.get_root_category() connected = set(self.ddlm_all_children(top_block)) all_keys = set(self.keys()) unconnected = all_keys - connected return list(unconnected) def get_ddlm_parent(self,itemname): """Get the parent category of itemname""" parent = self[itemname].get('_name.category_id','') if parent == '': # use the top block by default raise CifError("%s has no parent" % itemname) return parent def expand_category_opt(self,name_list): """Return a list of all non-category items in a category or return the name if the name is not a category""" new_list = [] for name in name_list: if self.get(name,{}).get('_definition.scope','Item') == 'Category': new_list += self.expand_category_opt([a for a in self.keys() if \ self[a].get('_name.category_id','').lower() == name.lower()]) else: new_list.append(name) return new_list def get_categories(self): """Return a list of category names""" return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category']) def names_in_cat(self,cat,names_only=False): names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()] if not names_only: return list([a for a in names if self[a].get('_definition.scope','Item')=='Item']) else: return list([self[a]["_name.object_id"] for a in names]) def create_alias_table(self): """Populate an alias table that we can look up when searching for a dataname""" all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]] self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases]) def create_cat_obj_table(self): """Populate a table indexed by (cat,obj) and returning the correct dataname""" base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \ for a in self.keys() if self[a].get('_definition.scope','Item')=='Item']) loopable = self.get_loopable_cats() loopers = [self.ddlm_immediate_children(a) for a in loopable] print('Loopable cats:' + repr(loopable)) loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers] expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0]) print("Expansion list:" + repr(expand_list)) extra_table = {} #for debugging we keep it separate from base_table until the end def expand_base_table(parent_cat,child_cats): extra_names = [] # first deal with all the child categories for child_cat in child_cats: nn = [] if child_cat in expand_list: # a nested category: grab its names nn = expand_base_table(child_cat,expand_list[child_cat]) # store child names extra_names += nn # add all child names to the table child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \ for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key'] child_names += extra_names extra_table.update(dict([((parent_cat,obj),[name]) for obj,name in child_names if (parent_cat,name) not in extra_table])) # and the repeated ones get appended instead repeats = [a for a in child_names if a in extra_table] for obj,name in repeats: extra_table[(parent_cat,obj)] += [name] # and finally, add our own names to the return list child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \ for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key'] return child_names [expand_base_table(parent,child) for parent,child in expand_list.items()] print('Expansion cat/obj values: ' + repr(extra_table)) # append repeated ones non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table]) repeats = [a for a in extra_table.keys() if a in base_table] base_table.update(non_repeats) for k in repeats: base_table[k] += extra_table[k] self.cat_obj_lookup_table = base_table self.loop_expand_list = expand_list def get_loopable_cats(self): """A short utility function which returns a list of looped categories. This is preferred to a fixed attribute as that fixed attribute would need to be updated after any edits""" return [a.lower() for a in self.keys() if self[a].get('_definition.class','')=='Loop'] def create_cat_key_table(self): """Create a utility table with a list of keys applicable to each category. A key is a compound key, that is, it is a list""" self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name", [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()]) def collect_keys(parent_cat,child_cats): kk = [] for child_cat in child_cats: if child_cat in self.loop_expand_list: kk += collect_keys(child_cat) # add these keys to our list kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))] self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk return kk for k,v in self.loop_expand_list.items(): collect_keys(k,v) print('Keys for categories' + repr(self.cat_key_table)) def add_type_info(self): if "_item_type_list.construct" in self.master_block: types = self.master_block["_item_type_list.code"] prim_types = self.master_block["_item_type_list.primitive_code"] constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]]) # add in \r wherever we see \n, and change \{ to \\{ def regex_fiddle(mm_regex): brack_match = r"((.*\[.+)(\\{)(.*\].*))" ret_match = r"((.*\[.+)(\\n)(.*\].*))" fixed_regexp = mm_regex[:] #copy # fix the brackets bm = re.match(brack_match,mm_regex) if bm != None: fixed_regexp = bm.expand(r"\2\\\\{\4") # fix missing \r rm = re.match(ret_match,fixed_regexp) if rm != None: fixed_regexp = rm.expand(r"\2\3\\r\4") #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp)) return fixed_regexp constructs = map(regex_fiddle,constructs) for typecode,construct in zip(types,constructs): self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL) # now make a primitive <-> type construct mapping for typecode,primtype in zip(types,prim_types): self.primdic[typecode] = primtype def add_category_info(self,full=True): if self.diclang == "DDLm": catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category'] looped_cats = [a for a in catblocks if self[a].get('_definition.class','Set') == 'Loop'] self.parent_lookup = {} for one_cat in looped_cats: parent_cat = one_cat parent_def = self[parent_cat] next_up = parent_def['_name.category_id'].lower() while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop': parent_def = self[next_up] parent_cat = next_up next_up = parent_def['_name.category_id'].lower() self.parent_lookup[one_cat] = parent_cat if full: self.key_equivs = {} for one_cat in looped_cats: #follow them up lower_keys = listify(self[one_cat]['_category_key.name']) start_keys = lower_keys[:] while len(lower_keys)>0: this_cat = self[lower_keys[0]]['_name.category_id'] parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a] #print(Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent))) if len(parent)>1: raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent))) if len(parent)==0: break parent = parent[0] parent_keys = listify(self[parent]['_category_key.name']) linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys] # sanity check if set(parent_keys) != set(linked_keys): raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys)) # now add in our information for parent,child in zip(linked_keys,start_keys): self.key_equivs[child] = self.key_equivs.get(child,[])+[parent] lower_keys = linked_keys #preserves order of start keys else: self.parent_lookup = {} self.key_equivs = {} def change_category_name(self,oldname,newname): self.unlock() """Change the category name from [[oldname]] to [[newname]]""" if oldname not in self: raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname)) if newname in self: raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,oldname)) child_defs = self.ddlm_immediate_children(oldname) self.rename(oldname,newname) #NB no name integrity checks self[newname]['_name.object_id']=newname self[newname]['_definition.id']=newname for child_def in child_defs: self[child_def]['_name.category_id'] = newname if self[child_def].get('_definition.scope','Item')=='Item': newid = self.create_catobj_name(newname,self[child_def]['_name.object_id']) self[child_def]['_definition.id']=newid self.rename(child_def,newid[1:]) #no underscore at the beginning self.lock() def create_catobj_name(self,cat,obj): """Combine category and object in approved fashion to create id""" return ('_'+cat+'.'+obj) def change_category(self,itemname,catname): """Move itemname into catname, return new handle""" defid = self[itemname] if defid['_name.category_id'].lower()==catname.lower(): print('Already in category, no change') return itemname if catname not in self: #don't have it print('No such category %s' % catname) return itemname self.unlock() objid = defid['_name.object_id'] defid['_name.category_id'] = catname newid = itemname # stays the same for categories if defid.get('_definition.scope','Item') == 'Item': newid = self.create_catobj_name(catname,objid) defid['_definition.id']= newid self.rename(itemname,newid) self.set_parent(catname,newid) self.lock() return newid def change_name(self,one_def,newobj): """Change the object_id of one_def to newobj. This is not used for categories, but can be used for dictionaries""" if '_dictionary.title' not in self[one_def]: #a dictionary block newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj) self.unlock() self.rename(one_def,newid) self[newid]['_definition.id']=newid self[newid]['_name.object_id']=newobj else: self.unlock() newid = newobj self.rename(one_def,newobj) self[newid]['_dictionary.title'] = newid self.lock() return newid # Note that our semantic parent is given by catparent, but our syntactic parent is # always just the root block def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False): """Add a new category to the dictionary with name [[catname]]. If [[catparent]] is None, the category will be a child of the topmost 'Head' category or else the top data block. If [[is_loop]] is false, a Set category is created. If [[allow_dangler]] is true, the parent category does not have to exist.""" if catname in self: raise CifError('Attempt to add existing category %s' % catname) self.unlock() syntactic_root = self.get_roots()[0][0] if catparent is None: semantic_root = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head'] if len(semantic_root)>0: semantic_root = semantic_root[0] else: semantic_root = syntactic_root else: semantic_root = catparent realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root) self.block_id_table[catname.lower()]=realname self[catname]['_name.object_id'] = catname if not allow_dangler or catparent is None: self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id'] else: self[catname]['_name.category_id'] = catparent self[catname]['_definition.id'] = catname self[catname]['_definition.scope'] = 'Category' if is_loop: self[catname]['_definition.class'] = 'Loop' else: self[catname]['_definition.class'] = 'Set' self[catname]['_description.text'] = 'No definition provided' self.lock() return catname def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False): """Add itemname to category [[catparent]]. If itemname contains periods, all text before the final period is ignored. If [[allow_dangler]] is True, no check for a parent category is made.""" self.unlock() if '.' in itemname: objname = itemname.split('.')[-1] else: objname = itemname objname = objname.strip('_') if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'): raise CifError('No category %s in dictionary' % catparent) fullname = '_'+catparent.lower()+'.'+objname print('New name: %s' % fullname) syntactic_root = self.get_roots()[0][0] realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root) #low-level change # update our dictionary structures self.block_id_table[fullname]=realname self[fullname]['_definition.id']=fullname self[fullname]['_name.object_id']=objname self[fullname]['_name.category_id']=catparent self[fullname]['_definition.class']='Datum' self[fullname]['_description.text']=def_text def remove_definition(self,defname): """Remove a definition from the dictionary.""" if defname not in self: return if self[defname].get('_definition.scope')=='Category': children = self.ddlm_immediate_children(defname) [self.remove_definition(a) for a in children] cat_id = self[defname]['_definition.id'].lower() del self[defname] def get_cat_obj(self,name): """Return (cat,obj) tuple. [[name]] must contain only a single period""" cat,obj = name.split('.') return (cat.strip('_'),obj) def get_name_by_cat_obj(self,category,object,give_default=False): """Return the dataname corresponding to the given category and object""" if category[0] == '_': #accidentally left in true_cat = category[1:].lower() else: true_cat = category.lower() try: return self.cat_obj_lookup_table[(true_cat,object.lower())][0] except KeyError: if give_default: return '_'+true_cat+'.'+object raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object)) def WriteOut(self,**kwargs): myblockorder = self.get_full_child_list() self.set_grammar(self.grammar) self.standard = 'Dic' return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs) def get_full_child_list(self): """Return a list of definition blocks in order parent-child-child-child-parent-child...""" top_block = self.get_roots()[0][0] root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head'] if len(root_cat) == 1: all_names = [top_block] + self.recurse_child_list(root_cat[0]) unrooted = self.ddlm_danglers() double_names = set(unrooted).intersection(set(all_names)) if len(double_names)>0: raise CifError('Names are children of internal and external categories:%s' % repr(double_names)) remaining = unrooted[:] for no_root in unrooted: if self[no_root].get('_definition.scope','Item')=='Category': all_names += [no_root] remaining.remove(no_root) these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()] all_names += these_children [remaining.remove(n) for n in these_children] # now sort by category ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining]) for e in ext_cats: cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == e] [remaining.remove(n) for n in cat_items] all_names += cat_items if len(remaining)>0: print('WARNING: following items do not seem to belong to a category??') print(repr(remaining)) all_names += remaining print('Final block order: ' + repr(all_names)) return all_names raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead') def cat_from_name(self,one_name): """Guess the category from the name. This should be used only when this is not important semantic information, for example, when printing out""" (cat,obj) = one_name.split(".") if cat[0] == "_": cat = cat[1:] return cat def recurse_child_list(self,parentname): """Recursively expand the logical child list of [[parentname]]""" final_list = [parentname] child_blocks = [a for a in self.child_table.keys() if self[a].get('_name.category_id','').lower() == parentname.lower()] child_blocks.sort() #we love alphabetical order child_items = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item'] final_list += child_items child_cats = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category'] for child_cat in child_cats: final_list += self.recurse_child_list(child_cat) return final_list def get_key_pack(self,category,value,data): keyname = self[category][self.unique_spec] onepack = data.GetPackKey(keyname,value) return onepack def get_number_with_esd(numstring): numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)' our_match = re.match(numb_re,numstring) if our_match: a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups() # print("Debug: {} -> {!r}".format(numstring, our_match.groups())) else: return None,None if dot or q: return None,None #a dot or question mark if exp: #has exponent exp = exp.replace("d","e") # mop up old fashioned numbers exp = exp.replace("D","e") base_num = base_num + exp # print("Debug: have %s for base_num from %s" % (base_num,numstring)) base_num = float(base_num) # work out esd, if present. if esd: esd = float(esd[1:-1]) # no brackets if dad: # decimal point + digits esd = esd * (10 ** (-1* len(dad))) if exp: esd = esd * (10 ** (float(exp[1:]))) return base_num,esd def getmaxmin(self,rangeexp): regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*' regexp = regexp + ":" + regexp regexp = re.match(regexp,rangeexp) try: minimum = regexp.group(1) maximum = regexp.group(7) except AttributeError: print("Can't match %s" % rangeexp) if minimum == None: minimum = "." else: minimum = float(minimum) if maximum == None: maximum = "." else: maximum = float(maximum) return maximum,minimum def initialise_drel(self): """Parse drel functions and prepare data structures in dictionary""" self.ddlm_parse_valid() #extract validity information from data block self.transform_drel() #parse the drel functions self.add_drel_funcs() #put the drel functions into the namespace def transform_drel(self): from .drel import drel_ast_yacc from .drel import py_from_ast import traceback parser = drel_ast_yacc.parser lexer = drel_ast_yacc.lexer my_namespace = self.keys() my_namespace = dict(zip(my_namespace,my_namespace)) # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...}) loopable_cats = self.get_loopable_cats() loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats] loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys] cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats] loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names))) # parser.listable_items = [a for a in self.keys() if "*" in self[a].get("_type.dimension","")] derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \ and self[a].get("_name.category_id","")!= "function"] for derivable in derivable_list: target_id = derivable # reset the list of visible names for parser special_ids = [dict(zip(self.keys(),self.keys()))] print("Target id: %s" % derivable) drel_exprs = self[derivable]["_method.expression"] drel_purposes = self[derivable]["_method.purpose"] all_methods = [] if not isinstance(drel_exprs,list): drel_exprs = [drel_exprs] drel_purposes = [drel_purposes] for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs): if drel_purpose != 'Evaluation': continue drel_expr = "\n".join(drel_expr.splitlines()) # print("Transforming %s" % drel_expr) # List categories are treated differently... try: meth_ast = parser.parse(drel_expr+"\n",lexer=lexer) except: print('Syntax error in method for %s; leaving as is' % derivable) a,b = sys.exc_info()[:2] print((repr(a),repr(b))) print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout)) # reset the lexer lexer.begin('INITIAL') continue # Construct the python method cat_meth = False if self[derivable].get('_definition.scope','Item') == 'Category': cat_meth = True pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id, loopable=loop_info, cif_dic = self,cat_meth=cat_meth) all_methods.append(pyth_meth) if len(all_methods)>0: save_overwrite = self[derivable].overwrite self[derivable].overwrite = True self[derivable]["_method.py_expression"] = all_methods self[derivable].overwrite = save_overwrite #print("Final result:\n " + repr(self[derivable]["_method.py_expression"])) def add_drel_funcs(self): from .drel import drel_ast_yacc from .drel import py_from_ast funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function'] funcnames = [(self[a]["_name.object_id"], getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist] # create executable python code... parser = drel_ast_yacc.parser # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...}) loopable_cats = self.get_loopable_cats() loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats] loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys] cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats] loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names))) for funcname,funcbody in funcnames: newline_body = "\n".join(funcbody.splitlines()) parser.target_id = funcname res_ast = parser.parse(newline_body) py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self) #print('dREL library function ->\n' + py_function) global_table = globals() exec(py_function, global_table) #add to namespace #print('Globals after dREL functions added:' + repr(globals())) self.ddlm_functions = globals() #for outside access @track_recursion def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True): key = start_key #starting value result = None #success is a non-None value default_result = False #we have not used a default value # check for aliases # check for an older form of a new value found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata] if len(found_it)>0: corrected_type = self.change_type(key,cifdata[found_it[0]]) return corrected_type # now do the reverse check - any alternative form alias_name = [a for a in self.alias_table.items() if key in a[1]] print('Aliases for %s: %s' % (key,repr(alias_name))) if len(alias_name)==1: key = alias_name[0][0] #actual definition name if key in cifdata: return self.change_type(key,cifdata[key]) found_it = [k for k in alias_name[0][1] if k in cifdata] if len(found_it)>0: return self.change_type(key,cifdata[found_it[0]]) elif len(alias_name)>1: raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name)) the_category = self[key]["_name.category_id"] cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category] has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)] # store any default value in case we have a problem def_val = self[key].get("_enumeration.default","") def_index_val = self[key].get("_enumeration.def_index_id","") if len(has_cat_names)==0: # try category method cat_result = {} pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]] pulled_from_cats = [(k,[ self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']] ) for k in pulled_from_cats] pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]] if '_category_construct_local.type' in self[the_category]: print("**Now constructing category %s using DDLm attributes**" % the_category) try: cat_result = self.construct_category(the_category,cifdata,store_value=True) except (CifRecursionError,StarFile.StarDerivationError): print('** Failed to construct category %s (error)' % the_category) # Trying a pull-back when the category is partially populated # will not work, hence we test that cat_result has no keys if len(pulled_to_cats)>0 and len(cat_result)==0: print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats))) try: cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True) except (CifRecursionError,StarFile.StarDerivationError): print('** Failed to construct category %s from pullback information (error)' % the_category) if '_method.py_expression' in self[the_category] and key not in cat_result: print("**Now applying category method for %s in search of %s**" % (the_category,key)) cat_result = self.derive_item(the_category,cifdata,store_value=True) print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result)) # do we now have our value? if key in cat_result: return cat_result[key] # Recalculate in case it actually worked has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)] the_funcs = self[key].get('_method.py_expression',"") if the_funcs: #attempt to calculate it #global_table = globals() #global_table.update(self.ddlm_functions) for one_func in the_funcs: print('Executing function for %s:' % key) #print(one_func) exec(one_func, globals()) #will access dREL functions, puts "pyfunc" in scope # print('in following global environment: ' + repr(global_table)) stored_setting = cifdata.provide_value cifdata.provide_value = True try: result = pyfunc(cifdata) except CifRecursionError as s: print(s) result = None except StarFile.StarDerivationError as s: print(s) result = None finally: cifdata.provide_value = stored_setting if result is not None: break #print("Function returned {!r}".format(result)) if result is None and allow_defaults: # try defaults if def_val: result = self.change_type(key,def_val) default_result = True elif def_index_val: #derive a default value index_vals = self[key]["_enumeration_default.index"] val_to_index = cifdata[def_index_val] #what we are keying on if self[def_index_val]['_type.contents'] in ['Code','Name','Tag']: lcase_comp = True index_vals = [a.lower() for a in index_vals] # Handle loops if isinstance(val_to_index,list): if lcase_comp: val_to_index = [a.lower() for a in val_to_index] keypos = [index_vals.index(a) for a in val_to_index] result = [self[key]["_enumeration_default.value"][a] for a in keypos] else: if lcase_comp: val_to_index = val_to_index.lower() keypos = index_vals.index(val_to_index) #value error if no such value available result = self[key]["_enumeration_default.value"][keypos] default_result = True #flag that it must be extended result = self.change_type(key,result) print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index))) # read it in if result is None: #can't do anything else print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults))) raise StarFile.StarDerivationError(start_key) is_looped = False if self[the_category].get('_definition.class','Set')=='Loop': is_looped = True if len(has_cat_names)>0: #this category already exists if result is None or default_result: #need to create a list of values loop_len = len(cifdata[has_cat_names[0]]) out_result = [result]*loop_len result = out_result else: #nothing exists in this category, we can't store this at all print('Resetting result %s for %s to null list as category is empty' % (key,result)) result = [] # now try to insert the new information into the right place # find if items of this category already appear... # Never cache empty values if not (isinstance(result,list) and len(result)==0) and\ store_value: if self[key].get("_definition.scope","Item")=='Item': if is_looped: result = self.store_new_looped_value(key,cifdata,result,default_result) else: result = self.store_new_unlooped_value(key,cifdata,result) else: self.store_new_cat_values(cifdata,result,the_category) return result def store_new_looped_value(self,key,cifdata,result,default_result): """Store a looped value from the dREL system into a CifFile""" # try to change any matrices etc. to lists the_category = self[key]["_name.category_id"] out_result = result if result is not None and not default_result: # find any numpy arrays def conv_from_numpy(one_elem): if not hasattr(one_elem,'dtype'): if isinstance(one_elem,(list,tuple)): return StarFile.StarList([conv_from_numpy(a) for a in one_elem]) return one_elem if one_elem.size > 1: #so is not a float return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()]) else: try: return one_elem.item(0) except: return one_elem out_result = [conv_from_numpy(a) for a in result] # so out_result now contains a value suitable for storage cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category] has_cat_names = [a for a in cat_names if a in cifdata] print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names)) if len(has_cat_names)>0: #this category already exists cifdata[key] = out_result #lengths must match or else!! cifdata.AddLoopName(has_cat_names[0],key) else: cifdata[key] = out_result cifdata.CreateLoop([key]) print('Loop info:' + repr(cifdata.loops)) return out_result def store_new_unlooped_value(self,key,cifdata,result): """Store a single value from the dREL system""" if result is not None and hasattr(result,'dtype'): if result.size > 1: out_result = StarFile.StarList(result.tolist()) cifdata[key] = out_result else: cifdata[key] = result.item(0) else: cifdata[key] = result return result def construct_category(self,category,cifdata,store_value=True): """Construct a category using DDLm attributes""" con_type = self[category].get('_category_construct_local.type',None) if con_type == None: return {} if con_type == 'Pullback' or con_type == 'Filter': morphisms = self[category]['_category_construct_local.components'] morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat cats = [self[a]['_name.category_id'] for a in morphisms] cat_keys = [self[a]['_category.key_id'] for a in cats] cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat if con_type == 'Filter': int_filter = self[category].get('_category_construct_local.integer_filter',None) text_filter = self[category].get('_category_construct_local.text_filter',None) if int_filter is not None: morph_values.append([int(a) for a in int_filter]) if text_filter is not None: morph_values.append(text_filter) cat_values.append(range(len(morph_values[-1]))) # create the mathematical product filtered by equality of dataname values pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \ if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]] # now prepare for return if len(pullback_ids)==0: return {} newids = self[category]['_category_construct_local.new_ids'] fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids] if con_type == 'Pullback': final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]} final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids)) final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids)) elif con_type == 'Filter': #simple filter final_results = {fullnewids[0]:[x[0] for x in pullback_ids]} final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids)) if store_value: self.store_new_cat_values(cifdata,final_results,category) return final_results def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True): """Each of the categories in source_categories are pullbacks that include the target_category""" target_key = self[target_category]['_category.key_id'] result = {target_key:[]} first_time = True # for each source category, determine which element goes to the target for sc in source_categories: components = self[sc]['_category_construct_local.components'] comp_cats = [self[c]['_name.category_id'] for c in components] new_ids = self[sc]['_category_construct_local.new_ids'] source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids] if len(components) == 2: # not a filter element_pos = comp_cats.index(target_category) old_id = source_ids[element_pos] print('Using %s to populate %s' % (old_id,target_key)) result[target_key].extend(cifdata[old_id]) # project through all identical names extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key]) # we only include keys that are common to all categories if first_time: result.update(extra_result) else: for k in extra_result.keys(): if k in result: print('Updating %s: was %s' % (k,repr(result[k]))) result[k].extend(extra_result[k]) else: extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids) if len(extra_result)>0 or source_ids[0] in cifdata: #something is present result[target_key].extend(cifdata[source_ids[0]]) for k in extra_result.keys(): if k in result: print('Reverse filter: Updating %s: was %s' % (k,repr(result[k]))) result[k].extend(extra_result[k]) else: result[k]=extra_result[k] # Bonus derivation if there is a singleton filter if self[sc]['_category_construct_local.type'] == 'Filter': int_filter = self[sc].get('_category_construct_local.integer_filter',None) text_filter = self[sc].get('_category_construct_local.text_filter',None) if int_filter is not None: filter_values = int_filter else: filter_values = text_filter if len(filter_values)==1: #a singleton extra_dataname = self[sc]['_category_construct_local.components'][0] if int_filter is not None: new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]]) else: new_value = filter_values * len(cifdata[source_ids[0]]) if extra_dataname not in result: result[extra_dataname] = new_value else: result[extra_dataname].extend(new_value) else: raise ValueError('Unexpected category construct type' + self[sc]['_category_construct_local.type']) first_time = False # check for sanity - all dataname lengths must be identical datalen = len(set([len(a) for a in result.values()])) if datalen != 1: raise AssertionError('Failed to construct equal-length category items,'+ repr(result)) if store_value: print('Now storing ' + repr(result)) self.store_new_cat_values(cifdata,result,target_category) return result def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]): """Copy across datanames for which the from_category key equals [[key_vals]]""" result = {} s_names_in_cat = set(self.names_in_cat(from_category,names_only=True)) t_names_in_cat = set(self.names_in_cat(to_category,names_only=True)) can_project = s_names_in_cat & t_names_in_cat can_project -= set(skip_names) #already dealt with source_key = self[from_category]['_category.key_id'] print('Source dataname set: ' + repr(s_names_in_cat)) print('Target dataname set: ' + repr(t_names_in_cat)) print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project)) for project_name in can_project: full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0] full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0] if key_vals is None: try: result[full_to_name] = cifdata[full_from_name] except StarFile.StarDerivationError: pass else: all_key_vals = cifdata[source_key] filter_pos = [all_key_vals.index(a) for a in key_vals] try: all_data_vals = cifdata[full_from_name] except StarFile.StarDerivationError: pass result[full_to_name] = [all_data_vals[i] for i in filter_pos] return result def store_new_cat_values(self,cifdata,result,the_category): """Store the values in [[result]] into [[cifdata]]""" the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key'] double_names = [a for a in result.keys() if a in cifdata] if len(double_names)>0: already_present = [a for a in self.names_in_cat(the_category) if a in cifdata] if set(already_present) != set(result.keys()): print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys())))) return #check key values old_keys = set(cifdata[the_key]) common_keys = old_keys & set(result[the_key]) if len(common_keys)>0: print("Category %s not updated, key values in common:" % (common_keys)) return #extend result values with old values for one_name,one_value in result.items(): result[one_name].extend(cifdata[one_name]) for one_name, one_value in result.items(): try: self.store_new_looped_value(one_name,cifdata,one_value,False) except StarFile.StarError: print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value))) #put the key as the first item print('Fixing item order for {}'.format(repr(the_key))) for one_key in the_key: #should only be one cifdata.ChangeItemOrder(one_key,0) def generate_default_packet(self,catname,catkey,keyvalue): """Return a StarPacket with items from ``catname`` and a key value of ``keyvalue``""" newpack = StarPacket() for na in self.names_in_cat(catname): def_val = self[na].get("_enumeration.default","") if def_val: final_val = self.change_type(na,def_val) newpack.extend(final_val) setattr(newpack,na,final_val) if len(newpack)>0: newpack.extend(keyvalue) setattr(newpack,catkey,keyvalue) return newpack def switch_numpy(self,to_val): pass def change_type(self,itemname,inval): if inval == "?": return inval change_function = convert_type(self[itemname]) if isinstance(inval,list) and not isinstance(inval,StarFile.StarList): #from a loop newval = list([change_function(a) for a in inval]) else: newval = change_function(inval) return newval def install_validation_functions(self): """Install the DDL-appropriate validation checks""" if self.diclang != 'DDLm': # functions which check conformance self.item_validation_funs = [ self.validate_item_type, self.validate_item_esd, self.validate_item_enum, self.validate_enum_range, self.validate_looping ] # functions checking loop values self.loop_validation_funs = [ self.validate_loop_membership, self.validate_loop_key, self.validate_loop_references ] # where we need to look at other values self.global_validation_funs = [ self.validate_exclusion, self.validate_parent, self.validate_child, self.validate_dependents, self.validate_uniqueness ] # where only a full block will do self.block_validation_funs = [ self.validate_mandatory_category ] # removal is quicker with special checks self.global_remove_validation_funs = [ self.validate_remove_parent_child ] elif self.diclang == 'DDLm': self.item_validation_funs = [ self.validate_item_enum, self.validate_item_esd_ddlm, ] self.loop_validation_funs = [ self.validate_looping_ddlm, self.validate_loop_key_ddlm, self.validate_loop_membership ] self.global_validation_funs = [] self.block_validation_funs = [ self.check_mandatory_items, self.check_prohibited_items ] self.global_remove_validation_funs = [] self.optimize = False # default value self.done_parents = [] self.done_children = [] self.done_keys = [] def validate_item_type(self,item_name,item_value): def mymatch(m,a): res = m.match(a) if res != None: return res.group() else: return "" target_type = self[item_name].get(self.type_spec) if target_type == None: # e.g. a category definition return {"result":True} # not restricted in any way matchexpr = self.typedic[target_type] item_values = listify(item_value) #for item in item_values: #print("Type match " + item_name + " " + item + ":",) #skip dots and question marks check_all = [a for a in item_values if a !="." and a != "?"] check_all = [a for a in check_all if mymatch(matchexpr,a) != a] if len(check_all)>0: return {"result":False,"bad_values":check_all} else: return {"result":True} def decide(self,result_list): """Construct the return list""" if len(result_list)==0: return {"result":True} else: return {"result":False,"bad_values":result_list} def validate_item_container(self, item_name,item_value): container_type = self[item_name]['_type.container'] item_values = listify(item_value) if container_type == 'Single': okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))] return decide(okcheck) if container_type in ('Multiple','List'): okcheck = [a for a in item_values if not isinstance(a,StarList)] return decide(okcheck) if container_type == 'Array': #A list with numerical values okcheck = [a for a in item_values if not isinstance(a,StarList)] first_check = decide(okcheck) if not first_check['result']: return first_check #num_check = [a for a in item_values if len([b for b in a if not isinstance def validate_item_esd(self,item_name,item_value): if self[item_name].get(self.primitive_type) != 'numb': return {"result":None} can_esd = self[item_name].get(self.esd_spec,"none") == "esd" if can_esd: return {"result":True} #must be OK! item_values = listify(item_value) check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None]) if len(check_all)>0: return {"result":False,"bad_values":check_all} return {"result":True} def validate_item_esd_ddlm(self,item_name,item_value): if self[item_name].get('self.primitive_type') not in \ ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']: return {"result":None} can_esd = True if self[item_name].get('_type.purpose') != 'Measurand': can_esd = False item_values = listify(item_value) check_all = [get_number_with_esd(a)[1] for a in item_values] check_all = [v for v in check_all if (can_esd and v == None) or \ (not can_esd and v != None)] if len(check_all)>0: return {"result":False,"bad_values":check_all} return {"result":True} def validate_enum_range(self,item_name,item_value): if "_item_range.minimum" not in self[item_name] and \ "_item_range.maximum" not in self[item_name]: return {"result":None} minvals = self[item_name].get("_item_range.minimum",default = ["."]) maxvals = self[item_name].get("_item_range.maximum",default = ["."]) def makefloat(a): if a == ".": return a else: return float(a) maxvals = map(makefloat, maxvals) minvals = map(makefloat, minvals) rangelist = list(zip(minvals,maxvals)) item_values = listify(item_value) def map_check(rangelist,item_value): if item_value == "?" or item_value == ".": return True iv,esd = get_number_with_esd(item_value) if iv==None: return None #shouldn't happen as is numb type for lower,upper in rangelist: #check the minima if lower == ".": lower = iv - 1 if upper == ".": upper = iv + 1 if iv > lower and iv < upper: return True if upper == lower and iv == upper: return True # debug # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper)) return False check_all = [a for a in item_values if map_check(rangelist,a) != True] if len(check_all)>0: return {"result":False,"bad_values":check_all} else: return {"result":True} def validate_item_enum(self,item_name,item_value): try: enum_list = self[item_name][self.enum_spec][:] except KeyError: return {"result":None} enum_list.append(".") #default value enum_list.append("?") #unknown item_values = listify(item_value) #print("Enum check: {!r} in {!r}".format(item_values, enum_list)) check_all = [a for a in item_values if a not in enum_list] if len(check_all)>0: return {"result":False,"bad_values":check_all} else: return {"result":True} def validate_looping(self,item_name,item_value): try: must_loop = self[item_name][self.must_loop_spec] except KeyError: return {"result":None} if must_loop == 'yes' and isinstance(item_value,(unicode,str)): # not looped return {"result":False} #this could be triggered if must_loop == 'no' and not isinstance(item_value,(unicode,str)): return {"result":False} return {"result":True} def validate_looping_ddlm(self,loop_names): """Check that all names are loopable""" truly_loopy = self.get_final_cats(loop_names) if len(truly_loopy)<len(loop_names): #some are bad categories = [(a,self[a][self.cat_spec].lower()) for a in loop_names] not_looped = [a[0] for a in categories if a[1] not in self.parent_lookup.keys()] return {"result":False,"bad_items":not_looped} return {"result":True} def validate_loop_membership(self,loop_names): final_cat = self.get_final_cats(loop_names) bad_items = [a for a in final_cat if a != final_cat[0]] if len(bad_items)>0: return {"result":False,"bad_items":bad_items} else: return {"result":True} def get_final_cats(self,loop_names): """Return a list of the uppermost parent categories for the loop_names. Names that are not from loopable categories are ignored.""" try: categories = [self[a][self.cat_spec].lower() for a in loop_names] except KeyError: #category_id is mandatory raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0])) truly_looped = [a for a in categories if a in self.parent_lookup.keys()] return [self.parent_lookup[a] for a in truly_looped] def validate_loop_key(self,loop_names): category = self[loop_names[0]][self.cat_spec] # find any unique values which must be present key_spec = self[category].get(self.key_spec,[]) for names_to_check in key_spec: if isinstance(names_to_check,unicode): #only one names_to_check = [names_to_check] for loop_key in names_to_check: if loop_key not in loop_names: #is this one of those dang implicit items? if self[loop_key].get(self.must_exist_spec,None) == "implicit": continue #it is virtually there... alternates = self.get_alternates(loop_key) if alternates == []: return {"result":False,"bad_items":loop_key} for alt_names in alternates: alt = [a for a in alt_names if a in loop_names] if len(alt) == 0: return {"result":False,"bad_items":loop_key} # no alternates return {"result":True} def validate_loop_key_ddlm(self,loop_names): """Make sure at least one of the necessary keys are available""" final_cats = self.get_final_cats(loop_names) if len(final_cats)>0: poss_keys = self.cat_key_table[final_cats[0]][0] # found_keys = [a for a in poss_keys if a in loop_names] if len(found_keys)>0: return {"result":True} else: return {"result":False,"bad_items":poss_keys} else: return {"result":True} def validate_loop_references(self,loop_names): must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names] must_haves = [a for a in must_haves if a != None] # build a flat list. For efficiency we don't remove duplicates,as # we expect no more than the order of 10 or 20 looped names. def flat_func(a,b): if isinstance(b,unicode): a.append(b) #single name else: a.extend(b) #list of names return a flat_mh = [] [flat_func(flat_mh,a) for a in must_haves] group_mh = filter(lambda a:a[-1]=="_",flat_mh) single_mh = filter(lambda a:a[-1]!="_",flat_mh) res = [a for a in single_mh if a not in loop_names] def check_gr(s_item, name_list): nl = map(lambda a:a[:len(s_item)],name_list) if s_item in nl: return True return False res_g = [a for a in group_mh if check_gr(a,loop_names)] if len(res) == 0 and len(res_g) == 0: return {"result":True} # construct alternate list alternates = map(lambda a: (a,self.get_alternates(a)),res) alternates = [a for a in alternates if a[1] != []] # next line purely for error reporting missing_alts = [a[0] for a in alternates if a[1] == []] if len(alternates) != len(res): return {"result":False,"bad_items":missing_alts} #short cut; at least one #doesn't have an altern #loop over alternates for orig_name,alt_names in alternates: alt = [a for a in alt_names if a in loop_names] if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates return {"result":True} #found alternates def get_alternates(self,main_name,exclusive_only=False): alternates = self[main_name].get(self.related_func,None) alt_names = [] if alternates != None: alt_names = self[main_name].get(self.related_item,None) if isinstance(alt_names,unicode): alt_names = [alt_names] alternates = [alternates] together = zip(alt_names,alternates) if exclusive_only: alt_names = [a for a in together if a[1]=="alternate_exclusive" \ or a[1]=="replace"] else: alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"] alt_names = list([a[0] for a in alt_names]) # now do the alias thing alias_names = listify(self[main_name].get("_item_aliases.alias_name",[])) alt_names.extend(alias_names) # print("Alternates for {}: {!r}".format(main_name, alt_names)) return alt_names def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}): alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)] item_name_list = [a.lower() for a in whole_block.keys()] item_name_list.extend([a.lower() for a in provisional_items.keys()]) bad = [a for a in alternates if a in item_name_list] if len(bad)>0: print("Bad: %s, alternates %s" % (repr(bad),repr(alternates))) return {"result":False,"bad_items":bad} else: return {"result":True} # validate that parent exists and contains matching values def validate_parent(self,item_name,item_value,whole_block,provisional_items={},globals={}): parent_item = self[item_name].get(self.parent_spec) if not parent_item: return {"result":None} #no parent specified if isinstance(parent_item,list): parent_item = parent_item[0] if self.optimize: if parent_item in self.done_parents: return {"result":None} else: self.done_parents.append(parent_item) print("Done parents %s" % repr(self.done_parents)) # initialise parent/child values if isinstance(item_value,unicode): child_values = [item_value] else: child_values = item_value[:] #copy for safety # track down the parent # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block)) # if globals contains the parent values, we are doing a DDL2 dictionary, and so # we have collected all parent values into the global block - so no need to search # for them elsewhere. # print("Looking for {!r}".format(parent_item)) parent_values = globals.get(parent_item) if not parent_values: parent_values = provisional_items.get(parent_item,whole_block.get(parent_item)) if not parent_values: # go for alternates namespace = whole_block.keys() namespace.extend(provisional_items.keys()) namespace.extend(globals.keys()) alt_names = filter_present(self.get_alternates(parent_item),namespace) if len(alt_names) == 0: if len([a for a in child_values if a != "." and a != "?"])>0: return {"result":False,"parent":parent_item}#no parent available -> error else: return {"result":None} #maybe True is more appropriate?? parent_item = alt_names[0] #should never be more than one?? parent_values = provisional_items.get(parent_item,whole_block.get(parent_item)) if not parent_values: # check global block parent_values = globals.get(parent_item) if isinstance(parent_values,unicode): parent_values = [parent_values] #print("Checking parent %s against %s, values %r/%r" % (parent_item, # item_name, parent_values, child_values)) missing = self.check_parent_child(parent_values,child_values) if len(missing) > 0: return {"result":False,"bad_values":missing,"parent":parent_item} return {"result":True} def validate_child(self,item_name,item_value,whole_block,provisional_items={},globals={}): try: child_items = self[item_name][self.child_spec][:] #copy except KeyError: return {"result":None} #not relevant # special case for dictionaries -> we check parents of children only if item_name in globals: #dictionary so skip return {"result":None} if isinstance(child_items,unicode): # only one child child_items = [child_items] if isinstance(item_value,unicode): # single value parent_values = [item_value] else: parent_values = item_value[:] # expand child list with list of alternates for child_item in child_items[:]: child_items.extend(self.get_alternates(child_item)) # now loop over the children for child_item in child_items: if self.optimize: if child_item in self.done_children: return {"result":None} else: self.done_children.append(child_item) print("Done children %s" % repr(self.done_children)) if child_item in provisional_items: child_values = provisional_items[child_item][:] elif child_item in whole_block: child_values = whole_block[child_item][:] else: continue if isinstance(child_values,unicode): child_values = [child_values] # print("Checking child %s against %s, values %r/%r" % (child_item, # item_name, child_values, parent_values)) missing = self.check_parent_child(parent_values,child_values) if len(missing)>0: return {"result":False,"bad_values":missing,"child":child_item} return {"result":True} #could mean that no child items present #a generic checker: all child vals should appear in parent_vals def check_parent_child(self,parent_vals,child_vals): # shield ourselves from dots and question marks pv = parent_vals[:] pv.extend([".","?"]) res = [a for a in child_vals if a not in pv] #print("Missing: %s" % res) return res def validate_remove_parent_child(self,item_name,whole_block): try: child_items = self[item_name][self.child_spec] except KeyError: return {"result":None} if isinstance(child_items,unicode): # only one child child_items = [child_items] for child_item in child_items: if child_item in whole_block: return {"result":False,"child":child_item} return {"result":True} def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}): try: dep_items = self[item_name][self.dep_spec][:] except KeyError: return {"result":None} #not relevant if isinstance(dep_items,unicode): dep_items = [dep_items] actual_names = whole_block.keys() actual_names.extend(prov.keys()) actual_names.extend(globals.keys()) missing = [a for a in dep_items if a not in actual_names] if len(missing) > 0: alternates = map(lambda a:[self.get_alternates(a),a],missing) # compact way to get a list of alternative items which are # present have_check = [(filter_present(b[0],actual_names), b[1]) for b in alternates] have_check = list([a for a in have_check if len(a[0])==0]) if len(have_check) > 0: have_check = [a[1] for a in have_check] return {"result":False,"bad_items":have_check} return {"result":True} def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={}, globals={}): category = self[item_name].get(self.cat_spec) if category == None: print("No category found for %s" % item_name) return {"result":None} # print("Category {!r} for item {}".format(category, item_name)) # we make a copy in the following as we will be removing stuff later! unique_i = self[category].get("_category_key.name",[])[:] if isinstance(unique_i,unicode): unique_i = [unique_i] if item_name not in unique_i: #no need to verify return {"result":None} if isinstance(item_value,unicode): #not looped return {"result":None} # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i)) # check that we can't optimize by not doing this check if self.optimize: if unique_i in self.done_keys: return {"result":None} else: self.done_keys.append(unique_i) val_list = [] # get the matching data from any other data items unique_i.remove(item_name) other_data = [] if len(unique_i) > 0: # i.e. do have others to think about for other_name in unique_i: # we look for the value first in the provisional dict, then the main block # the logic being that anything in the provisional dict overrides the # main block if other_name in provisional_items: other_data.append(provisional_items[other_name]) elif other_name in whole_block: other_data.append(whole_block[other_name]) elif self[other_name].get(self.must_exist_spec)=="implicit": other_data.append([item_name]*len(item_value)) #placeholder else: return {"result":False,"bad_items":other_name}#missing data name # ok, so we go through all of our values # this works by comparing lists of strings to one other, and # so could be fooled if you think that '1.' and '1' are # identical for i in range(len(item_value)): #print("Value no. %d" % i, end=" ") this_entry = item_value[i] for j in range(len(other_data)): this_entry = " ".join([this_entry,other_data[j][i]]) #print("Looking for {!r} in {!r}: ".format(this_entry, val_list)) if this_entry in val_list: return {"result":False,"bad_values":this_entry} val_list.append(this_entry) return {"result":True} def validate_mandatory_category(self,whole_block): mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"] if len(mand_cats) == 0: return {"result":True} # print("Mandatory categories - {!r}".format(mand_cats) # find which categories each of our datanames belongs to all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()] missing = set(mand_cats) - set(all_cats) if len(missing) > 0: return {"result":False,"bad_items":repr(missing)} return {"result":True} def check_mandatory_items(self,whole_block,default_scope='Item'): """Return an error if any mandatory items are missing""" if len(self.scopes_mandatory)== 0: return {"result":True} if default_scope == 'Datablock': return {"result":True} #is a data file scope = whole_block.get('_definition.scope',default_scope) if '_dictionary.title' in whole_block: scope = 'Dictionary' missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block]) if len(missing)==0: return {"result":True} else: return {"result":False,"bad_items":missing} def check_prohibited_items(self,whole_block,default_scope='Item'): """Return an error if any prohibited items are present""" if len(self.scopes_naughty)== 0: return {"result":True} if default_scope == 'Datablock': return {"result":True} #is a data file scope = whole_block.get('_definition.scope',default_scope) if '_dictionary.title' in whole_block: scope = 'Dictionary' present = list([a for a in self.scopes_naughty[scope] if a in whole_block]) if len(present)==0: return {"result":True} else: return {"result":False,"bad_items":present} def run_item_validation(self,item_name,item_value): return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])} def run_loop_validation(self,loop_names): return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])} def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}): results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs]) return {item_name:results} def run_block_validation(self,whole_block,block_scope='Item'): results = list([(f.__name__,f(whole_block)) for f in self.block_validation_funs]) # fix up the return values return {"whole_block":results} def optimize_on(self): self.optimize = True self.done_keys = [] self.done_children = [] self.done_parents = [] def optimize_off(self): self.optimize = False self.done_keys = [] self.done_children = [] self.done_parents = [] class ValidCifBlock(CifBlock): """A `CifBlock` that is valid with respect to a given CIF dictionary. Methods of `CifBlock` are overridden where necessary to disallow addition of invalid items to the `CifBlock`. ## Initialisation * `dic` is a `CifDic` object to be used for validation. """ def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords): CifBlock.__init__(self,*args,**kwords) if dic and diclist: print("Warning: diclist argument ignored when initialising ValidCifBlock") if isinstance(dic,CifDic): self.fulldic = dic else: raise TypeError( "ValidCifBlock passed non-CifDic type in dic argument") if len(diclist)==0 and not dic: raise ValidCifError( "At least one dictionary must be specified") if diclist and not dic: self.fulldic = merge_dic(diclist,mergemode) if not self.run_data_checks()[0]: raise ValidCifError( self.report()) def run_data_checks(self,verbose=False): self.v_result = {} self.fulldic.optimize_on() for dataname in self.keys(): update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname])) update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self)) for loop_names in self.loops.values(): update_value(self.v_result,self.fulldic.run_loop_validation(loop_names)) # now run block-level checks update_value(self.v_result,self.fulldic.run_block_validation(self)) # return false and list of baddies if anything didn't match self.fulldic.optimize_off() all_keys = list(self.v_result.keys()) #dictionary will change for test_key in all_keys: #print("%s: %r" % (test_key, self.v_result[test_key])) self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False] if len(self.v_result[test_key]) == 0: del self.v_result[test_key] isvalid = len(self.v_result)==0 #if not isvalid: # print("Baddies: {!r}".format(self.v_result)) return isvalid,self.v_result def single_item_check(self,item_name,item_value): #self.match_single_item(item_name) if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_item_validation(item_name,item_value) baddies = list([a for a in result[item_name] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def loop_item_check(self,loop_names): in_dic_names = list([a for a in loop_names if a in self.fulldic]) if len(in_dic_names)==0: result = {loop_names[0]:[]} else: result = self.fulldic.run_loop_validation(in_dic_names) baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies)) return isvalid,baddies def global_item_check(self,item_name,item_value,provisional_items={}): if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_global_validation(item_name, item_value,self,provisional_items = provisional_items) baddies = list([a for a in result[item_name] if a[1]["result"] is False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def remove_global_item_check(self,item_name): if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_remove_global_validation(item_name,self,False) baddies = list([a for a in result[item_name] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def AddToLoop(self,dataname,loopdata): # single item checks paired_data = loopdata.items() for name,value in paired_data: valid,problems = self.single_item_check(name,value) self.report_if_invalid(valid,problems) # loop item checks; merge with current loop found = 0 for aloop in self.block["loops"]: if dataname in aloop: loopnames = aloop.keys() for new_name in loopdata.keys(): if new_name not in loopnames: loopnames.append(new_name) valid,problems = self.looped_item_check(loopnames) self.report_if_invalid(valid,problems) prov_dict = loopdata.copy() for name,value in paired_data: del prov_dict[name] # remove temporarily valid,problems = self.global_item_check(name,value,prov_dict) prov_dict[name] = value # add back in self.report_if_invalid(valid,problems) CifBlock.AddToLoop(self,dataname,loopdata) def AddCifItem(self,data): if isinstance(data[0],(unicode,str)): # single item valid,problems = self.single_item_check(data[0],data[1]) self.report_if_invalid(valid,problems,data[0]) valid,problems = self.global_item_check(data[0],data[1]) self.report_if_invalid(valid,problems,data[0]) elif isinstance(data[0],tuple) or isinstance(data[0],list): paired_data = list(zip(data[0],data[1])) for name,value in paired_data: valid,problems = self.single_item_check(name,value) self.report_if_invalid(valid,problems,name) valid,problems = self.loop_item_check(data[0]) self.report_if_invalid(valid,problems,data[0]) prov_dict = {} # for storing temporary items for name,value in paired_data: prov_dict[name]=value for name,value in paired_data: del prov_dict[name] # remove temporarily valid,problems = self.global_item_check(name,value,prov_dict) prov_dict[name] = value # add back in self.report_if_invalid(valid,problems,name) else: raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item") super(ValidCifBlock,self).AddCifItem(data) def AddItem(self,key,value,**kwargs): """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary""" valid,problems = self.single_item_check(key,value) self.report_if_invalid(valid,problems,key) valid,problems = self.global_item_check(key,value) self.report_if_invalid(valid,problems,key) super(ValidCifBlock,self).AddItem(key,value,**kwargs) # utility function def report_if_invalid(self,valid,bad_list,data_name): if not valid: bad_tests = [a[0] for a in bad_list] error_string = ",".join(bad_tests) error_string = repr(data_name) + " fails following validity checks: " + error_string raise ValidCifError( error_string) def __delitem__(self,key): # we don't need to run single item checks; we do need to run loop and # global checks. if key in self: try: loop_items = self.GetLoop(key) except TypeError: loop_items = [] if loop_items: #need to check loop conformance loop_names = [a[0] for a in loop_items if a[0] != key] valid,problems = self.loop_item_check(loop_names) self.report_if_invalid(valid,problems) valid,problems = self.remove_global_item_check(key) self.report_if_invalid(valid,problems) self.RemoveCifItem(key) def report(self): outstr = StringIO() outstr.write( "Validation results\n") outstr.write( "------------------\n") print("%d invalid items found\n" % len(self.v_result)) for item_name,val_func_list in self.v_result.items(): outstr.write("%s fails following tests:\n" % item_name) for val_func in val_func_list: outstr.write("\t%s\n") return outstr.getvalue() class ValidCifFile(CifFile): """A CIF file for which all datablocks are valid. Argument `dic` to initialisation specifies a `CifDic` object to use for validation.""" def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs): if not diclist and not dic and not hasattr(self,'bigdic'): raise ValidCifError( "At least one dictionary is required to create a ValidCifFile object") if not dic and diclist: #merge here for speed self.bigdic = merge_dic(diclist,mergemode) elif dic and not diclist: self.bigdic = dic CifFile.__init__(self,*args,**kwargs) for blockname in self.keys(): self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic) def NewBlock(self,blockname,blockcontents,**kwargs): CifFile.NewBlock(self,blockname,blockcontents,**kwargs) # dictionary[blockname] is now a CifBlock object. We # turn it into a ValidCifBlock object self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic, data=self.dictionary[blockname]) class ValidationResult: """Represents validation result. It is initialised with """ def __init__(self,results): """results is return value of validate function""" self.valid_result, self.no_matches = results def report(self,use_html): """Return string with human-readable description of validation result""" return validate_report((self.valid_result, self.no_matches),use_html) def is_valid(self,block_name=None): """Return True for valid CIF file, otherwise False""" if block_name is not None: block_names = [block_name] else: block_names = self.valid_result.iterkeys() for block_name in block_names: if not self.valid_result[block_name] == (True,{}): valid = False break else: valid = True return valid def has_no_match_items(self,block_name=None): """Return true if some items are not found in dictionary""" if block_name is not None: block_names = [block_name] else: block_names = self.no_matches.iter_keys() for block_name in block_names: if self.no_matches[block_name]: has_no_match_items = True break else: has_no_match_items = False return has_no_match_items def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False): """Validate the `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing, to the results of merging the `CifDic` objects in `diclist` according to `mergemode`. Flag `isdic` indicates that `ciffile` is a CIF dictionary meaning that save frames should be accessed for validation and that mandatory_category should be interpreted differently for DDL2.""" if not isinstance(ciffile,CifFile): check_file = CifFile(ciffile) else: check_file = ciffile if not dic: fulldic = merge_dic(diclist,mergemode) else: fulldic = dic no_matches = {} valid_result = {} if isdic: #assume one block only check_file.scoping = 'instance' #only data blocks visible top_level = check_file.keys()[0] check_file.scoping = 'dictionary' #all blocks visible # collect a list of parents for speed if fulldic.diclang == 'DDL2': poss_parents = fulldic.get_all("_item_linked.parent_name") for parent in poss_parents: curr_parent = listify(check_file.get(parent,[])) new_vals = check_file.get_all(parent) new_vals.extend(curr_parent) if len(new_vals)>0: check_file[parent] = new_vals print("Added %s (len %d)" % (parent,len(check_file[parent]))) # now run the validations for block in check_file.keys(): if isdic and block == top_level: block_scope = 'Dictionary' elif isdic: block_scope = 'Item' else: block_scope = 'Datablock' no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic] # remove non-matching items print("Not matched: " + repr(no_matches[block])) for nogood in no_matches[block]: del check_file[block][nogood] print("Validating block %s, scope %s" % (block,block_scope)) valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope) return valid_result,no_matches def validate_report(val_result,use_html=False): valid_result,no_matches = val_result outstr = StringIO() if use_html: outstr.write("<h2>Validation results</h2>") else: outstr.write( "Validation results\n") outstr.write( "------------------\n") if len(valid_result) > 10: suppress_valid = True #don't clutter with valid messages if use_html: outstr.write("<p>For brevity, valid blocks are not reported in the output.</p>") else: suppress_valid = False for block in valid_result.keys(): block_result = valid_result[block] if block_result[0]: out_line = "Block '%s' is VALID" % block else: out_line = "Block '%s' is INVALID" % block if use_html: if (block_result[0] and (not suppress_valid or len(no_matches[block])>0)) or not block_result[0]: outstr.write( "<h3>%s</h3><p>" % out_line) else: outstr.write( "\n %s\n" % out_line) if len(no_matches[block])!= 0: if use_html: outstr.write( "<p>The following items were not found in the dictionary") outstr.write(" (note that this does not invalidate the data block):</p>") outstr.write("<p><table>\n") [outstr.write("<tr><td>%s</td></tr>" % it) for it in no_matches[block]] outstr.write("</table>\n") else: outstr.write( "\n The following items were not found in the dictionary:\n") outstr.write("Note that this does not invalidate the data block\n") [outstr.write("%s\n" % it) for it in no_matches[block]] # now organise our results by type of error, not data item... error_type_dic = {} for error_item, error_list in block_result[1].items(): for func_name,bad_result in error_list: bad_result.update({"item_name":error_item}) try: error_type_dic[func_name].append(bad_result) except KeyError: error_type_dic[func_name] = [bad_result] # make a table of test name, test message info_table = {\ 'validate_item_type':\ "The following data items had badly formed values", 'validate_item_esd':\ "The following data items should not have esds appended", 'validate_enum_range':\ "The following data items have values outside permitted range", 'validate_item_enum':\ "The following data items have values outside permitted set", 'validate_looping':\ "The following data items violate looping constraints", 'validate_loop_membership':\ "The following looped data names are of different categories to the first looped data name", 'validate_loop_key':\ "A required dataname for this category is missing from the loop\n containing the dataname", 'validate_loop_key_ddlm':\ "A loop key is missing for the category containing the dataname", 'validate_loop_references':\ "A dataname required by the item is missing from the loop", 'validate_parent':\ "A parent dataname is missing or contains different values", 'validate_child':\ "A child dataname contains different values to the parent", 'validate_uniqueness':\ "One or more data items do not take unique values", 'validate_dependents':\ "A dataname required by the item is missing from the data block", 'validate_exclusion': \ "Both dataname and exclusive alternates or aliases are present in data block", 'validate_mandatory_category':\ "A required category is missing from this block", 'check_mandatory_items':\ "A required data attribute is missing from this block", 'check_prohibited_items':\ "A prohibited data attribute is present in this block"} for test_name,test_results in error_type_dic.items(): if use_html: outstr.write(html_error_report(test_name,info_table[test_name],test_results)) else: outstr.write(error_report(test_name,info_table[test_name],test_results)) outstr.write("\n\n") return outstr.getvalue() # A function to lay out a single error report. We are passed # the name of the error (one of our validation functions), the # explanation to print out, and a dictionary with the error # information. We print no more than 50 characters of the item def error_report(error_name,error_explanation,error_dics): retstring = "\n\n " + error_explanation + ":\n\n" headstring = "%-32s" % "Item name" bodystring = "" if "bad_values" in error_dics[0]: headstring += "%-20s" % "Bad value(s)" if "bad_items" in error_dics[0]: headstring += "%-20s" % "Bad dataname(s)" if "child" in error_dics[0]: headstring += "%-20s" % "Child" if "parent" in error_dics[0]: headstring += "%-20s" % "Parent" headstring +="\n" for error in error_dics: bodystring += "\n%-32s" % error["item_name"] if "bad_values" in error: out_vals = [repr(a)[:50] for a in error["bad_values"]] bodystring += "%-20s" % out_vals if "bad_items" in error: bodystring += "%-20s" % repr(error["bad_items"]) if "child" in error: bodystring += "%-20s" % repr(error["child"]) if "parent" in error: bodystring += "%-20s" % repr(error["parent"]) return retstring + headstring + bodystring # This lays out an HTML error report def html_error_report(error_name,error_explanation,error_dics,annotate=[]): retstring = "<h4>" + error_explanation + ":</h4>" retstring = retstring + "<table cellpadding=5><tr>" headstring = "<th>Item name</th>" bodystring = "" if "bad_values" in error_dics[0]: headstring += "<th>Bad value(s)</th>" if "bad_items" in error_dics[0]: headstring += "<th>Bad dataname(s)</th>" if "child" in error_dics[0]: headstring += "<th>Child</th>" if "parent" in error_dics[0]: headstring += "<th>Parent</th>" headstring +="</tr>\n" for error in error_dics: bodystring += "<tr><td><tt>%s</tt></td>" % error["item_name"] if "bad_values" in error: bodystring += "<td>%s</td>" % error["bad_values"] if "bad_items" in error: bodystring += "<td><tt>%s</tt></td>" % error["bad_items"] if "child" in error: bodystring += "<td><tt>%s</tt></td>" % error["child"] if "parent" in error: bodystring += "<td><tt>%s</tt></td>" % error["parent"] bodystring += "</tr>\n" return retstring + headstring + bodystring + "</table>\n" def run_data_checks(check_block,fulldic,block_scope='Item'): v_result = {} for key in check_block.keys(): update_value(v_result, fulldic.run_item_validation(key,check_block[key])) update_value(v_result, fulldic.run_global_validation(key,check_block[key],check_block)) for loopnames in check_block.loops.values(): update_value(v_result, fulldic.run_loop_validation(loopnames)) update_value(v_result,fulldic.run_block_validation(check_block,block_scope=block_scope)) # return false and list of baddies if anything didn't match all_keys = list(v_result.keys()) for test_key in all_keys: v_result[test_key] = [a for a in v_result[test_key] if a[1]["result"]==False] if len(v_result[test_key]) == 0: del v_result[test_key] # if even one false one is found, this should trigger # print("Baddies: {!r}".format(v_result)) isvalid = len(v_result)==0 return isvalid,v_result def get_number_with_esd(numstring): numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)' our_match = re.match(numb_re,numstring) if our_match: a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups() # print("Debug: {} -> {!r}".format(numstring, our_match.groups())) else: return None,None if dot or q: return None,None #a dot or question mark if exp: #has exponent exp = exp.replace("d","e") # mop up old fashioned numbers exp = exp.replace("D","e") base_num = base_num + exp # print("Debug: have %s for base_num from %s" % (base_num,numstring)) base_num = float(base_num) # work out esd, if present. if esd: esd = float(esd[1:-1]) # no brackets if dad: # decimal point + digits esd = esd * (10 ** (-1* len(dad))) if exp: esd = esd * (10 ** (float(exp[1:]))) return base_num,esd def float_with_esd(inval): if isinstance(inval,unicode): j = inval.find("(") if j>=0: return float(inval[:j]) return float(inval) def convert_type(definition): """Convert value to have the type given by definition""" #extract the actual required type information container = definition['_type.container'] dimension = definition.get('_type.dimension',StarFile.StarList([])) structure = interpret_structure(definition['_type.contents']) if container == 'Single': #a single value to convert return convert_single_value(structure) elif container == 'List': #lots of the same value return convert_list_values(structure,dimension) elif container == 'Multiple': #no idea return None elif container in ('Array','Matrix'): #numpy array return convert_matrix_values(structure) return lambda a:a #unable to convert def convert_single_value(type_spec): """Convert a single item according to type_spec""" if type_spec == 'Real': return float_with_esd if type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'): return int if type_spec == 'Complex': return complex if type_spec == 'Imag': return lambda a:complex(0,a) if type_spec in ('Code','Name','Tag'): #case-insensitive -> lowercase return lambda a:a.lower() return lambda a:a #can't do anything numeric class convert_simple_list(object): """\ Callable object that converts values in a simple list according to the specified element structure. """ def __init__(self, structure): self.converters = [convert_single_value(tp) for tp in structure] return def __call__(self, element): if len(element) != len(self.converters): emsg = "Expected iterable of %i values, got %i." % ( (len(self.converters), len(element))) raise ValueError(emsg) rv = [f(e) for f, e in zip(self.converters, element)] return rv # End of class convert_single_value def convert_list_values(structure, dimension): """Convert the values according to the element structure given in [[structure]]""" # simple repetition if isinstance(structure, (unicode, str)): fcnv = convert_single_value(structure) # assume structure is a list of types else: fcnv = convert_simple_list(structure) rv = fcnv # setup nested conversion function when dimension differs from 1. if len(dimension) > 0 and int(dimension[0]) != 1: rv = lambda args : [fcnv(a) for a in args] return rv def convert_matrix_values(valtype): """Convert a dREL String or Float valued List structure to a numpy matrix structure""" # first convert to numpy array, then let numpy do the work try: import numpy except ImportError: return lambda a:a #cannot do it if valtype == 'Real': dtype = float elif valtype == 'Integer': dtype = int elif valtype == 'Complex': dtype = complex else: raise ValueError('Unknown matrix value type') fcnv = lambda a : numpy.asarray(a, dtype=dtype) return fcnv def interpret_structure(struc_spec): """Interpret a DDLm structure specification""" from . import TypeContentsParser as t p = t.TypeParser(t.TypeParserScanner(struc_spec)) return getattr(p,"input")() # A utility function to append to item values rather than replace them def update_value(base_dict,new_items): for new_key in new_items.keys(): if new_key in base_dict: base_dict[new_key].extend(new_items[new_key]) else: base_dict[new_key] = new_items[new_key] #Transpose the list of lists passed to us def transpose(base_list): new_lofl = [] full_length = len(base_list) opt_range = range(full_length) for i in range(len(base_list[0])): new_packet = [] for j in opt_range: new_packet.append(base_list[j][i]) new_lofl.append(new_packet) return new_lofl # listify strings - used surprisingly often def listify(item): if isinstance(item,(unicode,str)): return [item] else: return item # given a list of search items, return a list of items # actually contained in the given data block def filter_present(namelist,datablocknames): return [a for a in namelist if a in datablocknames] # Make an item immutable, used if we want a list to be a key def make_immutable(values): """Turn list of StarList values into a list of immutable items""" if not isinstance(values[0],StarList): return values else: return [tuple(a) for a in values] # merge ddl dictionaries. We should be passed filenames or CifFile # objects def merge_dic(diclist,mergemode="replace",ddlspec=None): dic_as_cif_list = [] for dic in diclist: if not isinstance(dic,CifFile) and \ not isinstance(dic,(unicode,str)): raise TypeError("Require list of CifFile names/objects for dictionary merging") if not isinstance(dic,CifFile): dic_as_cif_list.append(CifFile(dic)) else: dic_as_cif_list.append(dic) # we now merge left to right basedic = dic_as_cif_list[0] if "on_this_dictionary" in basedic: #DDL1 style only for dic in dic_as_cif_list[1:]: basedic.merge(dic,mode=mergemode,match_att=["_name"]) elif len(basedic.keys()) == 1: #One block: DDL2/m style old_block = basedic[basedic.keys()[0]] for dic in dic_as_cif_list[1:]: new_block = dic[dic.keys()[0]] basedic.merge(dic,mode=mergemode, single_block=[basedic.keys()[0],dic.keys()[0]], match_att=["_item.name"],match_function=find_parent) return CifDic(basedic) def find_parent(ddl2_def): if "_item.name" not in ddl2_def: return None if isinstance(ddl2_def["_item.name"],unicode): return ddl2_def["_item.name"] if "_item_linked.child_name" not in ddl2_def: raise CifError("Asked to find parent in block with no child_names") if "_item_linked.parent_name" not in ddl2_def: raise CifError("Asked to find parent in block with no parent_names") result = list([a for a in ddl2_def["_item.name"] if a not in ddl2_def["_item_linked.child_name"]]) if len(result)>1 or len(result)==0: raise CifError("Unable to find single unique parent data item") return result[0] def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF', permissive=False): """ Read in a CIF file, returning a `CifFile` object. * `filename` may be a URL, a file path on the local system, or any object with a `read` method. * `grammar` chooses the CIF grammar variant. `1.0` is the original 1992 grammar and `1.1` is identical except for the exclusion of square brackets as the first characters in undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will read files according to the STAR2 publication. If grammar is `None`, autodetection will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for properly-formed CIF2.0 files. Note that only Unicode characters in the basic multilingual plane are recognised (this will be fixed when PyCIFRW is ported to Python 3). * `scantype` can be `standard` or `flex`. `standard` provides pure Python parsing at the cost of a factor of 10 or so in speed. `flex` will tokenise the input CIF file using fast C routines, but is not available for CIF2/STAR2 files. Note that running PyCIFRW in Jython uses native Java regular expressions to provide a speedup regardless of this argument (and does not yet support CIF2). * `scoping` is only relevant where nested save frames are expected (STAR2 only). `instance` scoping makes nested save frames invisible outside their hierarchy, allowing duplicate save frame names in separate hierarchies. `dictionary` scoping makes all save frames within a data block visible to each other, thereby restricting all save frames to have unique names. Currently the only recognised value for `standard` is `CIF`, which when set enforces a maximum length of 75 characters for datanames and has no other effect. """ finalcif = CifFile(scoping=scoping,standard=standard) return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype, permissive=permissive) #return StarFile.StarFile(filename,maxlength,scantype=scantype,grammar=grammar,**kwargs) class CifLoopBlock(StarFile.LoopBlock): def __init__(self,data=(),**kwargs): super(CifLoopBlock,self).__init__(data,**kwargs) #No documentation flags
Functions
def ReadCif(
filename, grammar=u'auto', scantype=u'standard', scoping=u'instance', standard=u'CIF', permissive=False)
Read in a CIF file, returning a CifFile
object.
-
filename
may be a URL, a file path on the local system, or any object with aread
method. -
grammar
chooses the CIF grammar variant.1.0
is the original 1992 grammar and1.1
is identical except for the exclusion of square brackets as the first characters in undelimited datanames.2.0
will read files in the CIF2.0 standard, andSTAR2
will read files according to the STAR2 publication. If grammar isNone
, autodetection will be attempted in the order2.0
,1.1
and1.0
. This will always succeed for properly-formed CIF2.0 files. Note that only Unicode characters in the basic multilingual plane are recognised (this will be fixed when PyCIFRW is ported to Python 3). -
scantype
can bestandard
orflex
.standard
provides pure Python parsing at the cost of a factor of 10 or so in speed.flex
will tokenise the input CIF file using fast C routines, but is not available for CIF2/STAR2 files. Note that running PyCIFRW in Jython uses native Java regular expressions to provide a speedup regardless of this argument (and does not yet support CIF2). -
scoping
is only relevant where nested save frames are expected (STAR2 only).instance
scoping makes nested save frames invisible outside their hierarchy, allowing duplicate save frame names in separate hierarchies.dictionary
scoping makes all save frames within a data block visible to each other, thereby restricting all save frames to have unique names. Currently the only recognised value forstandard
isCIF
, which when set enforces a maximum length of 75 characters for datanames and has no other effect.
def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF', permissive=False): """ Read in a CIF file, returning a `CifFile` object. * `filename` may be a URL, a file path on the local system, or any object with a `read` method. * `grammar` chooses the CIF grammar variant. `1.0` is the original 1992 grammar and `1.1` is identical except for the exclusion of square brackets as the first characters in undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will read files according to the STAR2 publication. If grammar is `None`, autodetection will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for properly-formed CIF2.0 files. Note that only Unicode characters in the basic multilingual plane are recognised (this will be fixed when PyCIFRW is ported to Python 3). * `scantype` can be `standard` or `flex`. `standard` provides pure Python parsing at the cost of a factor of 10 or so in speed. `flex` will tokenise the input CIF file using fast C routines, but is not available for CIF2/STAR2 files. Note that running PyCIFRW in Jython uses native Java regular expressions to provide a speedup regardless of this argument (and does not yet support CIF2). * `scoping` is only relevant where nested save frames are expected (STAR2 only). `instance` scoping makes nested save frames invisible outside their hierarchy, allowing duplicate save frame names in separate hierarchies. `dictionary` scoping makes all save frames within a data block visible to each other, thereby restricting all save frames to have unique names. Currently the only recognised value for `standard` is `CIF`, which when set enforces a maximum length of 75 characters for datanames and has no other effect. """ finalcif = CifFile(scoping=scoping,standard=standard) return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype, permissive=permissive)
def Validate(
ciffile, dic=u'', diclist=[], mergemode=u'replace', isdic=False)
Validate the ciffile
conforms to the definitions in CifDic
object dic
, or if dic
is missing,
to the results of merging the CifDic
objects in diclist
according to mergemode
. Flag
isdic
indicates that ciffile
is a CIF dictionary meaning that save frames should be
accessed for validation and that mandatory_category should be interpreted differently for DDL2.
def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False): """Validate the `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing, to the results of merging the `CifDic` objects in `diclist` according to `mergemode`. Flag `isdic` indicates that `ciffile` is a CIF dictionary meaning that save frames should be accessed for validation and that mandatory_category should be interpreted differently for DDL2.""" if not isinstance(ciffile,CifFile): check_file = CifFile(ciffile) else: check_file = ciffile if not dic: fulldic = merge_dic(diclist,mergemode) else: fulldic = dic no_matches = {} valid_result = {} if isdic: #assume one block only check_file.scoping = 'instance' #only data blocks visible top_level = check_file.keys()[0] check_file.scoping = 'dictionary' #all blocks visible # collect a list of parents for speed if fulldic.diclang == 'DDL2': poss_parents = fulldic.get_all("_item_linked.parent_name") for parent in poss_parents: curr_parent = listify(check_file.get(parent,[])) new_vals = check_file.get_all(parent) new_vals.extend(curr_parent) if len(new_vals)>0: check_file[parent] = new_vals print("Added %s (len %d)" % (parent,len(check_file[parent]))) # now run the validations for block in check_file.keys(): if isdic and block == top_level: block_scope = 'Dictionary' elif isdic: block_scope = 'Item' else: block_scope = 'Datablock' no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic] # remove non-matching items print("Not matched: " + repr(no_matches[block])) for nogood in no_matches[block]: del check_file[block][nogood] print("Validating block %s, scope %s" % (block,block_scope)) valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope) return valid_result,no_matches
Classes
class CifBlock
A class to hold a single block of a CIF file. A CifBlock
object can be treated as
a Python dictionary, in particular, individual items can be accessed using square
brackets e.g. b['_a_dataname']
. All other Python dictionary methods are also
available (e.g. keys()
, values()
). Looped datanames will return a list of values.
Initialisation
When provided, data
should be another CifBlock
whose contents will be copied to
this block.
-
if
strict
is set, maximum name lengths will be enforced -
maxoutlength
is the maximum length for output lines -
wraplength
is the ideal length to make output lines -
When set,
overwrite
allows the values of datanames to be changed (otherwise an error is raised). -
compat_mode
will allow deprecated behaviour of creating single-dataname loops using the syntaxa[_dataname] = [1,2,3,4]
. This should now be done by callingCreateLoop
after setting the dataitem value.
class CifBlock(StarFile.StarBlock): """ A class to hold a single block of a CIF file. A `CifBlock` object can be treated as a Python dictionary, in particular, individual items can be accessed using square brackets e.g. `b['_a_dataname']`. All other Python dictionary methods are also available (e.g. `keys()`, `values()`). Looped datanames will return a list of values. ## Initialisation When provided, `data` should be another `CifBlock` whose contents will be copied to this block. * if `strict` is set, maximum name lengths will be enforced * `maxoutlength` is the maximum length for output lines * `wraplength` is the ideal length to make output lines * When set, `overwrite` allows the values of datanames to be changed (otherwise an error is raised). * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using the syntax `a[_dataname] = [1,2,3,4]`. This should now be done by calling `CreateLoop` after setting the dataitem value. """ def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs): """When provided, `data` should be another CifBlock whose contents will be copied to this block. * if `strict` is set, maximum name lengths will be enforced * `maxoutlength` is the maximum length for output lines * `wraplength` is the ideal length to make output lines * When set, `overwrite` allows the values of datanames to be changed (otherwise an error is raised). * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using the syntax `a[_dataname] = [1,2,3,4]`. This should now be done by calling `CreateLoop` after setting the dataitem value. """ if strict: maxnamelength=75 else: maxnamelength=-1 super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs) self.dictionary = None #DDL dictionary referring to this block self.compat_mode = compat_mode #old-style behaviour of setitem def RemoveCifItem(self,itemname): """Remove `itemname` from the CifBlock""" self.RemoveItem(itemname) def __setitem__(self,key,value): self.AddItem(key,value) # for backwards compatibility make a single-element loop if self.compat_mode: if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList): # single element loop self.CreateLoop([key]) def copy(self): newblock = super(CifBlock,self).copy() return self.copy.im_class(newblock) #catch inheritance def AddCifItem(self,data): """ *DEPRECATED*. Use `AddItem` instead.""" # we accept only tuples, strings and lists!! if not (isinstance(data[0],(unicode,tuple,list,str))): raise TypeError('Cif datanames are either a string, tuple or list') # we catch single item loops as well... if isinstance(data[0],(unicode,str)): self.AddSingleCifItem(data[0],list(data[1])) if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList): # a single element loop self.CreateLoop([data[0]]) return # otherwise, we loop over the datanames keyvals = zip(data[0][0],[list(a) for a in data[1][0]]) [self.AddSingleCifItem(a,b) for a,b in keyvals] # and create the loop self.CreateLoop(data[0][0]) def AddSingleCifItem(self,key,value): """*Deprecated*. Use `AddItem` instead""" """Add a single data item. If it is part of a loop, a separate call should be made""" self.AddItem(key,value) def loopnames(self): return [self.loops[a] for a in self.loops]
Ancestors (in MRO)
- CifBlock
- CifFile.StarFile.StarBlock
- __builtin__.object
Methods
def AddCifItem(
self, data)
DEPRECATED. Use AddItem
instead.
def AddCifItem(self,data): """ *DEPRECATED*. Use `AddItem` instead.""" # we accept only tuples, strings and lists!! if not (isinstance(data[0],(unicode,tuple,list,str))): raise TypeError('Cif datanames are either a string, tuple or list') # we catch single item loops as well... if isinstance(data[0],(unicode,str)): self.AddSingleCifItem(data[0],list(data[1])) if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList): # a single element loop self.CreateLoop([data[0]]) return # otherwise, we loop over the datanames keyvals = zip(data[0][0],[list(a) for a in data[1][0]]) [self.AddSingleCifItem(a,b) for a,b in keyvals] # and create the loop self.CreateLoop(data[0][0])
def AddItem(
self, key, value, precheck=False)
Add dataname key
to block with value value
. value
may be
a single value, a list or a tuple. If precheck
is False (the default),
all values will be checked and converted to unicode strings as necessary. If
precheck
is True, this checking is bypassed. No checking is necessary
when values are read from a CIF file as they are already in correct form.
def AddItem(self,key,value,precheck=False): """Add dataname `key` to block with value `value`. `value` may be a single value, a list or a tuple. If `precheck` is False (the default), all values will be checked and converted to unicode strings as necessary. If `precheck` is True, this checking is bypassed. No checking is necessary when values are read from a CIF file as they are already in correct form.""" if not isinstance(key,(unicode,str)): raise TypeError('Star datanames are strings only (got %s)' % repr( key )) key = unicode(key) #everything is unicode internally if not precheck: self.check_data_name(key,self.maxnamelength) # make sure no nasty characters # check for overwriting if key in self: if not self.overwrite: raise StarError( 'Attempt to insert duplicate item name %s' % key) if not precheck: #need to sanitise regval,empty_val = self.regularise_data(value) pure_string = check_stringiness(regval) self.check_item_value(regval) else: regval,empty_val = value,None pure_string = True # update ancillary information first lower_key = key.lower() if not lower_key in self and self.FindLoop(lower_key)<0: #need to add to order self.item_order.append(lower_key) # always remove from our case table in case the case is different try: del self.true_case[lower_key] except KeyError: pass self.true_case[lower_key] = key if pure_string: self.block.update({lower_key:[regval,empty_val]}) else: self.block.update({lower_key:[empty_val,regval]})
def AddLoopItem(
self, incomingdata, precheck=False, maxlength=-1)
Deprecated. Use AddItem
followed by CreateLoop
if
necessary.
def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1): """*Deprecated*. Use `AddItem` followed by `CreateLoop` if necessary.""" # print "Received data %s" % `incomingdata` # we accept tuples, strings, lists and dicts!! # Direct insertion: we have a string-valued key, with an array # of values -> single-item into our loop if isinstance(incomingdata[0],(tuple,list)): # a whole loop keyvallist = zip(incomingdata[0],incomingdata[1]) for key,value in keyvallist: self.AddItem(key,value) self.CreateLoop(incomingdata[0]) elif not isinstance(incomingdata[0],(unicode,str)): raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] )) else: self.AddItem(incomingdata[0],incomingdata[1])
def AddLoopName(
self, oldname, newname)
Add newname
to the loop containing oldname
. If it is already in the new loop, no
error is raised. If newname
is in a different loop, it is removed from that loop.
The number of values associated with newname
must match the number of values associated
with all other columns of the new loop or a ValueError
will be raised.
def AddLoopName(self,oldname, newname): """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no error is raised. If `newname` is in a different loop, it is removed from that loop. The number of values associated with `newname` must match the number of values associated with all other columns of the new loop or a `ValueError` will be raised.""" lower_newname = newname.lower() loop_no = self.FindLoop(oldname) if loop_no < 0: raise KeyError('%s not in loop' % oldname) if lower_newname in self.loops[loop_no]: return # check length old_provides = self.provide_value self.provide_value = False loop_len = len(self[oldname]) self.provide_value = old_provides if len(self[newname]) != loop_len: raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len)) # remove from any other loops [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]] # and add to this loop self.loops[loop_no].append(lower_newname) # remove from item_order if present try: self.item_order.remove(lower_newname) except ValueError: pass
def AddSingleCifItem(
self, key, value)
Deprecated. Use AddItem
instead
def AddSingleCifItem(self,key,value): """*Deprecated*. Use `AddItem` instead""" """Add a single data item. If it is part of a loop, a separate call should be made""" self.AddItem(key,value)
def AddToLoop(
self, dataname, loopdata)
Deprecated. Use AddItem
followed by calls to AddLoopName
.
Add multiple columns to the loop containing dataname
. loopdata
is a
collection of (key,value) pairs, where key
is the new dataname and value
is a list of values for that dataname
def AddToLoop(self,dataname,loopdata): """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`. Add multiple columns to the loop containing `dataname`. `loopdata` is a collection of (key,value) pairs, where `key` is the new dataname and `value` is a list of values for that dataname""" # check lengths thisloop = self.FindLoop(dataname) loop_len = len(self[dataname]) bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len] if len(bad_vals)>0: raise StarLengthError("Number of values for looped datanames %s not equal to %d" \ % (repr( bad_vals ),loop_len)) self.update(loopdata) self.loops[thisloop]+=loopdata.keys()
def ChangeItemOrder(
self, itemname, newpos)
Move the printout order of itemname
to newpos
. If itemname
is
in a loop, newpos
refers to the order within the loop.
def ChangeItemOrder(self,itemname,newpos): """Move the printout order of `itemname` to `newpos`. If `itemname` is in a loop, `newpos` refers to the order within the loop.""" if isinstance(itemname,(unicode,str)): true_name = itemname.lower() else: true_name = itemname loopno = self.FindLoop(true_name) if loopno < 0: #top level self.item_order.remove(true_name) self.item_order.insert(newpos,true_name) else: self.loops[loopno].remove(true_name) self.loops[loopno].insert(newpos,true_name)
def CreateLoop(
self, datanames, order=-1, length_check=True)
Create a loop in the datablock. datanames
is a list of datanames that
together form a loop. If length_check is True, they should have been initialised in the block
to have the same number of elements (possibly 0). If order
is given,
the loop will appear at this position in the block when printing
out. A loop counts as a single position.
def CreateLoop(self,datanames,order=-1,length_check=True): """Create a loop in the datablock. `datanames` is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If `order` is given, the loop will appear at this position in the block when printing out. A loop counts as a single position.""" if length_check: # check lengths: these datanames should exist listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)] if len(listed_values) == len(datanames): len_set = set([len(self[a]) for a in datanames]) if len(len_set)>1: raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set ))) elif len(listed_values) != 0: raise ValueError('Request to loop datanames where some are single values and some are not') # store as lower case lc_datanames = [d.lower() for d in datanames] # remove these datanames from all other loops [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]] # remove empty loops empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0] for a in empty_loops: self.item_order.remove(a) del self.loops[a] if len(self.loops)>0: loopno = max(self.loops.keys()) + 1 else: loopno = 1 self.loops[loopno] = list(lc_datanames) if order >= 0: self.item_order.insert(order,loopno) else: self.item_order.append(loopno) # remove these datanames from item ordering self.item_order = [a for a in self.item_order if a not in lc_datanames]
def FindLoop(
self, keyname)
Find the loop that contains keyname
and return its numerical index or
-1 if not present. The numerical index can be used to refer to the loop in
other routines.
def FindLoop(self,keyname): """Find the loop that contains `keyname` and return its numerical index or -1 if not present. The numerical index can be used to refer to the loop in other routines.""" loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]] if len(loop_no)>0: return loop_no[0] else: return -1
def GetCompoundKeyedPacket(
self, keydict)
Return the loop packet (a StarPacket
object) where the {key:(value,caseless)}
pairs
in keydict
take the appropriate values. Ignore case for a given key
if caseless
is
True. ValueError
is raised if no packet is found or more than one packet is found.
def GetCompoundKeyedPacket(self,keydict): """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is True. `ValueError` is raised if no packet is found or more than one packet is found.""" #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname]) keynames = list(keydict.keys()) my_loop = self.GetLoop(keynames[0]) for one_key in keynames: keyval,no_case = keydict[one_key] if no_case: my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()]) else: my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval]) if len(my_loop)!=1: raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop))) print("Compound keyed packet: %s" % my_loop[0]) return my_loop[0]
def GetFullItemValue(
self, itemname)
Return the value associated with itemname
, and a boolean flagging whether
(True) or not (False) it is in a form suitable for calculation. False is
always returned for strings and StarList
objects.
def GetFullItemValue(self,itemname): """Return the value associated with `itemname`, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and `StarList` objects.""" try: s,v = self.block[itemname.lower()] except KeyError: raise KeyError('Itemname %s not in datablock' % itemname) # prefer string value unless all are None # are we a looped value? if not isinstance(s,(tuple,list)) or isinstance(s,StarList): if not_none(s): return s,False #a string value else: return v,not isinstance(v,StarList) #a StarList is not calculation-ready elif not_none(s): return s,False #a list of string values else: if len(v)>0: return v,not isinstance(v[0],StarList) return v,True
def GetItemOrder(
self)
Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index
def GetItemOrder(self): """Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index""" return self.item_order[:]
def GetItemPosition(
self, itemname)
A utility function to get the numerical order in the printout
of itemname
. An item has coordinate (loop_no,pos)
with
the top level having a loop_no
of -1. If an integer is passed to
the routine then it will return the position of the loop
referenced by that number.
def GetItemPosition(self,itemname): """A utility function to get the numerical order in the printout of `itemname`. An item has coordinate `(loop_no,pos)` with the top level having a `loop_no` of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.""" if isinstance(itemname,int): # return loop position return (-1, self.item_order.index(itemname)) if not itemname in self: raise ValueError('No such dataname %s' % itemname) testname = itemname.lower() if testname in self.item_order: return (-1,self.item_order.index(testname)) loop_no = self.FindLoop(testname) loop_pos = self.loops[loop_no].index(testname) return loop_no,loop_pos
def GetItemValue(
self, itemname)
Return value of itemname
. If itemname
is looped, a list
of all values will be returned.
def GetItemValue(self,itemname): """Return value of `itemname`. If `itemname` is looped, a list of all values will be returned.""" return self.GetFullItemValue(itemname)[0]
def GetKeyedPacket(
self, keyname, keyvalue, no_case=False)
Return the loop packet (a StarPacket
object) where keyname
has value
keyvalue
. Ignore case in keyvalue
if no_case
is True. ValueError
is raised if no packet is found or more than one packet is found.
def GetKeyedPacket(self,keyname,keyvalue,no_case=False): """Return the loop packet (a `StarPacket` object) where `keyname` has value `keyvalue`. Ignore case in `keyvalue` if `no_case` is True. `ValueError` is raised if no packet is found or more than one packet is found.""" my_loop = self.GetLoop(keyname) #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block)) #print('Packet check on:' + keyname) #[print(repr(getattr(a,keyname))) for a in my_loop] if no_case: one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()] else: one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue] if len(one_pack)!=1: raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack))) print("Keyed packet: %s" % one_pack[0]) return one_pack[0]
def GetKeyedSemanticPacket(
self, keyvalue, cat_id)
Return a complete packet for category cat_id
where the
category key for the category equals keyvalue
. This routine
will understand any joined loops, so if separate loops in the
datafile belong to the
same category hierarchy (e.g. _atom_site
and _atom_site_aniso
),
the returned StarPacket
object will contain datanames from
both categories.
def GetKeyedSemanticPacket(self,keyvalue,cat_id): """Return a complete packet for category `cat_id` where the category key for the category equals `keyvalue`. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`), the returned `StarPacket` object will contain datanames from both categories.""" target_keys = self.dictionary.cat_key_table[cat_id] target_keys = [k[0] for k in target_keys] #one only in each list p = StarPacket() # set case-sensitivity flag lcase = False if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']: lcase = True for cat_key in target_keys: try: extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) except KeyError: #missing key try: test_key = self[cat_key] #generate key if possible print('Test key is %s' % repr( test_key )) if test_key is not None and\ not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)): print('Getting packet for key %s' % repr( keyvalue )) extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) except: #cannot be generated continue except ValueError: #none/more than one, assume none continue #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue) p.merge_packet(extra_packet) # the following attributes used to calculate missing values for keyname in target_keys: if hasattr(p,keyname): p.key = [keyname] break if not hasattr(p,"key"): raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p))) p.cif_dictionary = self.dictionary p.fulldata = self return p
def GetLoop(
self, keyname)
Return a StarFile.LoopBlock
object constructed from the loop containing keyname
.
keyname
is only significant as a way to specify the loop.
def GetLoop(self,keyname): """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`. `keyname` is only significant as a way to specify the loop.""" return LoopBlock(self,keyname)
def GetLoopNames(
self, keyname)
Return all datanames appearing together with keyname
def GetLoopNames(self,keyname): """Return all datanames appearing together with `keyname`""" loop_no = self.FindLoop(keyname) if loop_no >= 0: return self.loops[loop_no] else: raise KeyError('%s is not in any loop' % keyname)
def GetMultiKeyedSemanticPacket(
self, keydict, cat_id)
Return a complete packet for category cat_id
where the keyvalues are
provided as a dictionary of key:(value,caseless) pairs
This routine
will understand any joined loops, so if separate loops in the
datafile belong to the
same category hierarchy (e.g. _atom_site
and _atom_site_aniso
),
the returned StarPacket
object will contain datanames from
the requested category and any children.
def GetMultiKeyedSemanticPacket(self,keydict,cat_id): """Return a complete packet for category `cat_id` where the keyvalues are provided as a dictionary of key:(value,caseless) pairs This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`), the returned `StarPacket` object will contain datanames from the requested category and any children.""" #if len(keyvalues)==1: #simplification # return self.GetKeyedSemanticPacket(keydict[1][0],cat_id) target_keys = self.dictionary.cat_key_table[cat_id] # update the dictionary passed to us with all equivalents, for # simplicity. parallel_keys = list(zip(*target_keys)) #transpose print('Parallel keys:' + repr(parallel_keys)) print('Keydict:' + repr(keydict)) start_keys = list(keydict.keys()) for one_name in start_keys: key_set = [a for a in parallel_keys if one_name in a] for one_key in key_set: keydict[one_key] = keydict[one_name] # target_keys is a list of lists, each of which is a compound key p = StarPacket() # a little function to return the dataname for a key def find_key(key): for one_key in self.dictionary.key_equivs.get(key,[])+[key]: if self.has_key(one_key): return one_key return None for one_set in target_keys: #loop down the categories true_keys = [find_key(k) for k in one_set] true_keys = [k for k in true_keys if k is not None] if len(true_keys)==len(one_set): truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)]) try: extra_packet = self.GetCompoundKeyedPacket(truekeydict) except KeyError: #one or more are missing continue #should try harder? except ValueError: continue else: continue print('Merging packet for keys ' + repr(one_set)) p.merge_packet(extra_packet) # the following attributes used to calculate missing values p.key = true_keys p.cif_dictionary = self.dictionary p.fulldata = self return p
def RemoveCifItem(
self, itemname)
Remove itemname
from the CifBlock
def RemoveCifItem(self,itemname): """Remove `itemname` from the CifBlock""" self.RemoveItem(itemname)
def RemoveItem(
self, itemname)
Remove itemname
from the block.
def RemoveItem(self,itemname): """Remove `itemname` from the block.""" # first check any loops loop_no = self.FindLoop(itemname) testkey = itemname.lower() if testkey in self: del self.block[testkey] del self.true_case[testkey] # now remove from loop if loop_no >= 0: self.loops[loop_no].remove(testkey) if len(self.loops[loop_no])==0: del self.loops[loop_no] self.item_order.remove(loop_no) else: #will appear in order list self.item_order.remove(testkey)
def RemoveKeyedPacket(
self, keyname, keyvalue)
Remove the packet for which dataname keyname
takes
value keyvalue
. Only the first such occurrence is
removed.
def RemoveKeyedPacket(self,keyname,keyvalue): """Remove the packet for which dataname `keyname` takes value `keyvalue`. Only the first such occurrence is removed.""" packet_coord = list(self[keyname]).index(keyvalue) loopnames = self.GetLoopNames(keyname) for dataname in loopnames: self.block[dataname][0] = list(self.block[dataname][0]) del self.block[dataname][0][packet_coord] self.block[dataname][1] = list(self.block[dataname][1]) del self.block[dataname][1][packet_coord]
def RemoveLoopItem(
self, itemname)
Deprecated. Use RemoveItem
instead
def RemoveLoopItem(self,itemname): """*Deprecated*. Use `RemoveItem` instead""" self.RemoveItem(itemname)
def SetOutputLength(
self, wraplength=80, maxoutlength=2048)
Set the maximum output line length (maxoutlength
) and the line length to
wrap at (wraplength
). The wrap length is a target only and may not always be
possible.
def SetOutputLength(self,wraplength=80,maxoutlength=2048): """Set the maximum output line length (`maxoutlength`) and the line length to wrap at (`wraplength`). The wrap length is a target only and may not always be possible.""" if wraplength > maxoutlength: raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength)) self.wraplength = wraplength self.maxoutlength = maxoutlength
class CifDic
Create a Cif Dictionary object from the provided source, which can be a filename/URL or a CifFile. Optional arguments (relevant to DDLm only):
-
do_minimum (Boolean): Do not set up the dREL system for auto-calculation or perform imports. This implies do_imports=False and do_dREL=False
-
do_imports = No/Full/Contents/All: If not 'No', intepret _import.get statements for Full mode/Contents mode/Both respectively. See also option 'heavy'
-
do_dREL = True/False: Parse and convert all dREL methods to Python. Implies do_imports=All
-
heavy = True/False: (Experimental). If True, importation overwrites definitions. If False, attributes are resolved dynamically.
class CifDic(StarFile.StarFile): """Create a Cif Dictionary object from the provided source, which can be a filename/URL or a CifFile. Optional arguments (relevant to DDLm only): * do_minimum (Boolean): Do not set up the dREL system for auto-calculation or perform imports. This implies do_imports=False and do_dREL=False * do_imports = No/Full/Contents/All: If not 'No', intepret _import.get statements for Full mode/Contents mode/Both respectively. See also option 'heavy' * do_dREL = True/False: Parse and convert all dREL methods to Python. Implies do_imports=All * heavy = True/False: (Experimental). If True, importation overwrites definitions. If False, attributes are resolved dynamically. """ def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True, grammar='auto',heavy=True,**kwargs): self.do_minimum = do_minimum if do_minimum: do_imports = 'No' do_dREL = False if do_dREL: do_imports = 'All' if heavy == 'Light' and do_imports not in ('contents','No'): raise(ValueError,"Light imports only available for mode 'contents'") self.template_cache = {} #for DDLm imports self.ddlm_functions = {} #for DDLm functions self.switch_numpy(False) #no Numpy arrays returned super(CifDic,self).__init__(datasource=dic,grammar=grammar,blocktype=DicBlock,**kwargs) self.standard = 'Dic' #for correct output order self.scoping = 'dictionary' (self.dicname,self.diclang) = self.dic_determine() print('%s is a %s dictionary' % (self.dicname,self.diclang)) self.scopes_mandatory = {} self.scopes_naughty = {} # rename and expand out definitions using "_name" in DDL dictionaries if self.diclang == "DDL1": self.DDL1_normalise() #this removes any non-definition entries self.create_def_block_table() #From now on, [] uses definition_id if self.diclang == "DDL1": self.ddl1_cat_load() elif self.diclang == "DDL2": self.DDL2_normalise() #iron out some DDL2 tricky bits elif self.diclang == "DDLm": self.scoping = 'dictionary' #expose all save frames if do_imports is not 'No': self.obtain_imports(import_mode=do_imports,heavy=heavy)#recursively calls this routine self.create_alias_table() self.create_cat_obj_table() self.create_cat_key_table() if do_dREL: print('Doing full dictionary initialisation') self.initialise_drel() self.add_category_info(full=do_dREL) # initialise type information self.typedic={} self.primdic = {} #typecode<->primitive type translation self.add_type_info() self.install_validation_functions() def dic_determine(self): if "on_this_dictionary" in self: self.master_block = super(CifDic,self).__getitem__("on_this_dictionary") self.def_id_spec = "_name" self.cat_id_spec = "_category.id" #we add this ourselves self.type_spec = "_type" self.enum_spec = "_enumeration" self.cat_spec = "_category" self.esd_spec = "_type_conditions" self.must_loop_spec = "_list" self.must_exist_spec = "_list_mandatory" self.list_ref_spec = "_list_reference" self.key_spec = "_list_mandatory" self.unique_spec = "_list_uniqueness" self.child_spec = "_list_link_child" self.parent_spec = "_list_link_parent" self.related_func = "_related_function" self.related_item = "_related_item" self.primitive_type = "_type" self.dep_spec = "xxx" self.cat_list = [] #to save searching all the time name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"] version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"] return (name+version,"DDL1") elif len(self.get_roots()) == 1: # DDL2/DDLm self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0]) # now change to dictionary scoping self.scoping = 'dictionary' name = self.master_block["_dictionary.title"] version = self.master_block["_dictionary.version"] if self.master_block.has_key("_dictionary.class"): #DDLm self.enum_spec = '_enumeration_set.state' self.key_spec = '_category.key_id' self.must_exist_spec = None self.cat_spec = '_name.category_id' self.primitive_type = '_type.contents' self.cat_id_spec = "_definition.id" self.def_id_spec = "_definition.id" return(name+version,"DDLm") else: #DDL2 self.cat_id_spec = "_category.id" self.def_id_spec = "_item.name" self.key_spec = "_category_mandatory.name" self.type_spec = "_item_type.code" self.enum_spec = "_item_enumeration.value" self.esd_spec = "_item_type_conditions.code" self.cat_spec = "_item.category_id" self.loop_spec = "there_is_no_loop_spec!" self.must_loop_spec = "xxx" self.must_exist_spec = "_item.mandatory_code" self.child_spec = "_item_linked.child_name" self.parent_spec = "_item_linked.parent_name" self.related_func = "_item_related.function_code" self.related_item = "_item_related.related_name" self.unique_spec = "_category_key.name" self.list_ref_spec = "xxx" self.primitive_type = "_type" self.dep_spec = "_item_dependent.dependent_name" return (name+version,"DDL2") else: raise CifError("Unable to determine dictionary DDL version") def DDL1_normalise(self): # switch off block name collision checks self.standard = None # add default type information in DDL2 style # initial types and constructs base_types = ["char","numb","null"] prim_types = base_types[:] base_constructs = [".*", '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.', "\"\" "] for key,value in self.items(): newnames = [key] #keep by default if "_name" in value: real_name = value["_name"] if isinstance(real_name,list): #looped values for looped_name in real_name: new_value = value.copy() new_value["_name"] = looped_name #only looped name self[looped_name] = new_value newnames = real_name else: self[real_name] = value newnames = [real_name] # delete the old one if key not in newnames: del self[key] # loop again to normalise the contents of each definition for key,value in self.items(): #unlock the block save_overwrite = value.overwrite value.overwrite = True # deal with a missing _list, _type_conditions if "_list" not in value: value["_list"] = 'no' if "_type_conditions" not in value: value["_type_conditions"] = 'none' # deal with enumeration ranges if "_enumeration_range" in value: max,min = self.getmaxmin(value["_enumeration_range"]) if min == ".": self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min)))) elif max == ".": self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min)))) else: self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min)))) #add any type construct information if "_type_construct" in value: base_types.append(value["_name"]+"_type") #ie dataname_type base_constructs.append(value["_type_construct"]+"$") prim_types.append(value["_type"]) #keep a record value["_type"] = base_types[-1] #the new type name #make categories conform with ddl2 #note that we must remove everything from the last underscore if value.get("_category",None) == "category_overview": last_under = value["_name"].rindex("_") catid = value["_name"][1:last_under] value["_category.id"] = catid #remove square bracks if catid not in self.cat_list: self.cat_list.append(catid) value.overwrite = save_overwrite # we now add any missing categories before filling in the rest of the # information for key,value in self.items(): #print('processing ddl1 definition %s' % key) if "_category" in self[key]: if self[key]["_category"] not in self.cat_list: # rogue category, add it in newcat = self[key]["_category"] fake_name = "_" + newcat + "_[]" newcatdata = CifBlock() newcatdata["_category"] = "category_overview" newcatdata["_category.id"] = newcat newcatdata["_type"] = "null" self[fake_name] = newcatdata self.cat_list.append(newcat) # write out the type information in DDL2 style self.master_block.AddLoopItem(( ("_item_type_list.code","_item_type_list.construct", "_item_type_list.primitive_code"), (base_types,base_constructs,prim_types) )) def ddl1_cat_load(self): deflist = self.keys() #slight optimization cat_mand_dic = {} cat_unique_dic = {} # a function to extract any necessary information from each definition def get_cat_info(single_def): if self[single_def].get(self.must_exist_spec)=='yes': thiscat = self[single_def]["_category"] curval = cat_mand_dic.get(thiscat,[]) curval.append(single_def) cat_mand_dic[thiscat] = curval # now the unique items... # cif_core.dic throws us a curly one: the value of list_uniqueness is # not the same as the defined item for publ_body_label, so we have # to collect both together. We assume a non-listed entry, which # is true for all current (May 2005) ddl1 dictionaries. if self[single_def].get(self.unique_spec,None)!=None: thiscat = self[single_def]["_category"] new_unique = self[single_def][self.unique_spec] uis = cat_unique_dic.get(thiscat,[]) if single_def not in uis: uis.append(single_def) if new_unique not in uis: uis.append(new_unique) cat_unique_dic[thiscat] = uis [get_cat_info(a) for a in deflist] # apply the above function for cat in cat_mand_dic.keys(): self[cat]["_category_mandatory.name"] = cat_mand_dic[cat] for cat in cat_unique_dic.keys(): self[cat]["_category_key.name"] = cat_unique_dic[cat] def create_pcloop(self,definition): old_children = self[definition].get('_item_linked.child_name',[]) old_parents = self[definition].get('_item_linked.parent_name',[]) if isinstance(old_children,unicode): old_children = [old_children] if isinstance(old_parents,unicode): old_parents = [old_parents] if (len(old_children)==0 and len(old_parents)==0) or \ (len(old_children) > 1 and len(old_parents)>1): return if len(old_children)==0: old_children = [definition]*len(old_parents) if len(old_parents)==0: old_parents = [definition]*len(old_children) newloop = CifLoopBlock(dimension=1) newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) newloop.AddLoopItem(('_item_linked.child_name',old_children)) try: del self[definition]['_item_linked.parent_name'] del self[definition]['_item_linked.child_name'] except KeyError: pass self[definition].insert_loop(newloop) def DDL2_normalise(self): listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys()) # now filter out all the single element lists! dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs) for item_def in dodgy_defs: # print("DDL2 norm: processing %s" % item_def) thisdef = self[item_def] packet_no = thisdef['_item.name'].index(item_def) realcat = thisdef['_item.category_id'][packet_no] realmand = thisdef['_item.mandatory_code'][packet_no] # first add in all the missing categories # we don't replace the entry in the list corresponding to the # current item, as that would wipe out the information we want for child_no in range(len(thisdef['_item.name'])): if child_no == packet_no: continue child_name = thisdef['_item.name'][child_no] child_cat = thisdef['_item.category_id'][child_no] child_mand = thisdef['_item.mandatory_code'][child_no] if child_name not in self: self[child_name] = CifBlock() self[child_name]['_item.name'] = child_name self[child_name]['_item.category_id'] = child_cat self[child_name]['_item.mandatory_code'] = child_mand self[item_def]['_item.name'] = item_def self[item_def]['_item.category_id'] = realcat self[item_def]['_item.mandatory_code'] = realmand target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \ '_item_linked.parent_name' in self[a]] # now dodgy_defs contains all definition blocks with more than one child/parent link for item_def in dodgy_defs: self.create_pcloop(item_def) #regularise appearance for item_def in dodgy_defs: print('Processing %s' % item_def) thisdef = self[item_def] child_list = thisdef['_item_linked.child_name'] parents = thisdef['_item_linked.parent_name'] # for each parent, find the list of children. family = list(zip(parents,child_list)) notmychildren = family #We aim to remove non-children # Loop over the parents, relocating as necessary while len(notmychildren): # get all children of first entry mychildren = [a for a in family if a[0]==notmychildren[0][0]] print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren))) for parent,child in mychildren: #parent is the same for all # Make sure that we simply add in the new entry for the child, not replace it, # otherwise we might spoil the child entry loop structure try: childloop = self[child].GetLoop('_item_linked.parent_name') except KeyError: print('Creating new parent entry %s for definition %s' % (parent,child)) self[child]['_item_linked.parent_name'] = [parent] childloop = self[child].GetLoop('_item_linked.parent_name') childloop.AddLoopItem(('_item_linked.child_name',[child])) continue else: # A parent loop already exists and so will a child loop due to the # call to create_pcloop above pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child] goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent] if len(goodpars)>0: #no need to add it print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child)) continue print('Adding %s to %s entry' % (parent,child)) newpacket = childloop.GetPacket(0) #essentially a copy, I hope setattr(newpacket,'_item_linked.child_name',child) setattr(newpacket,'_item_linked.parent_name',parent) childloop.AddPacket(newpacket) # # Make sure the parent also points to the children. We get # the current entry, then add our # new values if they are not there already # parent_name = mychildren[0][0] old_children = self[parent_name].get('_item_linked.child_name',[]) old_parents = self[parent_name].get('_item_linked.parent_name',[]) oldfamily = zip(old_parents,old_children) newfamily = [] print('Old parents -> %s' % repr(old_parents)) for jj, childname in mychildren: alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname] if len(alreadythere)>0: continue 'Adding new child %s to parent definition at %s' % (childname,parent_name) old_children.append(childname) old_parents.append(parent_name) # Now output the loop, blowing away previous definitions. If there is something # else in this category, we are destroying it. newloop = CifLoopBlock(dimension=1) newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) newloop.AddLoopItem(('_item_linked.child_name',old_children)) del self[parent_name]['_item_linked.parent_name'] del self[parent_name]['_item_linked.child_name'] self[parent_name].insert_loop(newloop) print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name'])) # now make a new,smaller list notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]] # now flatten any single element lists single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs) for flat_def in single_defs: flat_keys = self[flat_def].GetLoop('_item.name').keys() for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0] # now deal with the multiple lists # next we do aliases all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')] for aliased in all_aliases: my_aliases = listify(self[aliased]['_item_aliases.alias_name']) for alias in my_aliases: self[alias] = self[aliased].copy() #we are going to delete stuff... del self[alias]["_item_aliases.alias_name"] def ddlm_parse_valid(self): if "_dictionary_valid.application" not in self.master_block: return for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"): scope = getattr(scope_pack,"_dictionary_valid.application") valid_info = getattr(scope_pack,"_dictionary_valid.attributes") if scope[1] == "Mandatory": self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info) elif scope[1] == "Prohibited": self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info) def obtain_imports(self,import_mode,heavy=False): """Collate import information""" self._import_dics = [] import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]]) print('Import mode %s applied to following frames' % import_mode) print(str([a[0] for a in import_frames])) if import_mode != 'All': for i in range(len(import_frames)): import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents').lower() == import_mode.lower()]) print('Importing following frames in mode %s' % import_mode) print(str(import_frames)) #resolve all references for parent_block,import_list in import_frames: for import_ref in import_list: file_loc = import_ref["file"] full_uri = self.resolve_path(file_loc) if full_uri not in self.template_cache: dic_as_cif = CifFile(full_uri,grammar=self.grammar) self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,heavy=heavy,do_dREL=False) #this will recurse internal imports print('Added %s to cached dictionaries' % full_uri) import_from = self.template_cache[full_uri] dupl = import_ref.get('dupl','Exit') miss = import_ref.get('miss','Exit') target_key = import_ref["save"] try: import_target = import_from[target_key] except KeyError: if miss == 'Exit': raise CifError('Import frame %s not found in %s' % (target_key,full_uri)) else: continue # now import appropriately mode = import_ref.get("mode",'Contents').lower() if target_key in self and mode=='full': #so blockname will be duplicated if dupl == 'Exit': raise CifError('Import frame %s already in dictionary' % target_key) elif dupl == 'Ignore': continue if heavy: self.ddlm_import(parent_block,import_from,import_target,target_key,mode) else: self.ddlm_import_light(parent_block,import_from,import_target,target_key,file_loc,mode) def ddlm_import(self,parent_block,import_from,import_target,target_key,mode='All'): """Import other dictionaries in place""" if mode == 'contents': #merge attributes only self[parent_block].merge(import_target) elif mode =="full": # Do the syntactic merge syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting from_cat_head = import_target['_name.object_id'] child_frames = import_from.ddlm_all_children(from_cat_head) # Check for Head merging Head if self[parent_block].get('_definition.class','Datum')=='Head' and \ import_target.get('_definition.class','Datum')=='Head': head_to_head = True else: head_to_head = False child_frames.remove(from_cat_head) # As we are in syntax land, we call the CifFile methods child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames]) child_blocks = super(CifDic,import_from).makebc(child_blocks) # Prune out any datablocks that have identical definitions from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()]) double_defs = list([b for b in from_defs.items() if self.has_key(b[1])]) print('Definitions for %s superseded' % repr(double_defs)) for b in double_defs: del child_blocks[b[0]] super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head) # print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames), mode,len(self))) # Now the semantic merge # First expand our definition <-> blockname tree self.create_def_block_table() merging_cat = self[parent_block]['_name.object_id'] #new parent if head_to_head: child_frames = self.ddlm_immediate_children(from_cat_head) #old children #the new parent is the importing category for all old children for f in child_frames: self[f].overwrite = True self[f]['_name.category_id'] = merging_cat self[f].overwrite = False # remove the old head del self[from_cat_head] print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat)) else: #imported category is only child from_frame = import_from[target_key]['_definition.id'] #so we can find it child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0] self[child_frame]['_name.category_id'] = merging_cat print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat)) # it will never happen again... del self[parent_block]["_import.get"] def resolve_path(self,file_loc): url_comps = urlparse(file_loc) if url_comps[0]: return file_loc #already full URI new_url = urljoin(self.my_uri,file_loc) #print("Transformed %s to %s for import " % (file_loc,new_url)) return new_url def ddlm_import_light(self,parent_block,import_from,import_target,target_key,file_loc,mode='All'): """Register the imported dictionaries but do not alter any definitions. `parent_block` contains the id of the block that is importing. `import_target` is the block that should be imported. `import_from` is the CifFile that contains the definitions.""" if mode == 'contents': #merge attributes only self[parent_block].add_dict_cache(file_loc,import_from) elif mode =="full": # Check for Head merging Head if self[parent_block].get('_definition.class','Datum')=='Head' and \ import_target.get('_definition.class','Datum')=='Head': head_to_head = True else: head_to_head = False # Figure out the actual definition ID head_id = import_target["_definition.id"] # Adjust parent information merging_cat = self[parent_block]['_name.object_id'] from_cat_head = import_target['_name.object_id'] if not head_to_head: # imported category is only child import_target["_name.category_id"]=merging_cat self._import_dics = [(import_from,head_id)]+self._import_dics #prepend def lookup_imports(self,key): """Check the list of imported dictionaries for this definition""" for one_dic,head_def in self._import_dics: from_cat_head = one_dic[head_def]['_name.object_id'] possible_keys = one_dic.ddlm_all_children(from_cat_head) if key in possible_keys: return one_dic[key] raise KeyError("%s not found in import dictionaries" % key) def create_def_block_table(self): """ Create an internal table matching definition to block id """ proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()] # now get the actual ids instead of blocks proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table]) # remove non-definitions if self.diclang != "DDL1": top_blocks = list([a[0].lower() for a in self.get_roots()]) else: top_blocks = ["on_this_dictionary"] # catch dodgy duplicates uniques = set([a[0] for a in proto_table]) if len(uniques)<len(proto_table): def_names = list([a[0] for a in proto_table]) dodgy = [a for a in def_names if def_names.count(a)>1] raise CifError('Duplicate definitions in dictionary:' + repr(dodgy)) self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks]) def __getitem__(self,key): """Access a datablock by definition id, after the lookup has been created""" try: return super(CifDic,self).__getitem__(self.block_id_table[key.lower()]) except AttributeError: #block_id_table not present yet return super(CifDic,self).__getitem__(key) except KeyError: # key is missing try: # print(Definition for %s not found, reverting to CifFile' % key) return super(CifDic,self).__getitem__(key) except KeyError: # try imports return self.lookup_imports(key) def __setitem__(self,key,value): """Add a new definition block""" super(CifDic,self).__setitem__(key,value) try: self.block_id_table[value['_definition.id']]=key except AttributeError: #does not exist yet pass def NewBlock(self,*args,**kwargs): newname = super(CifDic,self).NewBlock(*args,**kwargs) try: self.block_id_table[self[newname]['_definition.id']]=newname except AttributeError: #no block_id table pass def __delitem__(self,key): """Remove a definition""" try: super(CifDic,self).__delitem__(self.block_id_table[key.lower()]) del self.block_id_table[key.lower()] except (AttributeError,KeyError): #block_id_table not present yet super(CifDic,self).__delitem__(key) return # fix other datastructures # cat_obj table def keys(self): """Return all definitions""" try: return self.block_id_table.keys() except AttributeError: return super(CifDic,self).keys() def has_key(self,key): return key in self def __contains__(self,key): try: return key.lower() in self.block_id_table except AttributeError: return super(CifDic,self).__contains__(key) def items(self): """Return (key,value) pairs""" return list([(a,self[a]) for a in self.keys()]) def unlock(self): """Allow overwriting of all definitions in this collection""" for a in self.keys(): self[a].overwrite=True def lock(self): """Disallow changes in definitions""" for a in self.keys(): self[a].overwrite=False def rename(self,oldname,newname,blockname_as_well=True): """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True, change the underlying blockname too.""" if blockname_as_well: super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname) self.block_id_table[newname.lower()]=newname if oldname.lower() in self.block_id_table: #not removed del self.block_id_table[oldname.lower()] else: self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()] del self.block_id_table[oldname.lower()] return def get_root_category(self): """Get the single 'Head' category of this dictionary""" root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head'] if len(root_cats)>1 or len(root_cats)==0: raise CifError("Cannot determine a unique Head category, got" % repr(root_cats)) return root_cats[0] def ddlm_immediate_children(self,catname): """Return a list of datanames for the immediate children of catname. These are semantic children (i.e. based on _name.category_id), not structural children as in the case of StarFile.get_immediate_children""" straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()] return list(straight_children) def ddlm_all_children(self,catname): """Return a list of all children, including the `catname`""" all_children = self.ddlm_immediate_children(catname) cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category'] for c in cat_children: all_children.remove(c) all_children += self.ddlm_all_children(c) return all_children + [catname] def is_semantic_child(self,parent,maybe_child): """Return true if `maybe_child` is a child of `parent`""" all_children = self.ddlm_all_children(parent) return maybe_child in all_children def ddlm_danglers(self): """Return a list of definitions that do not have a category defined for them, or are children of an unattached category""" top_block = self.get_root_category() connected = set(self.ddlm_all_children(top_block)) all_keys = set(self.keys()) unconnected = all_keys - connected return list(unconnected) def get_ddlm_parent(self,itemname): """Get the parent category of itemname""" parent = self[itemname].get('_name.category_id','') if parent == '': # use the top block by default raise CifError("%s has no parent" % itemname) return parent def expand_category_opt(self,name_list): """Return a list of all non-category items in a category or return the name if the name is not a category""" new_list = [] for name in name_list: if self.get(name,{}).get('_definition.scope','Item') == 'Category': new_list += self.expand_category_opt([a for a in self.keys() if \ self[a].get('_name.category_id','').lower() == name.lower()]) else: new_list.append(name) return new_list def get_categories(self): """Return a list of category names""" return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category']) def names_in_cat(self,cat,names_only=False): names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()] if not names_only: return list([a for a in names if self[a].get('_definition.scope','Item')=='Item']) else: return list([self[a]["_name.object_id"] for a in names]) def create_alias_table(self): """Populate an alias table that we can look up when searching for a dataname""" all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]] self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases]) def create_cat_obj_table(self): """Populate a table indexed by (cat,obj) and returning the correct dataname""" base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \ for a in self.keys() if self[a].get('_definition.scope','Item')=='Item']) loopable = self.get_loopable_cats() loopers = [self.ddlm_immediate_children(a) for a in loopable] print('Loopable cats:' + repr(loopable)) loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers] expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0]) print("Expansion list:" + repr(expand_list)) extra_table = {} #for debugging we keep it separate from base_table until the end def expand_base_table(parent_cat,child_cats): extra_names = [] # first deal with all the child categories for child_cat in child_cats: nn = [] if child_cat in expand_list: # a nested category: grab its names nn = expand_base_table(child_cat,expand_list[child_cat]) # store child names extra_names += nn # add all child names to the table child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \ for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key'] child_names += extra_names extra_table.update(dict([((parent_cat,obj),[name]) for obj,name in child_names if (parent_cat,name) not in extra_table])) # and the repeated ones get appended instead repeats = [a for a in child_names if a in extra_table] for obj,name in repeats: extra_table[(parent_cat,obj)] += [name] # and finally, add our own names to the return list child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \ for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key'] return child_names [expand_base_table(parent,child) for parent,child in expand_list.items()] print('Expansion cat/obj values: ' + repr(extra_table)) # append repeated ones non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table]) repeats = [a for a in extra_table.keys() if a in base_table] base_table.update(non_repeats) for k in repeats: base_table[k] += extra_table[k] self.cat_obj_lookup_table = base_table self.loop_expand_list = expand_list def get_loopable_cats(self): """A short utility function which returns a list of looped categories. This is preferred to a fixed attribute as that fixed attribute would need to be updated after any edits""" return [a.lower() for a in self.keys() if self[a].get('_definition.class','')=='Loop'] def create_cat_key_table(self): """Create a utility table with a list of keys applicable to each category. A key is a compound key, that is, it is a list""" self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name", [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()]) def collect_keys(parent_cat,child_cats): kk = [] for child_cat in child_cats: if child_cat in self.loop_expand_list: kk += collect_keys(child_cat) # add these keys to our list kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))] self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk return kk for k,v in self.loop_expand_list.items(): collect_keys(k,v) print('Keys for categories' + repr(self.cat_key_table)) def add_type_info(self): if "_item_type_list.construct" in self.master_block: types = self.master_block["_item_type_list.code"] prim_types = self.master_block["_item_type_list.primitive_code"] constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]]) # add in \r wherever we see \n, and change \{ to \\{ def regex_fiddle(mm_regex): brack_match = r"((.*\[.+)(\\{)(.*\].*))" ret_match = r"((.*\[.+)(\\n)(.*\].*))" fixed_regexp = mm_regex[:] #copy # fix the brackets bm = re.match(brack_match,mm_regex) if bm != None: fixed_regexp = bm.expand(r"\2\\\\{\4") # fix missing \r rm = re.match(ret_match,fixed_regexp) if rm != None: fixed_regexp = rm.expand(r"\2\3\\r\4") #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp)) return fixed_regexp constructs = map(regex_fiddle,constructs) for typecode,construct in zip(types,constructs): self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL) # now make a primitive <-> type construct mapping for typecode,primtype in zip(types,prim_types): self.primdic[typecode] = primtype def add_category_info(self,full=True): if self.diclang == "DDLm": catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category'] looped_cats = [a for a in catblocks if self[a].get('_definition.class','Set') == 'Loop'] self.parent_lookup = {} for one_cat in looped_cats: parent_cat = one_cat parent_def = self[parent_cat] next_up = parent_def['_name.category_id'].lower() while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop': parent_def = self[next_up] parent_cat = next_up next_up = parent_def['_name.category_id'].lower() self.parent_lookup[one_cat] = parent_cat if full: self.key_equivs = {} for one_cat in looped_cats: #follow them up lower_keys = listify(self[one_cat]['_category_key.name']) start_keys = lower_keys[:] while len(lower_keys)>0: this_cat = self[lower_keys[0]]['_name.category_id'] parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a] #print(Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent))) if len(parent)>1: raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent))) if len(parent)==0: break parent = parent[0] parent_keys = listify(self[parent]['_category_key.name']) linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys] # sanity check if set(parent_keys) != set(linked_keys): raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys)) # now add in our information for parent,child in zip(linked_keys,start_keys): self.key_equivs[child] = self.key_equivs.get(child,[])+[parent] lower_keys = linked_keys #preserves order of start keys else: self.parent_lookup = {} self.key_equivs = {} def change_category_name(self,oldname,newname): self.unlock() """Change the category name from [[oldname]] to [[newname]]""" if oldname not in self: raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname)) if newname in self: raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,oldname)) child_defs = self.ddlm_immediate_children(oldname) self.rename(oldname,newname) #NB no name integrity checks self[newname]['_name.object_id']=newname self[newname]['_definition.id']=newname for child_def in child_defs: self[child_def]['_name.category_id'] = newname if self[child_def].get('_definition.scope','Item')=='Item': newid = self.create_catobj_name(newname,self[child_def]['_name.object_id']) self[child_def]['_definition.id']=newid self.rename(child_def,newid[1:]) #no underscore at the beginning self.lock() def create_catobj_name(self,cat,obj): """Combine category and object in approved fashion to create id""" return ('_'+cat+'.'+obj) def change_category(self,itemname,catname): """Move itemname into catname, return new handle""" defid = self[itemname] if defid['_name.category_id'].lower()==catname.lower(): print('Already in category, no change') return itemname if catname not in self: #don't have it print('No such category %s' % catname) return itemname self.unlock() objid = defid['_name.object_id'] defid['_name.category_id'] = catname newid = itemname # stays the same for categories if defid.get('_definition.scope','Item') == 'Item': newid = self.create_catobj_name(catname,objid) defid['_definition.id']= newid self.rename(itemname,newid) self.set_parent(catname,newid) self.lock() return newid def change_name(self,one_def,newobj): """Change the object_id of one_def to newobj. This is not used for categories, but can be used for dictionaries""" if '_dictionary.title' not in self[one_def]: #a dictionary block newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj) self.unlock() self.rename(one_def,newid) self[newid]['_definition.id']=newid self[newid]['_name.object_id']=newobj else: self.unlock() newid = newobj self.rename(one_def,newobj) self[newid]['_dictionary.title'] = newid self.lock() return newid # Note that our semantic parent is given by catparent, but our syntactic parent is # always just the root block def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False): """Add a new category to the dictionary with name [[catname]]. If [[catparent]] is None, the category will be a child of the topmost 'Head' category or else the top data block. If [[is_loop]] is false, a Set category is created. If [[allow_dangler]] is true, the parent category does not have to exist.""" if catname in self: raise CifError('Attempt to add existing category %s' % catname) self.unlock() syntactic_root = self.get_roots()[0][0] if catparent is None: semantic_root = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head'] if len(semantic_root)>0: semantic_root = semantic_root[0] else: semantic_root = syntactic_root else: semantic_root = catparent realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root) self.block_id_table[catname.lower()]=realname self[catname]['_name.object_id'] = catname if not allow_dangler or catparent is None: self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id'] else: self[catname]['_name.category_id'] = catparent self[catname]['_definition.id'] = catname self[catname]['_definition.scope'] = 'Category' if is_loop: self[catname]['_definition.class'] = 'Loop' else: self[catname]['_definition.class'] = 'Set' self[catname]['_description.text'] = 'No definition provided' self.lock() return catname def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False): """Add itemname to category [[catparent]]. If itemname contains periods, all text before the final period is ignored. If [[allow_dangler]] is True, no check for a parent category is made.""" self.unlock() if '.' in itemname: objname = itemname.split('.')[-1] else: objname = itemname objname = objname.strip('_') if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'): raise CifError('No category %s in dictionary' % catparent) fullname = '_'+catparent.lower()+'.'+objname print('New name: %s' % fullname) syntactic_root = self.get_roots()[0][0] realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root) #low-level change # update our dictionary structures self.block_id_table[fullname]=realname self[fullname]['_definition.id']=fullname self[fullname]['_name.object_id']=objname self[fullname]['_name.category_id']=catparent self[fullname]['_definition.class']='Datum' self[fullname]['_description.text']=def_text def remove_definition(self,defname): """Remove a definition from the dictionary.""" if defname not in self: return if self[defname].get('_definition.scope')=='Category': children = self.ddlm_immediate_children(defname) [self.remove_definition(a) for a in children] cat_id = self[defname]['_definition.id'].lower() del self[defname] def get_cat_obj(self,name): """Return (cat,obj) tuple. [[name]] must contain only a single period""" cat,obj = name.split('.') return (cat.strip('_'),obj) def get_name_by_cat_obj(self,category,object,give_default=False): """Return the dataname corresponding to the given category and object""" if category[0] == '_': #accidentally left in true_cat = category[1:].lower() else: true_cat = category.lower() try: return self.cat_obj_lookup_table[(true_cat,object.lower())][0] except KeyError: if give_default: return '_'+true_cat+'.'+object raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object)) def WriteOut(self,**kwargs): myblockorder = self.get_full_child_list() self.set_grammar(self.grammar) self.standard = 'Dic' return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs) def get_full_child_list(self): """Return a list of definition blocks in order parent-child-child-child-parent-child...""" top_block = self.get_roots()[0][0] root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head'] if len(root_cat) == 1: all_names = [top_block] + self.recurse_child_list(root_cat[0]) unrooted = self.ddlm_danglers() double_names = set(unrooted).intersection(set(all_names)) if len(double_names)>0: raise CifError('Names are children of internal and external categories:%s' % repr(double_names)) remaining = unrooted[:] for no_root in unrooted: if self[no_root].get('_definition.scope','Item')=='Category': all_names += [no_root] remaining.remove(no_root) these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()] all_names += these_children [remaining.remove(n) for n in these_children] # now sort by category ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining]) for e in ext_cats: cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == e] [remaining.remove(n) for n in cat_items] all_names += cat_items if len(remaining)>0: print('WARNING: following items do not seem to belong to a category??') print(repr(remaining)) all_names += remaining print('Final block order: ' + repr(all_names)) return all_names raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead') def cat_from_name(self,one_name): """Guess the category from the name. This should be used only when this is not important semantic information, for example, when printing out""" (cat,obj) = one_name.split(".") if cat[0] == "_": cat = cat[1:] return cat def recurse_child_list(self,parentname): """Recursively expand the logical child list of [[parentname]]""" final_list = [parentname] child_blocks = [a for a in self.child_table.keys() if self[a].get('_name.category_id','').lower() == parentname.lower()] child_blocks.sort() #we love alphabetical order child_items = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item'] final_list += child_items child_cats = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category'] for child_cat in child_cats: final_list += self.recurse_child_list(child_cat) return final_list def get_key_pack(self,category,value,data): keyname = self[category][self.unique_spec] onepack = data.GetPackKey(keyname,value) return onepack def get_number_with_esd(numstring): numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)' our_match = re.match(numb_re,numstring) if our_match: a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups() # print("Debug: {} -> {!r}".format(numstring, our_match.groups())) else: return None,None if dot or q: return None,None #a dot or question mark if exp: #has exponent exp = exp.replace("d","e") # mop up old fashioned numbers exp = exp.replace("D","e") base_num = base_num + exp # print("Debug: have %s for base_num from %s" % (base_num,numstring)) base_num = float(base_num) # work out esd, if present. if esd: esd = float(esd[1:-1]) # no brackets if dad: # decimal point + digits esd = esd * (10 ** (-1* len(dad))) if exp: esd = esd * (10 ** (float(exp[1:]))) return base_num,esd def getmaxmin(self,rangeexp): regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*' regexp = regexp + ":" + regexp regexp = re.match(regexp,rangeexp) try: minimum = regexp.group(1) maximum = regexp.group(7) except AttributeError: print("Can't match %s" % rangeexp) if minimum == None: minimum = "." else: minimum = float(minimum) if maximum == None: maximum = "." else: maximum = float(maximum) return maximum,minimum def initialise_drel(self): """Parse drel functions and prepare data structures in dictionary""" self.ddlm_parse_valid() #extract validity information from data block self.transform_drel() #parse the drel functions self.add_drel_funcs() #put the drel functions into the namespace def transform_drel(self): from .drel import drel_ast_yacc from .drel import py_from_ast import traceback parser = drel_ast_yacc.parser lexer = drel_ast_yacc.lexer my_namespace = self.keys() my_namespace = dict(zip(my_namespace,my_namespace)) # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...}) loopable_cats = self.get_loopable_cats() loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats] loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys] cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats] loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names))) # parser.listable_items = [a for a in self.keys() if "*" in self[a].get("_type.dimension","")] derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \ and self[a].get("_name.category_id","")!= "function"] for derivable in derivable_list: target_id = derivable # reset the list of visible names for parser special_ids = [dict(zip(self.keys(),self.keys()))] print("Target id: %s" % derivable) drel_exprs = self[derivable]["_method.expression"] drel_purposes = self[derivable]["_method.purpose"] all_methods = [] if not isinstance(drel_exprs,list): drel_exprs = [drel_exprs] drel_purposes = [drel_purposes] for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs): if drel_purpose != 'Evaluation': continue drel_expr = "\n".join(drel_expr.splitlines()) # print("Transforming %s" % drel_expr) # List categories are treated differently... try: meth_ast = parser.parse(drel_expr+"\n",lexer=lexer) except: print('Syntax error in method for %s; leaving as is' % derivable) a,b = sys.exc_info()[:2] print((repr(a),repr(b))) print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout)) # reset the lexer lexer.begin('INITIAL') continue # Construct the python method cat_meth = False if self[derivable].get('_definition.scope','Item') == 'Category': cat_meth = True pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id, loopable=loop_info, cif_dic = self,cat_meth=cat_meth) all_methods.append(pyth_meth) if len(all_methods)>0: save_overwrite = self[derivable].overwrite self[derivable].overwrite = True self[derivable]["_method.py_expression"] = all_methods self[derivable].overwrite = save_overwrite #print("Final result:\n " + repr(self[derivable]["_method.py_expression"])) def add_drel_funcs(self): from .drel import drel_ast_yacc from .drel import py_from_ast funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function'] funcnames = [(self[a]["_name.object_id"], getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist] # create executable python code... parser = drel_ast_yacc.parser # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...}) loopable_cats = self.get_loopable_cats() loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats] loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys] cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats] loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names))) for funcname,funcbody in funcnames: newline_body = "\n".join(funcbody.splitlines()) parser.target_id = funcname res_ast = parser.parse(newline_body) py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self) #print('dREL library function ->\n' + py_function) global_table = globals() exec(py_function, global_table) #add to namespace #print('Globals after dREL functions added:' + repr(globals())) self.ddlm_functions = globals() #for outside access @track_recursion def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True): key = start_key #starting value result = None #success is a non-None value default_result = False #we have not used a default value # check for aliases # check for an older form of a new value found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata] if len(found_it)>0: corrected_type = self.change_type(key,cifdata[found_it[0]]) return corrected_type # now do the reverse check - any alternative form alias_name = [a for a in self.alias_table.items() if key in a[1]] print('Aliases for %s: %s' % (key,repr(alias_name))) if len(alias_name)==1: key = alias_name[0][0] #actual definition name if key in cifdata: return self.change_type(key,cifdata[key]) found_it = [k for k in alias_name[0][1] if k in cifdata] if len(found_it)>0: return self.change_type(key,cifdata[found_it[0]]) elif len(alias_name)>1: raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name)) the_category = self[key]["_name.category_id"] cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category] has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)] # store any default value in case we have a problem def_val = self[key].get("_enumeration.default","") def_index_val = self[key].get("_enumeration.def_index_id","") if len(has_cat_names)==0: # try category method cat_result = {} pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]] pulled_from_cats = [(k,[ self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']] ) for k in pulled_from_cats] pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]] if '_category_construct_local.type' in self[the_category]: print("**Now constructing category %s using DDLm attributes**" % the_category) try: cat_result = self.construct_category(the_category,cifdata,store_value=True) except (CifRecursionError,StarFile.StarDerivationError): print('** Failed to construct category %s (error)' % the_category) # Trying a pull-back when the category is partially populated # will not work, hence we test that cat_result has no keys if len(pulled_to_cats)>0 and len(cat_result)==0: print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats))) try: cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True) except (CifRecursionError,StarFile.StarDerivationError): print('** Failed to construct category %s from pullback information (error)' % the_category) if '_method.py_expression' in self[the_category] and key not in cat_result: print("**Now applying category method for %s in search of %s**" % (the_category,key)) cat_result = self.derive_item(the_category,cifdata,store_value=True) print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result)) # do we now have our value? if key in cat_result: return cat_result[key] # Recalculate in case it actually worked has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)] the_funcs = self[key].get('_method.py_expression',"") if the_funcs: #attempt to calculate it #global_table = globals() #global_table.update(self.ddlm_functions) for one_func in the_funcs: print('Executing function for %s:' % key) #print(one_func) exec(one_func, globals()) #will access dREL functions, puts "pyfunc" in scope # print('in following global environment: ' + repr(global_table)) stored_setting = cifdata.provide_value cifdata.provide_value = True try: result = pyfunc(cifdata) except CifRecursionError as s: print(s) result = None except StarFile.StarDerivationError as s: print(s) result = None finally: cifdata.provide_value = stored_setting if result is not None: break #print("Function returned {!r}".format(result)) if result is None and allow_defaults: # try defaults if def_val: result = self.change_type(key,def_val) default_result = True elif def_index_val: #derive a default value index_vals = self[key]["_enumeration_default.index"] val_to_index = cifdata[def_index_val] #what we are keying on if self[def_index_val]['_type.contents'] in ['Code','Name','Tag']: lcase_comp = True index_vals = [a.lower() for a in index_vals] # Handle loops if isinstance(val_to_index,list): if lcase_comp: val_to_index = [a.lower() for a in val_to_index] keypos = [index_vals.index(a) for a in val_to_index] result = [self[key]["_enumeration_default.value"][a] for a in keypos] else: if lcase_comp: val_to_index = val_to_index.lower() keypos = index_vals.index(val_to_index) #value error if no such value available result = self[key]["_enumeration_default.value"][keypos] default_result = True #flag that it must be extended result = self.change_type(key,result) print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index))) # read it in if result is None: #can't do anything else print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults))) raise StarFile.StarDerivationError(start_key) is_looped = False if self[the_category].get('_definition.class','Set')=='Loop': is_looped = True if len(has_cat_names)>0: #this category already exists if result is None or default_result: #need to create a list of values loop_len = len(cifdata[has_cat_names[0]]) out_result = [result]*loop_len result = out_result else: #nothing exists in this category, we can't store this at all print('Resetting result %s for %s to null list as category is empty' % (key,result)) result = [] # now try to insert the new information into the right place # find if items of this category already appear... # Never cache empty values if not (isinstance(result,list) and len(result)==0) and\ store_value: if self[key].get("_definition.scope","Item")=='Item': if is_looped: result = self.store_new_looped_value(key,cifdata,result,default_result) else: result = self.store_new_unlooped_value(key,cifdata,result) else: self.store_new_cat_values(cifdata,result,the_category) return result def store_new_looped_value(self,key,cifdata,result,default_result): """Store a looped value from the dREL system into a CifFile""" # try to change any matrices etc. to lists the_category = self[key]["_name.category_id"] out_result = result if result is not None and not default_result: # find any numpy arrays def conv_from_numpy(one_elem): if not hasattr(one_elem,'dtype'): if isinstance(one_elem,(list,tuple)): return StarFile.StarList([conv_from_numpy(a) for a in one_elem]) return one_elem if one_elem.size > 1: #so is not a float return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()]) else: try: return one_elem.item(0) except: return one_elem out_result = [conv_from_numpy(a) for a in result] # so out_result now contains a value suitable for storage cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category] has_cat_names = [a for a in cat_names if a in cifdata] print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names)) if len(has_cat_names)>0: #this category already exists cifdata[key] = out_result #lengths must match or else!! cifdata.AddLoopName(has_cat_names[0],key) else: cifdata[key] = out_result cifdata.CreateLoop([key]) print('Loop info:' + repr(cifdata.loops)) return out_result def store_new_unlooped_value(self,key,cifdata,result): """Store a single value from the dREL system""" if result is not None and hasattr(result,'dtype'): if result.size > 1: out_result = StarFile.StarList(result.tolist()) cifdata[key] = out_result else: cifdata[key] = result.item(0) else: cifdata[key] = result return result def construct_category(self,category,cifdata,store_value=True): """Construct a category using DDLm attributes""" con_type = self[category].get('_category_construct_local.type',None) if con_type == None: return {} if con_type == 'Pullback' or con_type == 'Filter': morphisms = self[category]['_category_construct_local.components'] morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat cats = [self[a]['_name.category_id'] for a in morphisms] cat_keys = [self[a]['_category.key_id'] for a in cats] cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat if con_type == 'Filter': int_filter = self[category].get('_category_construct_local.integer_filter',None) text_filter = self[category].get('_category_construct_local.text_filter',None) if int_filter is not None: morph_values.append([int(a) for a in int_filter]) if text_filter is not None: morph_values.append(text_filter) cat_values.append(range(len(morph_values[-1]))) # create the mathematical product filtered by equality of dataname values pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \ if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]] # now prepare for return if len(pullback_ids)==0: return {} newids = self[category]['_category_construct_local.new_ids'] fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids] if con_type == 'Pullback': final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]} final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids)) final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids)) elif con_type == 'Filter': #simple filter final_results = {fullnewids[0]:[x[0] for x in pullback_ids]} final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids)) if store_value: self.store_new_cat_values(cifdata,final_results,category) return final_results def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True): """Each of the categories in source_categories are pullbacks that include the target_category""" target_key = self[target_category]['_category.key_id'] result = {target_key:[]} first_time = True # for each source category, determine which element goes to the target for sc in source_categories: components = self[sc]['_category_construct_local.components'] comp_cats = [self[c]['_name.category_id'] for c in components] new_ids = self[sc]['_category_construct_local.new_ids'] source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids] if len(components) == 2: # not a filter element_pos = comp_cats.index(target_category) old_id = source_ids[element_pos] print('Using %s to populate %s' % (old_id,target_key)) result[target_key].extend(cifdata[old_id]) # project through all identical names extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key]) # we only include keys that are common to all categories if first_time: result.update(extra_result) else: for k in extra_result.keys(): if k in result: print('Updating %s: was %s' % (k,repr(result[k]))) result[k].extend(extra_result[k]) else: extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids) if len(extra_result)>0 or source_ids[0] in cifdata: #something is present result[target_key].extend(cifdata[source_ids[0]]) for k in extra_result.keys(): if k in result: print('Reverse filter: Updating %s: was %s' % (k,repr(result[k]))) result[k].extend(extra_result[k]) else: result[k]=extra_result[k] # Bonus derivation if there is a singleton filter if self[sc]['_category_construct_local.type'] == 'Filter': int_filter = self[sc].get('_category_construct_local.integer_filter',None) text_filter = self[sc].get('_category_construct_local.text_filter',None) if int_filter is not None: filter_values = int_filter else: filter_values = text_filter if len(filter_values)==1: #a singleton extra_dataname = self[sc]['_category_construct_local.components'][0] if int_filter is not None: new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]]) else: new_value = filter_values * len(cifdata[source_ids[0]]) if extra_dataname not in result: result[extra_dataname] = new_value else: result[extra_dataname].extend(new_value) else: raise ValueError('Unexpected category construct type' + self[sc]['_category_construct_local.type']) first_time = False # check for sanity - all dataname lengths must be identical datalen = len(set([len(a) for a in result.values()])) if datalen != 1: raise AssertionError('Failed to construct equal-length category items,'+ repr(result)) if store_value: print('Now storing ' + repr(result)) self.store_new_cat_values(cifdata,result,target_category) return result def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]): """Copy across datanames for which the from_category key equals [[key_vals]]""" result = {} s_names_in_cat = set(self.names_in_cat(from_category,names_only=True)) t_names_in_cat = set(self.names_in_cat(to_category,names_only=True)) can_project = s_names_in_cat & t_names_in_cat can_project -= set(skip_names) #already dealt with source_key = self[from_category]['_category.key_id'] print('Source dataname set: ' + repr(s_names_in_cat)) print('Target dataname set: ' + repr(t_names_in_cat)) print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project)) for project_name in can_project: full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0] full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0] if key_vals is None: try: result[full_to_name] = cifdata[full_from_name] except StarFile.StarDerivationError: pass else: all_key_vals = cifdata[source_key] filter_pos = [all_key_vals.index(a) for a in key_vals] try: all_data_vals = cifdata[full_from_name] except StarFile.StarDerivationError: pass result[full_to_name] = [all_data_vals[i] for i in filter_pos] return result def store_new_cat_values(self,cifdata,result,the_category): """Store the values in [[result]] into [[cifdata]]""" the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key'] double_names = [a for a in result.keys() if a in cifdata] if len(double_names)>0: already_present = [a for a in self.names_in_cat(the_category) if a in cifdata] if set(already_present) != set(result.keys()): print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys())))) return #check key values old_keys = set(cifdata[the_key]) common_keys = old_keys & set(result[the_key]) if len(common_keys)>0: print("Category %s not updated, key values in common:" % (common_keys)) return #extend result values with old values for one_name,one_value in result.items(): result[one_name].extend(cifdata[one_name]) for one_name, one_value in result.items(): try: self.store_new_looped_value(one_name,cifdata,one_value,False) except StarFile.StarError: print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value))) #put the key as the first item print('Fixing item order for {}'.format(repr(the_key))) for one_key in the_key: #should only be one cifdata.ChangeItemOrder(one_key,0) def generate_default_packet(self,catname,catkey,keyvalue): """Return a StarPacket with items from ``catname`` and a key value of ``keyvalue``""" newpack = StarPacket() for na in self.names_in_cat(catname): def_val = self[na].get("_enumeration.default","") if def_val: final_val = self.change_type(na,def_val) newpack.extend(final_val) setattr(newpack,na,final_val) if len(newpack)>0: newpack.extend(keyvalue) setattr(newpack,catkey,keyvalue) return newpack def switch_numpy(self,to_val): pass def change_type(self,itemname,inval): if inval == "?": return inval change_function = convert_type(self[itemname]) if isinstance(inval,list) and not isinstance(inval,StarFile.StarList): #from a loop newval = list([change_function(a) for a in inval]) else: newval = change_function(inval) return newval def install_validation_functions(self): """Install the DDL-appropriate validation checks""" if self.diclang != 'DDLm': # functions which check conformance self.item_validation_funs = [ self.validate_item_type, self.validate_item_esd, self.validate_item_enum, self.validate_enum_range, self.validate_looping ] # functions checking loop values self.loop_validation_funs = [ self.validate_loop_membership, self.validate_loop_key, self.validate_loop_references ] # where we need to look at other values self.global_validation_funs = [ self.validate_exclusion, self.validate_parent, self.validate_child, self.validate_dependents, self.validate_uniqueness ] # where only a full block will do self.block_validation_funs = [ self.validate_mandatory_category ] # removal is quicker with special checks self.global_remove_validation_funs = [ self.validate_remove_parent_child ] elif self.diclang == 'DDLm': self.item_validation_funs = [ self.validate_item_enum, self.validate_item_esd_ddlm, ] self.loop_validation_funs = [ self.validate_looping_ddlm, self.validate_loop_key_ddlm, self.validate_loop_membership ] self.global_validation_funs = [] self.block_validation_funs = [ self.check_mandatory_items, self.check_prohibited_items ] self.global_remove_validation_funs = [] self.optimize = False # default value self.done_parents = [] self.done_children = [] self.done_keys = [] def validate_item_type(self,item_name,item_value): def mymatch(m,a): res = m.match(a) if res != None: return res.group() else: return "" target_type = self[item_name].get(self.type_spec) if target_type == None: # e.g. a category definition return {"result":True} # not restricted in any way matchexpr = self.typedic[target_type] item_values = listify(item_value) #for item in item_values: #print("Type match " + item_name + " " + item + ":",) #skip dots and question marks check_all = [a for a in item_values if a !="." and a != "?"] check_all = [a for a in check_all if mymatch(matchexpr,a) != a] if len(check_all)>0: return {"result":False,"bad_values":check_all} else: return {"result":True} def decide(self,result_list): """Construct the return list""" if len(result_list)==0: return {"result":True} else: return {"result":False,"bad_values":result_list} def validate_item_container(self, item_name,item_value): container_type = self[item_name]['_type.container'] item_values = listify(item_value) if container_type == 'Single': okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))] return decide(okcheck) if container_type in ('Multiple','List'): okcheck = [a for a in item_values if not isinstance(a,StarList)] return decide(okcheck) if container_type == 'Array': #A list with numerical values okcheck = [a for a in item_values if not isinstance(a,StarList)] first_check = decide(okcheck) if not first_check['result']: return first_check #num_check = [a for a in item_values if len([b for b in a if not isinstance def validate_item_esd(self,item_name,item_value): if self[item_name].get(self.primitive_type) != 'numb': return {"result":None} can_esd = self[item_name].get(self.esd_spec,"none") == "esd" if can_esd: return {"result":True} #must be OK! item_values = listify(item_value) check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None]) if len(check_all)>0: return {"result":False,"bad_values":check_all} return {"result":True} def validate_item_esd_ddlm(self,item_name,item_value): if self[item_name].get('self.primitive_type') not in \ ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']: return {"result":None} can_esd = True if self[item_name].get('_type.purpose') != 'Measurand': can_esd = False item_values = listify(item_value) check_all = [get_number_with_esd(a)[1] for a in item_values] check_all = [v for v in check_all if (can_esd and v == None) or \ (not can_esd and v != None)] if len(check_all)>0: return {"result":False,"bad_values":check_all} return {"result":True} def validate_enum_range(self,item_name,item_value): if "_item_range.minimum" not in self[item_name] and \ "_item_range.maximum" not in self[item_name]: return {"result":None} minvals = self[item_name].get("_item_range.minimum",default = ["."]) maxvals = self[item_name].get("_item_range.maximum",default = ["."]) def makefloat(a): if a == ".": return a else: return float(a) maxvals = map(makefloat, maxvals) minvals = map(makefloat, minvals) rangelist = list(zip(minvals,maxvals)) item_values = listify(item_value) def map_check(rangelist,item_value): if item_value == "?" or item_value == ".": return True iv,esd = get_number_with_esd(item_value) if iv==None: return None #shouldn't happen as is numb type for lower,upper in rangelist: #check the minima if lower == ".": lower = iv - 1 if upper == ".": upper = iv + 1 if iv > lower and iv < upper: return True if upper == lower and iv == upper: return True # debug # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper)) return False check_all = [a for a in item_values if map_check(rangelist,a) != True] if len(check_all)>0: return {"result":False,"bad_values":check_all} else: return {"result":True} def validate_item_enum(self,item_name,item_value): try: enum_list = self[item_name][self.enum_spec][:] except KeyError: return {"result":None} enum_list.append(".") #default value enum_list.append("?") #unknown item_values = listify(item_value) #print("Enum check: {!r} in {!r}".format(item_values, enum_list)) check_all = [a for a in item_values if a not in enum_list] if len(check_all)>0: return {"result":False,"bad_values":check_all} else: return {"result":True} def validate_looping(self,item_name,item_value): try: must_loop = self[item_name][self.must_loop_spec] except KeyError: return {"result":None} if must_loop == 'yes' and isinstance(item_value,(unicode,str)): # not looped return {"result":False} #this could be triggered if must_loop == 'no' and not isinstance(item_value,(unicode,str)): return {"result":False} return {"result":True} def validate_looping_ddlm(self,loop_names): """Check that all names are loopable""" truly_loopy = self.get_final_cats(loop_names) if len(truly_loopy)<len(loop_names): #some are bad categories = [(a,self[a][self.cat_spec].lower()) for a in loop_names] not_looped = [a[0] for a in categories if a[1] not in self.parent_lookup.keys()] return {"result":False,"bad_items":not_looped} return {"result":True} def validate_loop_membership(self,loop_names): final_cat = self.get_final_cats(loop_names) bad_items = [a for a in final_cat if a != final_cat[0]] if len(bad_items)>0: return {"result":False,"bad_items":bad_items} else: return {"result":True} def get_final_cats(self,loop_names): """Return a list of the uppermost parent categories for the loop_names. Names that are not from loopable categories are ignored.""" try: categories = [self[a][self.cat_spec].lower() for a in loop_names] except KeyError: #category_id is mandatory raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0])) truly_looped = [a for a in categories if a in self.parent_lookup.keys()] return [self.parent_lookup[a] for a in truly_looped] def validate_loop_key(self,loop_names): category = self[loop_names[0]][self.cat_spec] # find any unique values which must be present key_spec = self[category].get(self.key_spec,[]) for names_to_check in key_spec: if isinstance(names_to_check,unicode): #only one names_to_check = [names_to_check] for loop_key in names_to_check: if loop_key not in loop_names: #is this one of those dang implicit items? if self[loop_key].get(self.must_exist_spec,None) == "implicit": continue #it is virtually there... alternates = self.get_alternates(loop_key) if alternates == []: return {"result":False,"bad_items":loop_key} for alt_names in alternates: alt = [a for a in alt_names if a in loop_names] if len(alt) == 0: return {"result":False,"bad_items":loop_key} # no alternates return {"result":True} def validate_loop_key_ddlm(self,loop_names): """Make sure at least one of the necessary keys are available""" final_cats = self.get_final_cats(loop_names) if len(final_cats)>0: poss_keys = self.cat_key_table[final_cats[0]][0] # found_keys = [a for a in poss_keys if a in loop_names] if len(found_keys)>0: return {"result":True} else: return {"result":False,"bad_items":poss_keys} else: return {"result":True} def validate_loop_references(self,loop_names): must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names] must_haves = [a for a in must_haves if a != None] # build a flat list. For efficiency we don't remove duplicates,as # we expect no more than the order of 10 or 20 looped names. def flat_func(a,b): if isinstance(b,unicode): a.append(b) #single name else: a.extend(b) #list of names return a flat_mh = [] [flat_func(flat_mh,a) for a in must_haves] group_mh = filter(lambda a:a[-1]=="_",flat_mh) single_mh = filter(lambda a:a[-1]!="_",flat_mh) res = [a for a in single_mh if a not in loop_names] def check_gr(s_item, name_list): nl = map(lambda a:a[:len(s_item)],name_list) if s_item in nl: return True return False res_g = [a for a in group_mh if check_gr(a,loop_names)] if len(res) == 0 and len(res_g) == 0: return {"result":True} # construct alternate list alternates = map(lambda a: (a,self.get_alternates(a)),res) alternates = [a for a in alternates if a[1] != []] # next line purely for error reporting missing_alts = [a[0] for a in alternates if a[1] == []] if len(alternates) != len(res): return {"result":False,"bad_items":missing_alts} #short cut; at least one #doesn't have an altern #loop over alternates for orig_name,alt_names in alternates: alt = [a for a in alt_names if a in loop_names] if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates return {"result":True} #found alternates def get_alternates(self,main_name,exclusive_only=False): alternates = self[main_name].get(self.related_func,None) alt_names = [] if alternates != None: alt_names = self[main_name].get(self.related_item,None) if isinstance(alt_names,unicode): alt_names = [alt_names] alternates = [alternates] together = zip(alt_names,alternates) if exclusive_only: alt_names = [a for a in together if a[1]=="alternate_exclusive" \ or a[1]=="replace"] else: alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"] alt_names = list([a[0] for a in alt_names]) # now do the alias thing alias_names = listify(self[main_name].get("_item_aliases.alias_name",[])) alt_names.extend(alias_names) # print("Alternates for {}: {!r}".format(main_name, alt_names)) return alt_names def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}): alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)] item_name_list = [a.lower() for a in whole_block.keys()] item_name_list.extend([a.lower() for a in provisional_items.keys()]) bad = [a for a in alternates if a in item_name_list] if len(bad)>0: print("Bad: %s, alternates %s" % (repr(bad),repr(alternates))) return {"result":False,"bad_items":bad} else: return {"result":True} # validate that parent exists and contains matching values def validate_parent(self,item_name,item_value,whole_block,provisional_items={},globals={}): parent_item = self[item_name].get(self.parent_spec) if not parent_item: return {"result":None} #no parent specified if isinstance(parent_item,list): parent_item = parent_item[0] if self.optimize: if parent_item in self.done_parents: return {"result":None} else: self.done_parents.append(parent_item) print("Done parents %s" % repr(self.done_parents)) # initialise parent/child values if isinstance(item_value,unicode): child_values = [item_value] else: child_values = item_value[:] #copy for safety # track down the parent # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block)) # if globals contains the parent values, we are doing a DDL2 dictionary, and so # we have collected all parent values into the global block - so no need to search # for them elsewhere. # print("Looking for {!r}".format(parent_item)) parent_values = globals.get(parent_item) if not parent_values: parent_values = provisional_items.get(parent_item,whole_block.get(parent_item)) if not parent_values: # go for alternates namespace = whole_block.keys() namespace.extend(provisional_items.keys()) namespace.extend(globals.keys()) alt_names = filter_present(self.get_alternates(parent_item),namespace) if len(alt_names) == 0: if len([a for a in child_values if a != "." and a != "?"])>0: return {"result":False,"parent":parent_item}#no parent available -> error else: return {"result":None} #maybe True is more appropriate?? parent_item = alt_names[0] #should never be more than one?? parent_values = provisional_items.get(parent_item,whole_block.get(parent_item)) if not parent_values: # check global block parent_values = globals.get(parent_item) if isinstance(parent_values,unicode): parent_values = [parent_values] #print("Checking parent %s against %s, values %r/%r" % (parent_item, # item_name, parent_values, child_values)) missing = self.check_parent_child(parent_values,child_values) if len(missing) > 0: return {"result":False,"bad_values":missing,"parent":parent_item} return {"result":True} def validate_child(self,item_name,item_value,whole_block,provisional_items={},globals={}): try: child_items = self[item_name][self.child_spec][:] #copy except KeyError: return {"result":None} #not relevant # special case for dictionaries -> we check parents of children only if item_name in globals: #dictionary so skip return {"result":None} if isinstance(child_items,unicode): # only one child child_items = [child_items] if isinstance(item_value,unicode): # single value parent_values = [item_value] else: parent_values = item_value[:] # expand child list with list of alternates for child_item in child_items[:]: child_items.extend(self.get_alternates(child_item)) # now loop over the children for child_item in child_items: if self.optimize: if child_item in self.done_children: return {"result":None} else: self.done_children.append(child_item) print("Done children %s" % repr(self.done_children)) if child_item in provisional_items: child_values = provisional_items[child_item][:] elif child_item in whole_block: child_values = whole_block[child_item][:] else: continue if isinstance(child_values,unicode): child_values = [child_values] # print("Checking child %s against %s, values %r/%r" % (child_item, # item_name, child_values, parent_values)) missing = self.check_parent_child(parent_values,child_values) if len(missing)>0: return {"result":False,"bad_values":missing,"child":child_item} return {"result":True} #could mean that no child items present #a generic checker: all child vals should appear in parent_vals def check_parent_child(self,parent_vals,child_vals): # shield ourselves from dots and question marks pv = parent_vals[:] pv.extend([".","?"]) res = [a for a in child_vals if a not in pv] #print("Missing: %s" % res) return res def validate_remove_parent_child(self,item_name,whole_block): try: child_items = self[item_name][self.child_spec] except KeyError: return {"result":None} if isinstance(child_items,unicode): # only one child child_items = [child_items] for child_item in child_items: if child_item in whole_block: return {"result":False,"child":child_item} return {"result":True} def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}): try: dep_items = self[item_name][self.dep_spec][:] except KeyError: return {"result":None} #not relevant if isinstance(dep_items,unicode): dep_items = [dep_items] actual_names = whole_block.keys() actual_names.extend(prov.keys()) actual_names.extend(globals.keys()) missing = [a for a in dep_items if a not in actual_names] if len(missing) > 0: alternates = map(lambda a:[self.get_alternates(a),a],missing) # compact way to get a list of alternative items which are # present have_check = [(filter_present(b[0],actual_names), b[1]) for b in alternates] have_check = list([a for a in have_check if len(a[0])==0]) if len(have_check) > 0: have_check = [a[1] for a in have_check] return {"result":False,"bad_items":have_check} return {"result":True} def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={}, globals={}): category = self[item_name].get(self.cat_spec) if category == None: print("No category found for %s" % item_name) return {"result":None} # print("Category {!r} for item {}".format(category, item_name)) # we make a copy in the following as we will be removing stuff later! unique_i = self[category].get("_category_key.name",[])[:] if isinstance(unique_i,unicode): unique_i = [unique_i] if item_name not in unique_i: #no need to verify return {"result":None} if isinstance(item_value,unicode): #not looped return {"result":None} # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i)) # check that we can't optimize by not doing this check if self.optimize: if unique_i in self.done_keys: return {"result":None} else: self.done_keys.append(unique_i) val_list = [] # get the matching data from any other data items unique_i.remove(item_name) other_data = [] if len(unique_i) > 0: # i.e. do have others to think about for other_name in unique_i: # we look for the value first in the provisional dict, then the main block # the logic being that anything in the provisional dict overrides the # main block if other_name in provisional_items: other_data.append(provisional_items[other_name]) elif other_name in whole_block: other_data.append(whole_block[other_name]) elif self[other_name].get(self.must_exist_spec)=="implicit": other_data.append([item_name]*len(item_value)) #placeholder else: return {"result":False,"bad_items":other_name}#missing data name # ok, so we go through all of our values # this works by comparing lists of strings to one other, and # so could be fooled if you think that '1.' and '1' are # identical for i in range(len(item_value)): #print("Value no. %d" % i, end=" ") this_entry = item_value[i] for j in range(len(other_data)): this_entry = " ".join([this_entry,other_data[j][i]]) #print("Looking for {!r} in {!r}: ".format(this_entry, val_list)) if this_entry in val_list: return {"result":False,"bad_values":this_entry} val_list.append(this_entry) return {"result":True} def validate_mandatory_category(self,whole_block): mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"] if len(mand_cats) == 0: return {"result":True} # print("Mandatory categories - {!r}".format(mand_cats) # find which categories each of our datanames belongs to all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()] missing = set(mand_cats) - set(all_cats) if len(missing) > 0: return {"result":False,"bad_items":repr(missing)} return {"result":True} def check_mandatory_items(self,whole_block,default_scope='Item'): """Return an error if any mandatory items are missing""" if len(self.scopes_mandatory)== 0: return {"result":True} if default_scope == 'Datablock': return {"result":True} #is a data file scope = whole_block.get('_definition.scope',default_scope) if '_dictionary.title' in whole_block: scope = 'Dictionary' missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block]) if len(missing)==0: return {"result":True} else: return {"result":False,"bad_items":missing} def check_prohibited_items(self,whole_block,default_scope='Item'): """Return an error if any prohibited items are present""" if len(self.scopes_naughty)== 0: return {"result":True} if default_scope == 'Datablock': return {"result":True} #is a data file scope = whole_block.get('_definition.scope',default_scope) if '_dictionary.title' in whole_block: scope = 'Dictionary' present = list([a for a in self.scopes_naughty[scope] if a in whole_block]) if len(present)==0: return {"result":True} else: return {"result":False,"bad_items":present} def run_item_validation(self,item_name,item_value): return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])} def run_loop_validation(self,loop_names): return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])} def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}): results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs]) return {item_name:results} def run_block_validation(self,whole_block,block_scope='Item'): results = list([(f.__name__,f(whole_block)) for f in self.block_validation_funs]) # fix up the return values return {"whole_block":results} def optimize_on(self): self.optimize = True self.done_keys = [] self.done_children = [] self.done_parents = [] def optimize_off(self): self.optimize = False self.done_keys = [] self.done_children = [] self.done_parents = []
Ancestors (in MRO)
- CifDic
- CifFile.StarFile.StarFile
- CifFile.StarFile.BlockCollection
- __builtin__.object
Methods
def DDL1_normalise(
self)
def DDL1_normalise(self): # switch off block name collision checks self.standard = None # add default type information in DDL2 style # initial types and constructs base_types = ["char","numb","null"] prim_types = base_types[:] base_constructs = [".*", '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.', "\"\" "] for key,value in self.items(): newnames = [key] #keep by default if "_name" in value: real_name = value["_name"] if isinstance(real_name,list): #looped values for looped_name in real_name: new_value = value.copy() new_value["_name"] = looped_name #only looped name self[looped_name] = new_value newnames = real_name else: self[real_name] = value newnames = [real_name] # delete the old one if key not in newnames: del self[key] # loop again to normalise the contents of each definition for key,value in self.items(): #unlock the block save_overwrite = value.overwrite value.overwrite = True # deal with a missing _list, _type_conditions if "_list" not in value: value["_list"] = 'no' if "_type_conditions" not in value: value["_type_conditions"] = 'none' # deal with enumeration ranges if "_enumeration_range" in value: max,min = self.getmaxmin(value["_enumeration_range"]) if min == ".": self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min)))) elif max == ".": self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min)))) else: self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min)))) #add any type construct information if "_type_construct" in value: base_types.append(value["_name"]+"_type") #ie dataname_type base_constructs.append(value["_type_construct"]+"$") prim_types.append(value["_type"]) #keep a record value["_type"] = base_types[-1] #the new type name #make categories conform with ddl2 #note that we must remove everything from the last underscore if value.get("_category",None) == "category_overview": last_under = value["_name"].rindex("_") catid = value["_name"][1:last_under] value["_category.id"] = catid #remove square bracks if catid not in self.cat_list: self.cat_list.append(catid) value.overwrite = save_overwrite # we now add any missing categories before filling in the rest of the # information for key,value in self.items(): #print('processing ddl1 definition %s' % key) if "_category" in self[key]: if self[key]["_category"] not in self.cat_list: # rogue category, add it in newcat = self[key]["_category"] fake_name = "_" + newcat + "_[]" newcatdata = CifBlock() newcatdata["_category"] = "category_overview" newcatdata["_category.id"] = newcat newcatdata["_type"] = "null" self[fake_name] = newcatdata self.cat_list.append(newcat) # write out the type information in DDL2 style self.master_block.AddLoopItem(( ("_item_type_list.code","_item_type_list.construct", "_item_type_list.primitive_code"), (base_types,base_constructs,prim_types) ))
def DDL2_normalise(
self)
def DDL2_normalise(self): listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys()) # now filter out all the single element lists! dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs) for item_def in dodgy_defs: # print("DDL2 norm: processing %s" % item_def) thisdef = self[item_def] packet_no = thisdef['_item.name'].index(item_def) realcat = thisdef['_item.category_id'][packet_no] realmand = thisdef['_item.mandatory_code'][packet_no] # first add in all the missing categories # we don't replace the entry in the list corresponding to the # current item, as that would wipe out the information we want for child_no in range(len(thisdef['_item.name'])): if child_no == packet_no: continue child_name = thisdef['_item.name'][child_no] child_cat = thisdef['_item.category_id'][child_no] child_mand = thisdef['_item.mandatory_code'][child_no] if child_name not in self: self[child_name] = CifBlock() self[child_name]['_item.name'] = child_name self[child_name]['_item.category_id'] = child_cat self[child_name]['_item.mandatory_code'] = child_mand self[item_def]['_item.name'] = item_def self[item_def]['_item.category_id'] = realcat self[item_def]['_item.mandatory_code'] = realmand target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \ '_item_linked.parent_name' in self[a]] # now dodgy_defs contains all definition blocks with more than one child/parent link for item_def in dodgy_defs: self.create_pcloop(item_def) #regularise appearance for item_def in dodgy_defs: print('Processing %s' % item_def) thisdef = self[item_def] child_list = thisdef['_item_linked.child_name'] parents = thisdef['_item_linked.parent_name'] # for each parent, find the list of children. family = list(zip(parents,child_list)) notmychildren = family #We aim to remove non-children # Loop over the parents, relocating as necessary while len(notmychildren): # get all children of first entry mychildren = [a for a in family if a[0]==notmychildren[0][0]] print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren))) for parent,child in mychildren: #parent is the same for all # Make sure that we simply add in the new entry for the child, not replace it, # otherwise we might spoil the child entry loop structure try: childloop = self[child].GetLoop('_item_linked.parent_name') except KeyError: print('Creating new parent entry %s for definition %s' % (parent,child)) self[child]['_item_linked.parent_name'] = [parent] childloop = self[child].GetLoop('_item_linked.parent_name') childloop.AddLoopItem(('_item_linked.child_name',[child])) continue else: # A parent loop already exists and so will a child loop due to the # call to create_pcloop above pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child] goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent] if len(goodpars)>0: #no need to add it print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child)) continue print('Adding %s to %s entry' % (parent,child)) newpacket = childloop.GetPacket(0) #essentially a copy, I hope setattr(newpacket,'_item_linked.child_name',child) setattr(newpacket,'_item_linked.parent_name',parent) childloop.AddPacket(newpacket) # # Make sure the parent also points to the children. We get # the current entry, then add our # new values if they are not there already # parent_name = mychildren[0][0] old_children = self[parent_name].get('_item_linked.child_name',[]) old_parents = self[parent_name].get('_item_linked.parent_name',[]) oldfamily = zip(old_parents,old_children) newfamily = [] print('Old parents -> %s' % repr(old_parents)) for jj, childname in mychildren: alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname] if len(alreadythere)>0: continue 'Adding new child %s to parent definition at %s' % (childname,parent_name) old_children.append(childname) old_parents.append(parent_name) # Now output the loop, blowing away previous definitions. If there is something # else in this category, we are destroying it. newloop = CifLoopBlock(dimension=1) newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) newloop.AddLoopItem(('_item_linked.child_name',old_children)) del self[parent_name]['_item_linked.parent_name'] del self[parent_name]['_item_linked.child_name'] self[parent_name].insert_loop(newloop) print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name'])) # now make a new,smaller list notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]] # now flatten any single element lists single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs) for flat_def in single_defs: flat_keys = self[flat_def].GetLoop('_item.name').keys() for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0] # now deal with the multiple lists # next we do aliases all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')] for aliased in all_aliases: my_aliases = listify(self[aliased]['_item_aliases.alias_name']) for alias in my_aliases: self[alias] = self[aliased].copy() #we are going to delete stuff... del self[alias]["_item_aliases.alias_name"]
def NewBlock(
self, *args, **kwargs)
def NewBlock(self,*args,**kwargs): newname = super(CifDic,self).NewBlock(*args,**kwargs) try: self.block_id_table[self[newname]['_definition.id']]=newname except AttributeError: #no block_id table pass
def SetTemplate(
self, template_file)
Use template_file
as a template for all block output
def SetTemplate(self,template_file): """Use `template_file` as a template for all block output""" self.master_template = process_template(template_file) for b in self.dictionary.values(): b.formatting_hints = self.master_template
def WriteOut(
self, **kwargs)
def WriteOut(self,**kwargs): myblockorder = self.get_full_child_list() self.set_grammar(self.grammar) self.standard = 'Dic' return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs)
class CifError
class CifError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nCif Format error: '+ self.value
Ancestors (in MRO)
- CifError
- exceptions.Exception
- exceptions.BaseException
- __builtin__.object
class CifFile
class CifFile(StarFile.StarFile): def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs): super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs) self.strict = strict self.header_comment = \ """ ########################################################################## # Crystallographic Information Format file # Produced by PyCifRW module # # This is a CIF file. CIF has been adopted by the International # Union of Crystallography as the standard for data archiving and # transmission. # # For information on this file format, follow the CIF links at # http://www.iucr.org ########################################################################## """
Ancestors (in MRO)
- CifFile
- CifFile.StarFile.StarFile
- CifFile.StarFile.BlockCollection
- __builtin__.object
Methods
def NewBlock(
self, blockname, blockcontents=None, fix=True, parent=None)
Add a new block named blockname
with contents blockcontents
. If fix
is True, blockname
will have spaces and tabs replaced by underscores. parent
allows a parent block to be set so that block hierarchies can be created. Depending on
the output standard, these blocks will be printed out as nested save frames or
ignored.
def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None): """Add a new block named `blockname` with contents `blockcontents`. If `fix` is True, `blockname` will have spaces and tabs replaced by underscores. `parent` allows a parent block to be set so that block hierarchies can be created. Depending on the output standard, these blocks will be printed out as nested save frames or ignored.""" if blockcontents is None: blockcontents = self.blocktype() if self.standard == "CIF": blockcontents.setmaxnamelength(75) if len(blockname)>75: raise StarError('Blockname %s is longer than 75 characters' % blockname) if fix: newblockname = re.sub('[ \t]','_',blockname) else: newblockname = blockname new_lowerbn = newblockname.lower() if new_lowerbn in self.lower_keys: #already there if self.standard is not None: toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None] if parent is None and new_lowerbn not in toplevelnames: #can give a new key to this one while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+' elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one replace_name = new_lowerbn while replace_name in self.lower_keys: replace_name = replace_name + '+' self._rekey(new_lowerbn,replace_name) # now continue on to add in the new block if parent.lower() == new_lowerbn: #the new block's requested parent just got renamed!! parent = replace_name else: raise StarError( "Attempt to replace existing block " + blockname) else: del self[new_lowerbn] self.dictionary.update({new_lowerbn:blockcontents}) self.lower_keys.add(new_lowerbn) self.block_input_order.append(new_lowerbn) if parent is None: self.child_table[new_lowerbn]=self.PC(newblockname,None) self.visible_keys.append(new_lowerbn) else: if parent.lower() in self.lower_keys: if self.scoping == 'instance': self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower()) else: self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower()) self.visible_keys.append(new_lowerbn) else: print('Warning:Parent block %s does not exist for child %s' % (parent,newblockname)) self[new_lowerbn].set_grammar(self.grammar) self[new_lowerbn].set_characterset(self.characterset) self[new_lowerbn].formatting_hints = self.master_template return new_lowerbn #in case calling routine wants to know
def SetTemplate(
self, template_file)
Use template_file
as a template for all block output
def SetTemplate(self,template_file): """Use `template_file` as a template for all block output""" self.master_template = process_template(template_file) for b in self.dictionary.values(): b.formatting_hints = self.master_template
def WriteOut(
self, comment=u'', wraplength=80, maxoutlength=0, blockorder=None, saves_after=None)
Return the contents of this file as a string, wrapping if possible at wraplength
characters and restricting maximum line length to maxoutlength
. Delimiters and
save frame nesting are controlled by self.grammar
. If blockorder
is
provided, blocks are output in this order unless nested save frames have been
requested (STAR2). The default block order is the order in which blocks were input.
saves_after
inserts all save frames after the given dataname,
which allows less important items to appear later. Useful in conjunction with a
template for dictionary files.
def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None): """Return the contents of this file as a string, wrapping if possible at `wraplength` characters and restricting maximum line length to `maxoutlength`. Delimiters and save frame nesting are controlled by `self.grammar`. If `blockorder` is provided, blocks are output in this order unless nested save frames have been requested (STAR2). The default block order is the order in which blocks were input. `saves_after` inserts all save frames after the given dataname, which allows less important items to appear later. Useful in conjunction with a template for dictionary files.""" if maxoutlength != 0: self.SetOutputLength(maxoutlength) if not comment: comment = self.header_comment outstring = StringIO() if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0": outstring.write(r"#\#CIF_2.0" + "\n") outstring.write(comment) # prepare all blocks for b in self.dictionary.values(): b.set_grammar(self.grammar) b.formatting_hints = self.master_template b.SetOutputLength(wraplength,self.maxoutlength) # loop over top-level # monitor output all_names = list(self.child_table.keys()) #i.e. lower case if blockorder is None: blockorder = self.block_input_order top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None] for blockref,blockname in top_block_names: print('Writing %s, ' % blockname + repr(self[blockref])) outstring.write('\n' + 'data_' +blockname+'\n') all_names.remove(blockref) if self.standard == 'Dic': #put contents before save frames outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application')) if self.grammar == 'STAR2': #nested save frames child_refs = self.get_immediate_children(blockref) for child_ref,child_info in child_refs: child_name = child_info.block_id outstring.write('\n\n' + 'save_' + child_name + '\n') self.block_to_string_nested(child_ref,child_name,outstring,4) outstring.write('\n' + 'save_'+ '\n') elif self.grammar in ('1.0','1.1','2.0'): #non-nested save frames child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)] for child_ref in child_refs: child_name = self.child_table[child_ref].block_id outstring.write('\n\n' + 'save_' + child_name + '\n') outstring.write(str(self[child_ref])) outstring.write('\n\n' + 'save_' + '\n') all_names.remove(child_ref.lower()) else: raise StarError('Grammar %s is not recognised for output' % self.grammar) if self.standard != 'Dic': #put contents after save frames outstring.write(str(self[blockref])) else: outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application')) returnstring = outstring.getvalue() outstring.close() if len(all_names)>0: print('WARNING: following blocks not output: %s' % repr(all_names)) else: print('All blocks output.') return returnstring
class CifLoopBlock
class CifLoopBlock(StarFile.LoopBlock): def __init__(self,data=(),**kwargs): super(CifLoopBlock,self).__init__(data,**kwargs)
Ancestors (in MRO)
- CifLoopBlock
- CifFile.StarFile.LoopBlock
- __builtin__.object
Methods
def AddPacket(
self, packet)
def AddPacket(self,packet): for myitem in self.parent_block.loops[self.loop_no]: old_values = self.parent_block[myitem] old_values.append(packet.__getattribute__(myitem)) self.parent_block[myitem] = old_values
def AddToLoop(
self, dataname, loopdata)
Deprecated. Use AddItem
followed by calls to AddLoopName
.
Add multiple columns to the loop containing dataname
. loopdata
is a
collection of (key,value) pairs, where key
is the new dataname and value
is a list of values for that dataname
def AddToLoop(self,dataname,loopdata): """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`. Add multiple columns to the loop containing `dataname`. `loopdata` is a collection of (key,value) pairs, where `key` is the new dataname and `value` is a list of values for that dataname""" # check lengths thisloop = self.FindLoop(dataname) loop_len = len(self[dataname]) bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len] if len(bad_vals)>0: raise StarLengthError("Number of values for looped datanames %s not equal to %d" \ % (repr( bad_vals ),loop_len)) self.update(loopdata) self.loops[thisloop]+=loopdata.keys()
def ChangeItemOrder(
self, itemname, newpos)
Change the position at which itemname
appears when printing out to newpos
.
def ChangeItemOrder(self,itemname,newpos): """Change the position at which `itemname` appears when printing out to `newpos`.""" self.parent_block.loops[self.loop_no].remove(itemname.lower()) self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())
def GetItemOrder(
self)
Return a list of datanames in this LoopBlock
in the order that they will be
printed
def GetItemOrder(self): """Return a list of datanames in this `LoopBlock` in the order that they will be printed""" return self.parent_block.loops[self.loop_no][:]
def GetItemPosition(
self, itemname)
A utility function to get the numerical order in the printout
of itemname
. An item has coordinate (loop_no,pos)
with
the top level having a loop_no
of -1. If an integer is passed to
the routine then it will return the position of the loop
referenced by that number.
def GetItemPosition(self,itemname): """A utility function to get the numerical order in the printout of `itemname`. An item has coordinate `(loop_no,pos)` with the top level having a `loop_no` of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.""" if isinstance(itemname,int): # return loop position return (-1, self.item_order.index(itemname)) if not itemname in self: raise ValueError('No such dataname %s' % itemname) testname = itemname.lower() if testname in self.item_order: return (-1,self.item_order.index(testname)) loop_no = self.FindLoop(testname) loop_pos = self.loops[loop_no].index(testname) return loop_no,loop_pos
def GetLoop(
self, keyname)
Return a StarFile.LoopBlock
object constructed from the loop containing keyname
.
keyname
is only significant as a way to specify the loop.
def GetLoop(self,keyname): """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`. `keyname` is only significant as a way to specify the loop.""" return LoopBlock(self,keyname)
def GetLoopNames(
self, keyname)
Return all datanames appearing together with keyname
def GetLoopNames(self,keyname): """Return all datanames appearing together with `keyname`""" loop_no = self.FindLoop(keyname) if loop_no >= 0: return self.loops[loop_no] else: raise KeyError('%s is not in any loop' % keyname)
def GetPacket(
self, index)
def GetPacket(self,index): thispack = StarPacket([]) for myitem in self.parent_block.loops[self.loop_no]: thispack.append(self[myitem][index]) setattr(thispack,myitem,thispack[-1]) return thispack
def RemoveItem(
self, itemname)
Remove itemname
from the block.
def RemoveItem(self,itemname): """Remove `itemname` from the block.""" # first check any loops loop_no = self.FindLoop(itemname) testkey = itemname.lower() if testkey in self: del self.block[testkey] del self.true_case[testkey] # now remove from loop if loop_no >= 0: self.loops[loop_no].remove(testkey) if len(self.loops[loop_no])==0: del self.loops[loop_no] self.item_order.remove(loop_no) else: #will appear in order list self.item_order.remove(testkey)
def RemoveLoopItem(
self, itemname)
Deprecated. Use RemoveItem
instead
def RemoveLoopItem(self,itemname): """*Deprecated*. Use `RemoveItem` instead""" self.RemoveItem(itemname)
class CifRecursionError
class CifRecursionError(Exception): def __init__(self,key_value,call_stack): self.key_value = key_value self.call_stack = call_stack def __str__(self): return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack))
Ancestors (in MRO)
- CifRecursionError
- exceptions.Exception
- exceptions.BaseException
- __builtin__.object
class DicBlock
A definition block within a dictionary, which allows imports to be transparently followed
class DicBlock(StarFile.StarBlock): """A definition block within a dictionary, which allows imports to be transparently followed""" def __init__(self,*args,**kwargs): super(DicBlock,self).__init__(*args,**kwargs) self._import_cache = {} def __getitem__(self,dataname): value = None if super(DicBlock,self).has_key("_import.get") and self._import_cache: value = self.follow_import(super(DicBlock,self).__getitem__("_import.get"),dataname) try: final_value = super(DicBlock,self).__getitem__(dataname) except KeyError: #not there final_value = value if final_value is None: raise KeyError("%s not found" % dataname) return final_value def has_key(self,key): try: self[key] except KeyError: return False return True def add_dict_cache(self,name,cached): """Add a loaded dictionary to this block's cache""" self._import_cache[name]=cached def follow_import(self,import_info,dataname): """Find the dataname values from the imported dictionary. `import_info` is a list of import locations""" latest_value = None for import_ref in import_info: file_loc = import_ref["file"] if file_loc not in self._import_cache: raise ValueError("Dictionary for import %s not found" % file_loc) import_from = self._import_cache[file_loc] miss = import_ref.get('miss','Exit') target_key = import_ref["save"] try: import_target = import_from[target_key] except KeyError: if miss == 'Exit': raise CifError('Import frame %s not found in %s' % (target_key,file_loc)) else: continue # now import appropriately mode = import_ref.get("mode",'Contents').lower() if mode == "contents": #only this is used at this level latest_value = import_target.get(dataname,latest_value) return latest_value
Ancestors (in MRO)
- DicBlock
- CifFile.StarFile.StarBlock
- __builtin__.object
Methods
def AddItem(
self, key, value, precheck=False)
Add dataname key
to block with value value
. value
may be
a single value, a list or a tuple. If precheck
is False (the default),
all values will be checked and converted to unicode strings as necessary. If
precheck
is True, this checking is bypassed. No checking is necessary
when values are read from a CIF file as they are already in correct form.
def AddItem(self,key,value,precheck=False): """Add dataname `key` to block with value `value`. `value` may be a single value, a list or a tuple. If `precheck` is False (the default), all values will be checked and converted to unicode strings as necessary. If `precheck` is True, this checking is bypassed. No checking is necessary when values are read from a CIF file as they are already in correct form.""" if not isinstance(key,(unicode,str)): raise TypeError('Star datanames are strings only (got %s)' % repr( key )) key = unicode(key) #everything is unicode internally if not precheck: self.check_data_name(key,self.maxnamelength) # make sure no nasty characters # check for overwriting if key in self: if not self.overwrite: raise StarError( 'Attempt to insert duplicate item name %s' % key) if not precheck: #need to sanitise regval,empty_val = self.regularise_data(value) pure_string = check_stringiness(regval) self.check_item_value(regval) else: regval,empty_val = value,None pure_string = True # update ancillary information first lower_key = key.lower() if not lower_key in self and self.FindLoop(lower_key)<0: #need to add to order self.item_order.append(lower_key) # always remove from our case table in case the case is different try: del self.true_case[lower_key] except KeyError: pass self.true_case[lower_key] = key if pure_string: self.block.update({lower_key:[regval,empty_val]}) else: self.block.update({lower_key:[empty_val,regval]})
def AddLoopItem(
self, incomingdata, precheck=False, maxlength=-1)
Deprecated. Use AddItem
followed by CreateLoop
if
necessary.
def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1): """*Deprecated*. Use `AddItem` followed by `CreateLoop` if necessary.""" # print "Received data %s" % `incomingdata` # we accept tuples, strings, lists and dicts!! # Direct insertion: we have a string-valued key, with an array # of values -> single-item into our loop if isinstance(incomingdata[0],(tuple,list)): # a whole loop keyvallist = zip(incomingdata[0],incomingdata[1]) for key,value in keyvallist: self.AddItem(key,value) self.CreateLoop(incomingdata[0]) elif not isinstance(incomingdata[0],(unicode,str)): raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] )) else: self.AddItem(incomingdata[0],incomingdata[1])
def AddLoopName(
self, oldname, newname)
Add newname
to the loop containing oldname
. If it is already in the new loop, no
error is raised. If newname
is in a different loop, it is removed from that loop.
The number of values associated with newname
must match the number of values associated
with all other columns of the new loop or a ValueError
will be raised.
def AddLoopName(self,oldname, newname): """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no error is raised. If `newname` is in a different loop, it is removed from that loop. The number of values associated with `newname` must match the number of values associated with all other columns of the new loop or a `ValueError` will be raised.""" lower_newname = newname.lower() loop_no = self.FindLoop(oldname) if loop_no < 0: raise KeyError('%s not in loop' % oldname) if lower_newname in self.loops[loop_no]: return # check length old_provides = self.provide_value self.provide_value = False loop_len = len(self[oldname]) self.provide_value = old_provides if len(self[newname]) != loop_len: raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len)) # remove from any other loops [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]] # and add to this loop self.loops[loop_no].append(lower_newname) # remove from item_order if present try: self.item_order.remove(lower_newname) except ValueError: pass
def AddToLoop(
self, dataname, loopdata)
Deprecated. Use AddItem
followed by calls to AddLoopName
.
Add multiple columns to the loop containing dataname
. loopdata
is a
collection of (key,value) pairs, where key
is the new dataname and value
is a list of values for that dataname
def AddToLoop(self,dataname,loopdata): """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`. Add multiple columns to the loop containing `dataname`. `loopdata` is a collection of (key,value) pairs, where `key` is the new dataname and `value` is a list of values for that dataname""" # check lengths thisloop = self.FindLoop(dataname) loop_len = len(self[dataname]) bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len] if len(bad_vals)>0: raise StarLengthError("Number of values for looped datanames %s not equal to %d" \ % (repr( bad_vals ),loop_len)) self.update(loopdata) self.loops[thisloop]+=loopdata.keys()
def ChangeItemOrder(
self, itemname, newpos)
Move the printout order of itemname
to newpos
. If itemname
is
in a loop, newpos
refers to the order within the loop.
def ChangeItemOrder(self,itemname,newpos): """Move the printout order of `itemname` to `newpos`. If `itemname` is in a loop, `newpos` refers to the order within the loop.""" if isinstance(itemname,(unicode,str)): true_name = itemname.lower() else: true_name = itemname loopno = self.FindLoop(true_name) if loopno < 0: #top level self.item_order.remove(true_name) self.item_order.insert(newpos,true_name) else: self.loops[loopno].remove(true_name) self.loops[loopno].insert(newpos,true_name)
def CreateLoop(
self, datanames, order=-1, length_check=True)
Create a loop in the datablock. datanames
is a list of datanames that
together form a loop. If length_check is True, they should have been initialised in the block
to have the same number of elements (possibly 0). If order
is given,
the loop will appear at this position in the block when printing
out. A loop counts as a single position.
def CreateLoop(self,datanames,order=-1,length_check=True): """Create a loop in the datablock. `datanames` is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If `order` is given, the loop will appear at this position in the block when printing out. A loop counts as a single position.""" if length_check: # check lengths: these datanames should exist listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)] if len(listed_values) == len(datanames): len_set = set([len(self[a]) for a in datanames]) if len(len_set)>1: raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set ))) elif len(listed_values) != 0: raise ValueError('Request to loop datanames where some are single values and some are not') # store as lower case lc_datanames = [d.lower() for d in datanames] # remove these datanames from all other loops [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]] # remove empty loops empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0] for a in empty_loops: self.item_order.remove(a) del self.loops[a] if len(self.loops)>0: loopno = max(self.loops.keys()) + 1 else: loopno = 1 self.loops[loopno] = list(lc_datanames) if order >= 0: self.item_order.insert(order,loopno) else: self.item_order.append(loopno) # remove these datanames from item ordering self.item_order = [a for a in self.item_order if a not in lc_datanames]
def FindLoop(
self, keyname)
Find the loop that contains keyname
and return its numerical index or
-1 if not present. The numerical index can be used to refer to the loop in
other routines.
def FindLoop(self,keyname): """Find the loop that contains `keyname` and return its numerical index or -1 if not present. The numerical index can be used to refer to the loop in other routines.""" loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]] if len(loop_no)>0: return loop_no[0] else: return -1
def GetCompoundKeyedPacket(
self, keydict)
Return the loop packet (a StarPacket
object) where the {key:(value,caseless)}
pairs
in keydict
take the appropriate values. Ignore case for a given key
if caseless
is
True. ValueError
is raised if no packet is found or more than one packet is found.
def GetCompoundKeyedPacket(self,keydict): """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is True. `ValueError` is raised if no packet is found or more than one packet is found.""" #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname]) keynames = list(keydict.keys()) my_loop = self.GetLoop(keynames[0]) for one_key in keynames: keyval,no_case = keydict[one_key] if no_case: my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()]) else: my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval]) if len(my_loop)!=1: raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop))) print("Compound keyed packet: %s" % my_loop[0]) return my_loop[0]
def GetFullItemValue(
self, itemname)
Return the value associated with itemname
, and a boolean flagging whether
(True) or not (False) it is in a form suitable for calculation. False is
always returned for strings and StarList
objects.
def GetFullItemValue(self,itemname): """Return the value associated with `itemname`, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and `StarList` objects.""" try: s,v = self.block[itemname.lower()] except KeyError: raise KeyError('Itemname %s not in datablock' % itemname) # prefer string value unless all are None # are we a looped value? if not isinstance(s,(tuple,list)) or isinstance(s,StarList): if not_none(s): return s,False #a string value else: return v,not isinstance(v,StarList) #a StarList is not calculation-ready elif not_none(s): return s,False #a list of string values else: if len(v)>0: return v,not isinstance(v[0],StarList) return v,True
def GetItemOrder(
self)
Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index
def GetItemOrder(self): """Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index""" return self.item_order[:]
def GetItemPosition(
self, itemname)
A utility function to get the numerical order in the printout
of itemname
. An item has coordinate (loop_no,pos)
with
the top level having a loop_no
of -1. If an integer is passed to
the routine then it will return the position of the loop
referenced by that number.
def GetItemPosition(self,itemname): """A utility function to get the numerical order in the printout of `itemname`. An item has coordinate `(loop_no,pos)` with the top level having a `loop_no` of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.""" if isinstance(itemname,int): # return loop position return (-1, self.item_order.index(itemname)) if not itemname in self: raise ValueError('No such dataname %s' % itemname) testname = itemname.lower() if testname in self.item_order: return (-1,self.item_order.index(testname)) loop_no = self.FindLoop(testname) loop_pos = self.loops[loop_no].index(testname) return loop_no,loop_pos
def GetItemValue(
self, itemname)
Return value of itemname
. If itemname
is looped, a list
of all values will be returned.
def GetItemValue(self,itemname): """Return value of `itemname`. If `itemname` is looped, a list of all values will be returned.""" return self.GetFullItemValue(itemname)[0]
def GetKeyedPacket(
self, keyname, keyvalue, no_case=False)
Return the loop packet (a StarPacket
object) where keyname
has value
keyvalue
. Ignore case in keyvalue
if no_case
is True. ValueError
is raised if no packet is found or more than one packet is found.
def GetKeyedPacket(self,keyname,keyvalue,no_case=False): """Return the loop packet (a `StarPacket` object) where `keyname` has value `keyvalue`. Ignore case in `keyvalue` if `no_case` is True. `ValueError` is raised if no packet is found or more than one packet is found.""" my_loop = self.GetLoop(keyname) #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block)) #print('Packet check on:' + keyname) #[print(repr(getattr(a,keyname))) for a in my_loop] if no_case: one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()] else: one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue] if len(one_pack)!=1: raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack))) print("Keyed packet: %s" % one_pack[0]) return one_pack[0]
def GetKeyedSemanticPacket(
self, keyvalue, cat_id)
Return a complete packet for category cat_id
where the
category key for the category equals keyvalue
. This routine
will understand any joined loops, so if separate loops in the
datafile belong to the
same category hierarchy (e.g. _atom_site
and _atom_site_aniso
),
the returned StarPacket
object will contain datanames from
both categories.
def GetKeyedSemanticPacket(self,keyvalue,cat_id): """Return a complete packet for category `cat_id` where the category key for the category equals `keyvalue`. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`), the returned `StarPacket` object will contain datanames from both categories.""" target_keys = self.dictionary.cat_key_table[cat_id] target_keys = [k[0] for k in target_keys] #one only in each list p = StarPacket() # set case-sensitivity flag lcase = False if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']: lcase = True for cat_key in target_keys: try: extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) except KeyError: #missing key try: test_key = self[cat_key] #generate key if possible print('Test key is %s' % repr( test_key )) if test_key is not None and\ not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)): print('Getting packet for key %s' % repr( keyvalue )) extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) except: #cannot be generated continue except ValueError: #none/more than one, assume none continue #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue) p.merge_packet(extra_packet) # the following attributes used to calculate missing values for keyname in target_keys: if hasattr(p,keyname): p.key = [keyname] break if not hasattr(p,"key"): raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p))) p.cif_dictionary = self.dictionary p.fulldata = self return p
def GetLoop(
self, keyname)
Return a StarFile.LoopBlock
object constructed from the loop containing keyname
.
keyname
is only significant as a way to specify the loop.
def GetLoop(self,keyname): """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`. `keyname` is only significant as a way to specify the loop.""" return LoopBlock(self,keyname)
def GetLoopNames(
self, keyname)
Return all datanames appearing together with keyname
def GetLoopNames(self,keyname): """Return all datanames appearing together with `keyname`""" loop_no = self.FindLoop(keyname) if loop_no >= 0: return self.loops[loop_no] else: raise KeyError('%s is not in any loop' % keyname)
def GetMultiKeyedSemanticPacket(
self, keydict, cat_id)
Return a complete packet for category cat_id
where the keyvalues are
provided as a dictionary of key:(value,caseless) pairs
This routine
will understand any joined loops, so if separate loops in the
datafile belong to the
same category hierarchy (e.g. _atom_site
and _atom_site_aniso
),
the returned StarPacket
object will contain datanames from
the requested category and any children.
def GetMultiKeyedSemanticPacket(self,keydict,cat_id): """Return a complete packet for category `cat_id` where the keyvalues are provided as a dictionary of key:(value,caseless) pairs This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`), the returned `StarPacket` object will contain datanames from the requested category and any children.""" #if len(keyvalues)==1: #simplification # return self.GetKeyedSemanticPacket(keydict[1][0],cat_id) target_keys = self.dictionary.cat_key_table[cat_id] # update the dictionary passed to us with all equivalents, for # simplicity. parallel_keys = list(zip(*target_keys)) #transpose print('Parallel keys:' + repr(parallel_keys)) print('Keydict:' + repr(keydict)) start_keys = list(keydict.keys()) for one_name in start_keys: key_set = [a for a in parallel_keys if one_name in a] for one_key in key_set: keydict[one_key] = keydict[one_name] # target_keys is a list of lists, each of which is a compound key p = StarPacket() # a little function to return the dataname for a key def find_key(key): for one_key in self.dictionary.key_equivs.get(key,[])+[key]: if self.has_key(one_key): return one_key return None for one_set in target_keys: #loop down the categories true_keys = [find_key(k) for k in one_set] true_keys = [k for k in true_keys if k is not None] if len(true_keys)==len(one_set): truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)]) try: extra_packet = self.GetCompoundKeyedPacket(truekeydict) except KeyError: #one or more are missing continue #should try harder? except ValueError: continue else: continue print('Merging packet for keys ' + repr(one_set)) p.merge_packet(extra_packet) # the following attributes used to calculate missing values p.key = true_keys p.cif_dictionary = self.dictionary p.fulldata = self return p
def RemoveItem(
self, itemname)
Remove itemname
from the block.
def RemoveItem(self,itemname): """Remove `itemname` from the block.""" # first check any loops loop_no = self.FindLoop(itemname) testkey = itemname.lower() if testkey in self: del self.block[testkey] del self.true_case[testkey] # now remove from loop if loop_no >= 0: self.loops[loop_no].remove(testkey) if len(self.loops[loop_no])==0: del self.loops[loop_no] self.item_order.remove(loop_no) else: #will appear in order list self.item_order.remove(testkey)
def RemoveKeyedPacket(
self, keyname, keyvalue)
Remove the packet for which dataname keyname
takes
value keyvalue
. Only the first such occurrence is
removed.
def RemoveKeyedPacket(self,keyname,keyvalue): """Remove the packet for which dataname `keyname` takes value `keyvalue`. Only the first such occurrence is removed.""" packet_coord = list(self[keyname]).index(keyvalue) loopnames = self.GetLoopNames(keyname) for dataname in loopnames: self.block[dataname][0] = list(self.block[dataname][0]) del self.block[dataname][0][packet_coord] self.block[dataname][1] = list(self.block[dataname][1]) del self.block[dataname][1][packet_coord]
def RemoveLoopItem(
self, itemname)
Deprecated. Use RemoveItem
instead
def RemoveLoopItem(self,itemname): """*Deprecated*. Use `RemoveItem` instead""" self.RemoveItem(itemname)
def SetOutputLength(
self, wraplength=80, maxoutlength=2048)
Set the maximum output line length (maxoutlength
) and the line length to
wrap at (wraplength
). The wrap length is a target only and may not always be
possible.
def SetOutputLength(self,wraplength=80,maxoutlength=2048): """Set the maximum output line length (`maxoutlength`) and the line length to wrap at (`wraplength`). The wrap length is a target only and may not always be possible.""" if wraplength > maxoutlength: raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength)) self.wraplength = wraplength self.maxoutlength = maxoutlength
class ValidCifBlock
A CifBlock
that is valid with respect to a given CIF dictionary. Methods
of CifBlock
are overridden where necessary to disallow addition of invalid items to the
CifBlock
.
Initialisation
dic
is aCifDic
object to be used for validation.
class ValidCifBlock(CifBlock): """A `CifBlock` that is valid with respect to a given CIF dictionary. Methods of `CifBlock` are overridden where necessary to disallow addition of invalid items to the `CifBlock`. ## Initialisation * `dic` is a `CifDic` object to be used for validation. """ def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords): CifBlock.__init__(self,*args,**kwords) if dic and diclist: print("Warning: diclist argument ignored when initialising ValidCifBlock") if isinstance(dic,CifDic): self.fulldic = dic else: raise TypeError( "ValidCifBlock passed non-CifDic type in dic argument") if len(diclist)==0 and not dic: raise ValidCifError( "At least one dictionary must be specified") if diclist and not dic: self.fulldic = merge_dic(diclist,mergemode) if not self.run_data_checks()[0]: raise ValidCifError( self.report()) def run_data_checks(self,verbose=False): self.v_result = {} self.fulldic.optimize_on() for dataname in self.keys(): update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname])) update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self)) for loop_names in self.loops.values(): update_value(self.v_result,self.fulldic.run_loop_validation(loop_names)) # now run block-level checks update_value(self.v_result,self.fulldic.run_block_validation(self)) # return false and list of baddies if anything didn't match self.fulldic.optimize_off() all_keys = list(self.v_result.keys()) #dictionary will change for test_key in all_keys: #print("%s: %r" % (test_key, self.v_result[test_key])) self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False] if len(self.v_result[test_key]) == 0: del self.v_result[test_key] isvalid = len(self.v_result)==0 #if not isvalid: # print("Baddies: {!r}".format(self.v_result)) return isvalid,self.v_result def single_item_check(self,item_name,item_value): #self.match_single_item(item_name) if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_item_validation(item_name,item_value) baddies = list([a for a in result[item_name] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def loop_item_check(self,loop_names): in_dic_names = list([a for a in loop_names if a in self.fulldic]) if len(in_dic_names)==0: result = {loop_names[0]:[]} else: result = self.fulldic.run_loop_validation(in_dic_names) baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies)) return isvalid,baddies def global_item_check(self,item_name,item_value,provisional_items={}): if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_global_validation(item_name, item_value,self,provisional_items = provisional_items) baddies = list([a for a in result[item_name] if a[1]["result"] is False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def remove_global_item_check(self,item_name): if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_remove_global_validation(item_name,self,False) baddies = list([a for a in result[item_name] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def AddToLoop(self,dataname,loopdata): # single item checks paired_data = loopdata.items() for name,value in paired_data: valid,problems = self.single_item_check(name,value) self.report_if_invalid(valid,problems) # loop item checks; merge with current loop found = 0 for aloop in self.block["loops"]: if dataname in aloop: loopnames = aloop.keys() for new_name in loopdata.keys(): if new_name not in loopnames: loopnames.append(new_name) valid,problems = self.looped_item_check(loopnames) self.report_if_invalid(valid,problems) prov_dict = loopdata.copy() for name,value in paired_data: del prov_dict[name] # remove temporarily valid,problems = self.global_item_check(name,value,prov_dict) prov_dict[name] = value # add back in self.report_if_invalid(valid,problems) CifBlock.AddToLoop(self,dataname,loopdata) def AddCifItem(self,data): if isinstance(data[0],(unicode,str)): # single item valid,problems = self.single_item_check(data[0],data[1]) self.report_if_invalid(valid,problems,data[0]) valid,problems = self.global_item_check(data[0],data[1]) self.report_if_invalid(valid,problems,data[0]) elif isinstance(data[0],tuple) or isinstance(data[0],list): paired_data = list(zip(data[0],data[1])) for name,value in paired_data: valid,problems = self.single_item_check(name,value) self.report_if_invalid(valid,problems,name) valid,problems = self.loop_item_check(data[0]) self.report_if_invalid(valid,problems,data[0]) prov_dict = {} # for storing temporary items for name,value in paired_data: prov_dict[name]=value for name,value in paired_data: del prov_dict[name] # remove temporarily valid,problems = self.global_item_check(name,value,prov_dict) prov_dict[name] = value # add back in self.report_if_invalid(valid,problems,name) else: raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item") super(ValidCifBlock,self).AddCifItem(data) def AddItem(self,key,value,**kwargs): """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary""" valid,problems = self.single_item_check(key,value) self.report_if_invalid(valid,problems,key) valid,problems = self.global_item_check(key,value) self.report_if_invalid(valid,problems,key) super(ValidCifBlock,self).AddItem(key,value,**kwargs) # utility function def report_if_invalid(self,valid,bad_list,data_name): if not valid: bad_tests = [a[0] for a in bad_list] error_string = ",".join(bad_tests) error_string = repr(data_name) + " fails following validity checks: " + error_string raise ValidCifError( error_string) def __delitem__(self,key): # we don't need to run single item checks; we do need to run loop and # global checks. if key in self: try: loop_items = self.GetLoop(key) except TypeError: loop_items = [] if loop_items: #need to check loop conformance loop_names = [a[0] for a in loop_items if a[0] != key] valid,problems = self.loop_item_check(loop_names) self.report_if_invalid(valid,problems) valid,problems = self.remove_global_item_check(key) self.report_if_invalid(valid,problems) self.RemoveCifItem(key) def report(self): outstr = StringIO() outstr.write( "Validation results\n") outstr.write( "------------------\n") print("%d invalid items found\n" % len(self.v_result)) for item_name,val_func_list in self.v_result.items(): outstr.write("%s fails following tests:\n" % item_name) for val_func in val_func_list: outstr.write("\t%s\n") return outstr.getvalue()
Ancestors (in MRO)
- ValidCifBlock
- CifBlock
- CifFile.StarFile.StarBlock
- __builtin__.object
Methods
def AddCifItem(
self, data)
Inheritance:
CifBlock
.AddCifItem
DEPRECATED. Use AddItem
instead.
def AddCifItem(self,data): if isinstance(data[0],(unicode,str)): # single item valid,problems = self.single_item_check(data[0],data[1]) self.report_if_invalid(valid,problems,data[0]) valid,problems = self.global_item_check(data[0],data[1]) self.report_if_invalid(valid,problems,data[0]) elif isinstance(data[0],tuple) or isinstance(data[0],list): paired_data = list(zip(data[0],data[1])) for name,value in paired_data: valid,problems = self.single_item_check(name,value) self.report_if_invalid(valid,problems,name) valid,problems = self.loop_item_check(data[0]) self.report_if_invalid(valid,problems,data[0]) prov_dict = {} # for storing temporary items for name,value in paired_data: prov_dict[name]=value for name,value in paired_data: del prov_dict[name] # remove temporarily valid,problems = self.global_item_check(name,value,prov_dict) prov_dict[name] = value # add back in self.report_if_invalid(valid,problems,name) else: raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item") super(ValidCifBlock,self).AddCifItem(data)
def AddItem(
self, key, value, **kwargs)
Set value of dataname key
to value
after checking for conformance with CIF dictionary
def AddItem(self,key,value,**kwargs): """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary""" valid,problems = self.single_item_check(key,value) self.report_if_invalid(valid,problems,key) valid,problems = self.global_item_check(key,value) self.report_if_invalid(valid,problems,key) super(ValidCifBlock,self).AddItem(key,value,**kwargs)
def AddLoopItem(
self, incomingdata, precheck=False, maxlength=-1)
Inheritance:
CifBlock
.AddLoopItem
Deprecated. Use AddItem
followed by CreateLoop
if
necessary.
def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1): """*Deprecated*. Use `AddItem` followed by `CreateLoop` if necessary.""" # print "Received data %s" % `incomingdata` # we accept tuples, strings, lists and dicts!! # Direct insertion: we have a string-valued key, with an array # of values -> single-item into our loop if isinstance(incomingdata[0],(tuple,list)): # a whole loop keyvallist = zip(incomingdata[0],incomingdata[1]) for key,value in keyvallist: self.AddItem(key,value) self.CreateLoop(incomingdata[0]) elif not isinstance(incomingdata[0],(unicode,str)): raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] )) else: self.AddItem(incomingdata[0],incomingdata[1])
def AddLoopName(
self, oldname, newname)
Inheritance:
CifBlock
.AddLoopName
Add newname
to the loop containing oldname
. If it is already in the new loop, no
error is raised. If newname
is in a different loop, it is removed from that loop.
The number of values associated with newname
must match the number of values associated
with all other columns of the new loop or a ValueError
will be raised.
def AddLoopName(self,oldname, newname): """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no error is raised. If `newname` is in a different loop, it is removed from that loop. The number of values associated with `newname` must match the number of values associated with all other columns of the new loop or a `ValueError` will be raised.""" lower_newname = newname.lower() loop_no = self.FindLoop(oldname) if loop_no < 0: raise KeyError('%s not in loop' % oldname) if lower_newname in self.loops[loop_no]: return # check length old_provides = self.provide_value self.provide_value = False loop_len = len(self[oldname]) self.provide_value = old_provides if len(self[newname]) != loop_len: raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len)) # remove from any other loops [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]] # and add to this loop self.loops[loop_no].append(lower_newname) # remove from item_order if present try: self.item_order.remove(lower_newname) except ValueError: pass
def AddSingleCifItem(
self, key, value)
Inheritance:
CifBlock
.AddSingleCifItem
Deprecated. Use AddItem
instead
def AddSingleCifItem(self,key,value): """*Deprecated*. Use `AddItem` instead""" """Add a single data item. If it is part of a loop, a separate call should be made""" self.AddItem(key,value)
def AddToLoop(
self, dataname, loopdata)
Inheritance:
CifBlock
.AddToLoop
Deprecated. Use AddItem
followed by calls to AddLoopName
.
Add multiple columns to the loop containing dataname
. loopdata
is a
collection of (key,value) pairs, where key
is the new dataname and value
is a list of values for that dataname
def AddToLoop(self,dataname,loopdata): # single item checks paired_data = loopdata.items() for name,value in paired_data: valid,problems = self.single_item_check(name,value) self.report_if_invalid(valid,problems) # loop item checks; merge with current loop found = 0 for aloop in self.block["loops"]: if dataname in aloop: loopnames = aloop.keys() for new_name in loopdata.keys(): if new_name not in loopnames: loopnames.append(new_name) valid,problems = self.looped_item_check(loopnames) self.report_if_invalid(valid,problems) prov_dict = loopdata.copy() for name,value in paired_data: del prov_dict[name] # remove temporarily valid,problems = self.global_item_check(name,value,prov_dict) prov_dict[name] = value # add back in self.report_if_invalid(valid,problems) CifBlock.AddToLoop(self,dataname,loopdata)
def ChangeItemOrder(
self, itemname, newpos)
Inheritance:
CifBlock
.ChangeItemOrder
Move the printout order of itemname
to newpos
. If itemname
is
in a loop, newpos
refers to the order within the loop.
def ChangeItemOrder(self,itemname,newpos): """Move the printout order of `itemname` to `newpos`. If `itemname` is in a loop, `newpos` refers to the order within the loop.""" if isinstance(itemname,(unicode,str)): true_name = itemname.lower() else: true_name = itemname loopno = self.FindLoop(true_name) if loopno < 0: #top level self.item_order.remove(true_name) self.item_order.insert(newpos,true_name) else: self.loops[loopno].remove(true_name) self.loops[loopno].insert(newpos,true_name)
def CreateLoop(
self, datanames, order=-1, length_check=True)
Inheritance:
CifBlock
.CreateLoop
Create a loop in the datablock. datanames
is a list of datanames that
together form a loop. If length_check is True, they should have been initialised in the block
to have the same number of elements (possibly 0). If order
is given,
the loop will appear at this position in the block when printing
out. A loop counts as a single position.
def CreateLoop(self,datanames,order=-1,length_check=True): """Create a loop in the datablock. `datanames` is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If `order` is given, the loop will appear at this position in the block when printing out. A loop counts as a single position.""" if length_check: # check lengths: these datanames should exist listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)] if len(listed_values) == len(datanames): len_set = set([len(self[a]) for a in datanames]) if len(len_set)>1: raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set ))) elif len(listed_values) != 0: raise ValueError('Request to loop datanames where some are single values and some are not') # store as lower case lc_datanames = [d.lower() for d in datanames] # remove these datanames from all other loops [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]] # remove empty loops empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0] for a in empty_loops: self.item_order.remove(a) del self.loops[a] if len(self.loops)>0: loopno = max(self.loops.keys()) + 1 else: loopno = 1 self.loops[loopno] = list(lc_datanames) if order >= 0: self.item_order.insert(order,loopno) else: self.item_order.append(loopno) # remove these datanames from item ordering self.item_order = [a for a in self.item_order if a not in lc_datanames]
def FindLoop(
self, keyname)
Inheritance:
CifBlock
.FindLoop
Find the loop that contains keyname
and return its numerical index or
-1 if not present. The numerical index can be used to refer to the loop in
other routines.
def FindLoop(self,keyname): """Find the loop that contains `keyname` and return its numerical index or -1 if not present. The numerical index can be used to refer to the loop in other routines.""" loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]] if len(loop_no)>0: return loop_no[0] else: return -1
def GetCompoundKeyedPacket(
self, keydict)
Inheritance:
CifBlock
.GetCompoundKeyedPacket
Return the loop packet (a StarPacket
object) where the {key:(value,caseless)}
pairs
in keydict
take the appropriate values. Ignore case for a given key
if caseless
is
True. ValueError
is raised if no packet is found or more than one packet is found.
def GetCompoundKeyedPacket(self,keydict): """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is True. `ValueError` is raised if no packet is found or more than one packet is found.""" #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname]) keynames = list(keydict.keys()) my_loop = self.GetLoop(keynames[0]) for one_key in keynames: keyval,no_case = keydict[one_key] if no_case: my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()]) else: my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval]) if len(my_loop)!=1: raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop))) print("Compound keyed packet: %s" % my_loop[0]) return my_loop[0]
def GetFullItemValue(
self, itemname)
Inheritance:
CifBlock
.GetFullItemValue
Return the value associated with itemname
, and a boolean flagging whether
(True) or not (False) it is in a form suitable for calculation. False is
always returned for strings and StarList
objects.
def GetFullItemValue(self,itemname): """Return the value associated with `itemname`, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and `StarList` objects.""" try: s,v = self.block[itemname.lower()] except KeyError: raise KeyError('Itemname %s not in datablock' % itemname) # prefer string value unless all are None # are we a looped value? if not isinstance(s,(tuple,list)) or isinstance(s,StarList): if not_none(s): return s,False #a string value else: return v,not isinstance(v,StarList) #a StarList is not calculation-ready elif not_none(s): return s,False #a list of string values else: if len(v)>0: return v,not isinstance(v[0],StarList) return v,True
def GetItemOrder(
self)
Inheritance:
CifBlock
.GetItemOrder
Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index
def GetItemOrder(self): """Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index""" return self.item_order[:]
def GetItemPosition(
self, itemname)
Inheritance:
CifBlock
.GetItemPosition
A utility function to get the numerical order in the printout
of itemname
. An item has coordinate (loop_no,pos)
with
the top level having a loop_no
of -1. If an integer is passed to
the routine then it will return the position of the loop
referenced by that number.
def GetItemPosition(self,itemname): """A utility function to get the numerical order in the printout of `itemname`. An item has coordinate `(loop_no,pos)` with the top level having a `loop_no` of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.""" if isinstance(itemname,int): # return loop position return (-1, self.item_order.index(itemname)) if not itemname in self: raise ValueError('No such dataname %s' % itemname) testname = itemname.lower() if testname in self.item_order: return (-1,self.item_order.index(testname)) loop_no = self.FindLoop(testname) loop_pos = self.loops[loop_no].index(testname) return loop_no,loop_pos
def GetItemValue(
self, itemname)
Inheritance:
CifBlock
.GetItemValue
Return value of itemname
. If itemname
is looped, a list
of all values will be returned.
def GetItemValue(self,itemname): """Return value of `itemname`. If `itemname` is looped, a list of all values will be returned.""" return self.GetFullItemValue(itemname)[0]
def GetKeyedPacket(
self, keyname, keyvalue, no_case=False)
Inheritance:
CifBlock
.GetKeyedPacket
Return the loop packet (a StarPacket
object) where keyname
has value
keyvalue
. Ignore case in keyvalue
if no_case
is True. ValueError
is raised if no packet is found or more than one packet is found.
def GetKeyedPacket(self,keyname,keyvalue,no_case=False): """Return the loop packet (a `StarPacket` object) where `keyname` has value `keyvalue`. Ignore case in `keyvalue` if `no_case` is True. `ValueError` is raised if no packet is found or more than one packet is found.""" my_loop = self.GetLoop(keyname) #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block)) #print('Packet check on:' + keyname) #[print(repr(getattr(a,keyname))) for a in my_loop] if no_case: one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()] else: one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue] if len(one_pack)!=1: raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack))) print("Keyed packet: %s" % one_pack[0]) return one_pack[0]
def GetKeyedSemanticPacket(
self, keyvalue, cat_id)
Inheritance:
CifBlock
.GetKeyedSemanticPacket
Return a complete packet for category cat_id
where the
category key for the category equals keyvalue
. This routine
will understand any joined loops, so if separate loops in the
datafile belong to the
same category hierarchy (e.g. _atom_site
and _atom_site_aniso
),
the returned StarPacket
object will contain datanames from
both categories.
def GetKeyedSemanticPacket(self,keyvalue,cat_id): """Return a complete packet for category `cat_id` where the category key for the category equals `keyvalue`. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`), the returned `StarPacket` object will contain datanames from both categories.""" target_keys = self.dictionary.cat_key_table[cat_id] target_keys = [k[0] for k in target_keys] #one only in each list p = StarPacket() # set case-sensitivity flag lcase = False if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']: lcase = True for cat_key in target_keys: try: extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) except KeyError: #missing key try: test_key = self[cat_key] #generate key if possible print('Test key is %s' % repr( test_key )) if test_key is not None and\ not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)): print('Getting packet for key %s' % repr( keyvalue )) extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) except: #cannot be generated continue except ValueError: #none/more than one, assume none continue #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue) p.merge_packet(extra_packet) # the following attributes used to calculate missing values for keyname in target_keys: if hasattr(p,keyname): p.key = [keyname] break if not hasattr(p,"key"): raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p))) p.cif_dictionary = self.dictionary p.fulldata = self return p
def GetLoop(
self, keyname)
Return a StarFile.LoopBlock
object constructed from the loop containing keyname
.
keyname
is only significant as a way to specify the loop.
def GetLoop(self,keyname): """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`. `keyname` is only significant as a way to specify the loop.""" return LoopBlock(self,keyname)
def GetLoopNames(
self, keyname)
Inheritance:
CifBlock
.GetLoopNames
Return all datanames appearing together with keyname
def GetLoopNames(self,keyname): """Return all datanames appearing together with `keyname`""" loop_no = self.FindLoop(keyname) if loop_no >= 0: return self.loops[loop_no] else: raise KeyError('%s is not in any loop' % keyname)
def GetMultiKeyedSemanticPacket(
self, keydict, cat_id)
Inheritance:
CifBlock
.GetMultiKeyedSemanticPacket
Return a complete packet for category cat_id
where the keyvalues are
provided as a dictionary of key:(value,caseless) pairs
This routine
will understand any joined loops, so if separate loops in the
datafile belong to the
same category hierarchy (e.g. _atom_site
and _atom_site_aniso
),
the returned StarPacket
object will contain datanames from
the requested category and any children.
def GetMultiKeyedSemanticPacket(self,keydict,cat_id): """Return a complete packet for category `cat_id` where the keyvalues are provided as a dictionary of key:(value,caseless) pairs This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`), the returned `StarPacket` object will contain datanames from the requested category and any children.""" #if len(keyvalues)==1: #simplification # return self.GetKeyedSemanticPacket(keydict[1][0],cat_id) target_keys = self.dictionary.cat_key_table[cat_id] # update the dictionary passed to us with all equivalents, for # simplicity. parallel_keys = list(zip(*target_keys)) #transpose print('Parallel keys:' + repr(parallel_keys)) print('Keydict:' + repr(keydict)) start_keys = list(keydict.keys()) for one_name in start_keys: key_set = [a for a in parallel_keys if one_name in a] for one_key in key_set: keydict[one_key] = keydict[one_name] # target_keys is a list of lists, each of which is a compound key p = StarPacket() # a little function to return the dataname for a key def find_key(key): for one_key in self.dictionary.key_equivs.get(key,[])+[key]: if self.has_key(one_key): return one_key return None for one_set in target_keys: #loop down the categories true_keys = [find_key(k) for k in one_set] true_keys = [k for k in true_keys if k is not None] if len(true_keys)==len(one_set): truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)]) try: extra_packet = self.GetCompoundKeyedPacket(truekeydict) except KeyError: #one or more are missing continue #should try harder? except ValueError: continue else: continue print('Merging packet for keys ' + repr(one_set)) p.merge_packet(extra_packet) # the following attributes used to calculate missing values p.key = true_keys p.cif_dictionary = self.dictionary p.fulldata = self return p
def RemoveCifItem(
self, itemname)
Inheritance:
CifBlock
.RemoveCifItem
Remove itemname
from the CifBlock
def RemoveCifItem(self,itemname): """Remove `itemname` from the CifBlock""" self.RemoveItem(itemname)
def RemoveItem(
self, itemname)
Inheritance:
CifBlock
.RemoveItem
Remove itemname
from the block.
def RemoveItem(self,itemname): """Remove `itemname` from the block.""" # first check any loops loop_no = self.FindLoop(itemname) testkey = itemname.lower() if testkey in self: del self.block[testkey] del self.true_case[testkey] # now remove from loop if loop_no >= 0: self.loops[loop_no].remove(testkey) if len(self.loops[loop_no])==0: del self.loops[loop_no] self.item_order.remove(loop_no) else: #will appear in order list self.item_order.remove(testkey)
def RemoveKeyedPacket(
self, keyname, keyvalue)
Inheritance:
CifBlock
.RemoveKeyedPacket
Remove the packet for which dataname keyname
takes
value keyvalue
. Only the first such occurrence is
removed.
def RemoveKeyedPacket(self,keyname,keyvalue): """Remove the packet for which dataname `keyname` takes value `keyvalue`. Only the first such occurrence is removed.""" packet_coord = list(self[keyname]).index(keyvalue) loopnames = self.GetLoopNames(keyname) for dataname in loopnames: self.block[dataname][0] = list(self.block[dataname][0]) del self.block[dataname][0][packet_coord] self.block[dataname][1] = list(self.block[dataname][1]) del self.block[dataname][1][packet_coord]
def RemoveLoopItem(
self, itemname)
Inheritance:
CifBlock
.RemoveLoopItem
Deprecated. Use RemoveItem
instead
def RemoveLoopItem(self,itemname): """*Deprecated*. Use `RemoveItem` instead""" self.RemoveItem(itemname)
def SetOutputLength(
self, wraplength=80, maxoutlength=2048)
Inheritance:
CifBlock
.SetOutputLength
Set the maximum output line length (maxoutlength
) and the line length to
wrap at (wraplength
). The wrap length is a target only and may not always be
possible.
def SetOutputLength(self,wraplength=80,maxoutlength=2048): """Set the maximum output line length (`maxoutlength`) and the line length to wrap at (`wraplength`). The wrap length is a target only and may not always be possible.""" if wraplength > maxoutlength: raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength)) self.wraplength = wraplength self.maxoutlength = maxoutlength
class ValidCifError
class ValidCifError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nCif Validity error: ' + self.value
Ancestors (in MRO)
- ValidCifError
- exceptions.Exception
- exceptions.BaseException
- __builtin__.object
class ValidCifFile
A CIF file for which all datablocks are valid. Argument dic
to
initialisation specifies a CifDic
object to use for validation.
class ValidCifFile(CifFile): """A CIF file for which all datablocks are valid. Argument `dic` to initialisation specifies a `CifDic` object to use for validation.""" def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs): if not diclist and not dic and not hasattr(self,'bigdic'): raise ValidCifError( "At least one dictionary is required to create a ValidCifFile object") if not dic and diclist: #merge here for speed self.bigdic = merge_dic(diclist,mergemode) elif dic and not diclist: self.bigdic = dic CifFile.__init__(self,*args,**kwargs) for blockname in self.keys(): self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic) def NewBlock(self,blockname,blockcontents,**kwargs): CifFile.NewBlock(self,blockname,blockcontents,**kwargs) # dictionary[blockname] is now a CifBlock object. We # turn it into a ValidCifBlock object self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic, data=self.dictionary[blockname])
Ancestors (in MRO)
- ValidCifFile
- CifFile
- CifFile.StarFile.StarFile
- CifFile.StarFile.BlockCollection
- __builtin__.object
Methods
def NewBlock(
self, blockname, blockcontents, **kwargs)
Add a new block named blockname
with contents blockcontents
. If fix
is True, blockname
will have spaces and tabs replaced by underscores. parent
allows a parent block to be set so that block hierarchies can be created. Depending on
the output standard, these blocks will be printed out as nested save frames or
ignored.
def NewBlock(self,blockname,blockcontents,**kwargs): CifFile.NewBlock(self,blockname,blockcontents,**kwargs) # dictionary[blockname] is now a CifBlock object. We # turn it into a ValidCifBlock object self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic, data=self.dictionary[blockname])
def SetTemplate(
self, template_file)
Inheritance:
CifFile
.SetTemplate
Use template_file
as a template for all block output
def SetTemplate(self,template_file): """Use `template_file` as a template for all block output""" self.master_template = process_template(template_file) for b in self.dictionary.values(): b.formatting_hints = self.master_template
def WriteOut(
self, comment=u'', wraplength=80, maxoutlength=0, blockorder=None, saves_after=None)
Return the contents of this file as a string, wrapping if possible at wraplength
characters and restricting maximum line length to maxoutlength
. Delimiters and
save frame nesting are controlled by self.grammar
. If blockorder
is
provided, blocks are output in this order unless nested save frames have been
requested (STAR2). The default block order is the order in which blocks were input.
saves_after
inserts all save frames after the given dataname,
which allows less important items to appear later. Useful in conjunction with a
template for dictionary files.
def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None): """Return the contents of this file as a string, wrapping if possible at `wraplength` characters and restricting maximum line length to `maxoutlength`. Delimiters and save frame nesting are controlled by `self.grammar`. If `blockorder` is provided, blocks are output in this order unless nested save frames have been requested (STAR2). The default block order is the order in which blocks were input. `saves_after` inserts all save frames after the given dataname, which allows less important items to appear later. Useful in conjunction with a template for dictionary files.""" if maxoutlength != 0: self.SetOutputLength(maxoutlength) if not comment: comment = self.header_comment outstring = StringIO() if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0": outstring.write(r"#\#CIF_2.0" + "\n") outstring.write(comment) # prepare all blocks for b in self.dictionary.values(): b.set_grammar(self.grammar) b.formatting_hints = self.master_template b.SetOutputLength(wraplength,self.maxoutlength) # loop over top-level # monitor output all_names = list(self.child_table.keys()) #i.e. lower case if blockorder is None: blockorder = self.block_input_order top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None] for blockref,blockname in top_block_names: print('Writing %s, ' % blockname + repr(self[blockref])) outstring.write('\n' + 'data_' +blockname+'\n') all_names.remove(blockref) if self.standard == 'Dic': #put contents before save frames outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application')) if self.grammar == 'STAR2': #nested save frames child_refs = self.get_immediate_children(blockref) for child_ref,child_info in child_refs: child_name = child_info.block_id outstring.write('\n\n' + 'save_' + child_name + '\n') self.block_to_string_nested(child_ref,child_name,outstring,4) outstring.write('\n' + 'save_'+ '\n') elif self.grammar in ('1.0','1.1','2.0'): #non-nested save frames child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)] for child_ref in child_refs: child_name = self.child_table[child_ref].block_id outstring.write('\n\n' + 'save_' + child_name + '\n') outstring.write(str(self[child_ref])) outstring.write('\n\n' + 'save_' + '\n') all_names.remove(child_ref.lower()) else: raise StarError('Grammar %s is not recognised for output' % self.grammar) if self.standard != 'Dic': #put contents after save frames outstring.write(str(self[blockref])) else: outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application')) returnstring = outstring.getvalue() outstring.close() if len(all_names)>0: print('WARNING: following blocks not output: %s' % repr(all_names)) else: print('All blocks output.') return returnstring
class ValidationResult
Represents validation result. It is initialised with
class ValidationResult: """Represents validation result. It is initialised with """ def __init__(self,results): """results is return value of validate function""" self.valid_result, self.no_matches = results def report(self,use_html): """Return string with human-readable description of validation result""" return validate_report((self.valid_result, self.no_matches),use_html) def is_valid(self,block_name=None): """Return True for valid CIF file, otherwise False""" if block_name is not None: block_names = [block_name] else: block_names = self.valid_result.iterkeys() for block_name in block_names: if not self.valid_result[block_name] == (True,{}): valid = False break else: valid = True return valid def has_no_match_items(self,block_name=None): """Return true if some items are not found in dictionary""" if block_name is not None: block_names = [block_name] else: block_names = self.no_matches.iter_keys() for block_name in block_names: if self.no_matches[block_name]: has_no_match_items = True break else: has_no_match_items = False return has_no_match_items