Common parser for CIF/STAR grammar

<CIF2 grammar>=
import ply.yacc as yacc
from cif_lex_2_0 import tokens
from StarFile import StarFile,StarBlock
<Helper functions>
<CIF grammar common features>
<CIF2 additions>
<Postamble>

<CIF1.1 grammar>=
import ply.yacc as yacc
from cif_lex_1_1 import tokens
from StarFile import StarFile,StarBlock
<Helper functions>
<CIF grammar common features>
<CIF1 specific productions>
<Postamble>


<Postamble>= (<-U <-U)
# Build the parser object from the p_* grammar functions of this module.
parser = yacc.yacc()

The common productions below were copied from our Yapps parser.

<CIF grammar common features>= (<-U <-U)

def p_input(p):
    ''' input : dblock
              | input dblock
              | '''
    # Accumulate data blocks into a single result:
    #   empty input      -> a fresh, empty StarFile
    #   a single dblock  -> that dblock's value, passed through
    #   input + dblock   -> the new dblock merged into the running result
    # (The docstring above is the grammar rule; do not add prose to it.)
    symbol_count = len(p) - 1
    if symbol_count == 0:
        p[0] = StarFile()
        return
    accumulated = p[1]
    if symbol_count != 1:
        accumulated.merge_fast(p[2])
    p[0] = accumulated

def p_dblock(p):
    ''' dblock : DATA_HEADING data_contents
               | DATA_HEADING '''
    # Build a StarFile holding one block named after the heading token and
    # fill it from the mini AST produced by data_contents.  Each AST node is
    # a list whose first element is a tag:
    #   ['SAVE', starfile]        -> merged in with this block as parent
    #   ['KVPAIR', name, value]   -> added as a single item
    #   ['LOOP', names, values]   -> handed to makeloop()
    # (The docstring above is the grammar rule; do not add prose to it.)

    # Drop the first five characters of the token (presumably the
    # 'data_' prefix -- confirm against the lexer).
    heading = p[1][5:]
    p[0] = StarFile(characterset='unicode')
    p[0].NewBlock(heading,StarBlock(overwrite=False))
    if len(p) == 3:
        # Traverse our mini AST
        for dc in p[2]:
            if dc[0] == 'SAVE':
                p[0].merge_fast(dc[1],parent=heading)
            elif dc[0] == 'KVPAIR':
                p[0][heading].AddItem(dc[1],dc[2],precheck=False)
            elif dc[0] == 'LOOP':
                makeloop(p[0][heading],dc[1:])
            else:
                # Call form of raise and repr() are valid on both Python 2
                # and Python 3; the message text is unchanged.
                raise SyntaxError('Programming error, what is ' + repr(dc[0]))

def p_data_contents(p):
    ''' data_contents : dataseq 
                     | save_frame
                     | data_contents dataseq 
                     | data_contents save_frame '''
    # Both dataseq and save_frame yield lists of AST nodes, so the
    # recursive alternatives simply concatenate; the base alternatives
    # pass the list through untouched.
    if len(p) != 2:
        p[0] = p[1] + p[2]
        return
    p[0] = p[1]

def p_dataseq(p):
    ''' dataseq : data
                | dataseq data '''
    # Collect consecutive data nodes into a list: start a new list for the
    # first node, otherwise extend the list built so far.
    is_first = len(p) == 2
    p[0] = [p[1]] if is_first else p[1] + [p[2]]

def p_data(p):
    ''' data : top_loop
             | datakvpair '''
    # A data item is either a loop node or a key/value node; whichever
    # matched is passed through unchanged.
    node = p[1]
    p[0] = node

def p_datakvpair(p):
    ''' datakvpair : DATA_NAME data_value '''
    # Emit a 'KVPAIR' AST node: [tag, data name, value].
    name, value = p[1], p[2]
    p[0] = ['KVPAIR', name, value]

def p_top_loop(p):
    ''' top_loop : LBLOCK loopfield loopvalues '''
    # Emit a 'LOOP' AST node: [tag, list of data names, flat list of values].
    # The LBLOCK token itself (p[1]) carries no information we need.
    names, values = p[2], p[3]
    p[0] = ['LOOP', names, values]

def p_loopfield(p):
    ''' loopfield : DATA_NAME
                  | loopfield DATA_NAME '''
    # Gather the loop's data names, in order of appearance, into a list.
    if len(p) != 2:
        earlier, latest = p[1], p[2]
        p[0] = earlier + [latest]
    else:
        p[0] = [p[1]]

def p_loopvalues(p):
    ''' loopvalues : data_value
                   | loopvalues data_value '''
    # Gather the loop's values into one flat list, in order of appearance.
    single_value = len(p) == 2
    if single_value:
        p[0] = [p[1]]
        return
    p[0] = p[1] + [p[2]]

def p_save_frame(p):
    ''' save_frame : SAVE_HEADING data_contents SAVE_END
                   | SAVE_HEADING SAVE_END '''
    # Build a StarFile holding one block named after the save heading, fill
    # it from the mini AST produced by data_contents, and emit it as a
    # one-element list containing a ['SAVE', starfile] node so that it can
    # be concatenated with other data_contents lists.
    # (The docstring above is the grammar rule; do not add prose to it.)

    # Drop the first five characters of the token (presumably the
    # 'save_' prefix -- confirm against the lexer).
    heading = p[1][5:]
    myframe = StarFile(characterset='unicode')
    myframe.NewBlock(heading,StarBlock(overwrite=False))
    if len(p) == 4:
        # Traverse our mini AST
        for dc in p[2]:
            if dc[0] == 'SAVE':
                myframe.merge_fast(dc[1],parent=heading)
            elif dc[0] == 'KVPAIR':
                myframe[heading].AddItem(dc[1],dc[2],precheck=False)
            elif dc[0] == 'LOOP':
                makeloop(myframe[heading],dc[1:])
            else:
                # Same guard as p_dblock: an unknown tag means the grammar
                # actions are out of step, so fail loudly rather than
                # silently dropping the node.
                raise SyntaxError('Programming error, what is ' + repr(dc[0]))
    p[0] = [['SAVE',myframe]]

These productions for CIF1 do not include the bracket expressions or separate delimited expressions.

<CIF1 specific productions>= (<-U)
def p_data_value(p):
    ''' data_value : DATA_VALUE_1
                   | END_SC_LINE '''
    # CIF1 values are plain tokens; pass the matched token value through.
    token_value = p[1]
    p[0] = token_value

CIF2 most notably adds lists and tables.

<CIF2 additions>= (<-U)
def p_data_value(p):
    ''' data_value : DATA_VALUE_1
                   | delimited_data_value
                   | END_SC_LINE
                   | square_bracket_expr
                   | curly_bracket_expr '''
    # Whatever alternative matched (plain token, delimited string, list or
    # table), its value is passed through unchanged.
    matched = p[1]
    p[0] = matched

def p_delimited_data_value(p):
    ''' delimited_data_value : TRIPLE_QUOTE_DATA_VALUE
                             | TRIPLE_APOST_DATA_VALUE
                             | SINGLE_QUOTE_DATA_VALUE '''
    # All delimited forms are handled alike: pass the token value through.
    quoted = p[1]
    p[0] = quoted

def p_square_bracket_expr(p):
    ''' square_bracket_expr : '[' list_builder ']' '''
    # Wrap the Python list collected by list_builder in a StarList.
    # The grammar preambles only import StarFile and StarBlock, so StarList
    # has to be brought into scope here to avoid a NameError.
    # NOTE(review): assumes StarList is exported by the StarFile module --
    # confirm.
    from StarFile import StarList
    p[0] = StarList(p[2])

def p_list_builder(p):
    ''' list_builder : data_value
                     | list_builder data_value
                     | '''
    # Collect the members of a bracketed list into a Python list:
    #   list_builder data_value -> extend the list built so far
    #   data_value              -> start a one-element list
    #   (empty)                 -> empty list
    symbol_total = len(p)
    if symbol_total == 3:
        p[0] = p[1] + [p[2]]
    elif symbol_total == 2:
        p[0] = [p[1]]
    else:
        p[0] = []

def p_curly_bracket_expr(p):
    ''' curly_bracket_expr : '{' table_builder '}' '''
    # table_builder yields a flat [key, value, key, value, ...] list;
    # pairwise() regroups it into (key, value) tuples for StarDict.
    # The grammar preambles only import StarFile and StarBlock, so StarDict
    # has to be brought into scope here to avoid a NameError.
    # NOTE(review): assumes StarDict is exported by the StarFile module --
    # confirm.
    from StarFile import StarDict
    p[0] = StarDict(pairwise(p[2]))

def p_table_builder(p):
    ''' table_builder : delimited_data_value ':' data_value
                      | table_builder delimited_data_value ':' data_value '''
    # Build a flat [key, value, key, value, ...] list; the ':' separator
    # token is skipped.
    size = len(p)
    if size == 5:
        so_far, key, value = p[1], p[2], p[4]
        p[0] = so_far + [key, value]
    elif size == 4:
        key, value = p[1], p[3]
        p[0] = [key, value]

Some convenience functions used by the grammar actions.

<Helper functions>= (<-U <-U)
def pairwise(iterable):
    """Yield successive non-overlapping pairs from iterable.

    [a, b, c, d] gives (a, b), (c, d).  A trailing unpaired element is
    dropped.
    """
    # Zipping one iterator with itself consumes two elements per pair.
    # This reads the `iterable` argument (the old body referenced an
    # undefined name), and ends cleanly at exhaustion on both Python 2 and
    # Python 3 instead of leaking StopIteration out of the generator.
    source = iter(iterable)
    for pair in zip(source, source):
        yield pair

def makeloop(target_block,loopdata):
    """Add the columns of one parsed loop to target_block and create the loop.

    loopdata is a two-element sequence (loop_seq, itemlists): the list of
    data names in the loop and the flat list of loop values in file order.
    Column i receives every len(loop_seq)-th value starting at index i.

    Raises ValueError (after writing the message to stderr) when
    target_block.CreateLoop rejects the columns with a ValueError.
    """
    # Imported locally because the visible grammar preambles do not
    # import sys.
    import sys
    loop_seq,itemlists = loopdata
    # Drop a trailing empty list from the values.
    # NOTE(review): presumably an end-of-loop sentinel from the Yapps
    # parser; a genuine empty CIF2 list as the final value may match too --
    # confirm.
    if itemlists[-1] == []: itemlists.pop(-1)
    step_size = len(loop_seq)
    for col_no, item_name in enumerate(loop_seq):
        # De-interleave: every step_size-th value belongs to this column.
        target_block.AddItem(item_name, itemlists[col_no::step_size],precheck=True)
    # now construct the loop
    try:
        target_block.CreateLoop(loop_seq)  #will raise ValueError on problem
    except ValueError:
        error_string =  'Incorrect number of loop values for loop containing %s' % repr(loop_seq)
        sys.stderr.write(error_string + '\n')
        raise ValueError(error_string)