# pynfg/pgtsolutions/intelligence/iq_iterated.py
# -*- coding: utf-8 -*-
"""
Implements PGT intelligence for iterSemiNFG objects

Part of: PyNFG - a Python package for modeling and solving Network Form Games

Created on Wed Jan  2 16:33:36 2013

Copyright (C) 2013 James Bono (jwbono@gmail.com)

GNU Affero General Public License

"""
from __future__ import division
import copy
import numpy as np
import pynfg #needed for pynfg.DecisionNode below
import scipy.stats.distributions as randvars

def iq_MC_iter(G, S, X, M, delta, integrand=None, mix=False):
    """Run MC outer loop on random policy sequences for iterSemiNFG IQ calcs

    :arg G: the iterated semiNFG to be evaluated
    :type G: iterSemiNFG
    :arg S: number of policy sequences to sample
    :type S: int
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies to compare
    :type M: int
    :arg delta: the discount factor
    :type delta: float
    :arg integrand: a user-supplied function of G that is evaluated for each
       s in S
    :type integrand: func

    .. note::

       This will throw an error if there is a decision node in G.starttime
       that is not repeated throughout the net.

    """
    T0 = G.starttime
    T = G.endtime
    intel = {} #keys are base names, vals are iq panel series
    funcout = {} #keys are s in S, vals are eval of integrand of G(s)
    # gather list of decision nodes in the base game
    bnlist = [d.basename for d in G.time_partition[T0] if \
              isinstance(d, pynfg.DecisionNode)]
    for bn in bnlist: #preallocating iq dict entries
        intel[bn] = np.zeros((S, T-T0+1))
    for s in xrange(0, S): #sampling S sequences of policy profiles
        for t in xrange(T0, T+1): #sampling a sequence of policy profiles
            for bn in bnlist:
                G.bn_part[bn][t-T0].randomCPT(mixed=mix, setCPT=True) #drawing current policy
                for dd in G.bn_part[bn][t-T0::]:
                    dd.CPT = G.bn_part[bn][t-T0].CPT #apply policy to future copies
            for bn in bnlist: #find the iq of each player's policy in turn
                intel[bn][s, t-T0] = iq_calc_iter(bn, G, X, M, delta, t)
        if integrand is not None:
            funcout[s] = integrand(G) #eval integrand G(s), assign to funcout
    return intel, funcout
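
# Example usage (a minimal sketch, not part of the original module): assumes
# `G` is an iterSemiNFG built elsewhere whose base-game decision nodes are
# repeated in every time step; basename 'D1' and player '1' are hypothetical.
#
#     intel, funcout = iq_MC_iter(G, S=50, X=10, M=20, delta=0.9,
#                                 integrand=lambda g: g.npv_reward('1',
#                                                        g.starttime, 0.9))
#     intel['D1'][:, 0] #the S IQ draws for 'D1' at the first time step
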
def iq_MH_iter(G, S, X, M, noise, dens, delta, integrand=None, mix=False):
    """Run MH for iterSemiNFG IQ calcs

    :arg G: the iterated semiNFG to be evaluated
    :type G: iterSemiNFG
    :arg S: number of MH iterations
    :type S: int
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies to compare
    :type M: int
    :arg noise: the degree of independence of the proposal distribution on
       the current value
    :type noise: float
    :arg dens: the function that assigns weights to iq
    :type dens: func
    :arg delta: the discount factor
    :type delta: float
    :arg integrand: a user-supplied function of G that is evaluated for each
       s in S
    :type integrand: func

    .. note::

       This will throw an error if there is a decision node in G.starttime
       that is not repeated throughout the net.

    """
    T0 = G.starttime
    T = G.endtime
    iq = np.zeros((S+1, T-T0+1)) #panel of iq for each MH step and time step
    intel = {} #keys are base names, vals are iq time step series
    # gather list of decision nodes in the base game
    bnlist = [d.basename for d in G.time_partition[T0] if \
              isinstance(d, pynfg.DecisionNode)]
    for bn in bnlist: #preallocating iq dict entries
        intel[bn] = [0]*(T-T0+1)
    funcout = {} #keys are s in S, vals are eval of integrand of G(s)
    funcout[0] = 0
    for s in xrange(1, S+1): #sampling S sequences of policy profiles
        GG = copy.deepcopy(G)
        rt = randvars.randint.rvs(T0, T+1) #a random time step
        ind = randvars.randint.rvs(0, len(bnlist)) #a random DN basename
        rn = bnlist[ind]
        GG.bn_part[rn][rt-T0].CPT = G.bn_part[rn][rt-T0].perturbCPT(noise, \
                                                    mixed=mix, setCPT=False)
        for dd in GG.bn_part[rn][rt-T0::]:
            dd.CPT = GG.bn_part[rn][rt-T0].CPT #apply policy to future copies
        propiq = iq_calc_iter(rn, GG, X, M, delta, rt) #getting proposal iq
        # The MH decision
        verdict = mh_decision(dens(propiq), dens(intel[rn][rt-T0]))
        if verdict: #accepting new CPT
            intel[rn][rt-T0] = propiq
            G.bn_part[rn][rt-T0].CPT = GG.bn_part[rn][rt-T0].CPT
            iq[s, :] = iq[s-1, :]
            iq[s, rt-T0] = propiq
        else:
            iq[s] = iq[s-1]
        if integrand is not None:
            funcout[s] = integrand(G) #eval integrand G(s), assign to funcout
    return intel, funcout
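
# Example usage (a sketch under the same assumptions as above; `dens` is a
# hypothetical weighting function that favors high-IQ policy profiles):
#
#     dens = lambda iq: np.exp(10*iq)
#     intel, funcout = iq_MH_iter(G, S=500, X=10, M=20, noise=0.2, dens=dens,
#                                 delta=0.9)
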
def iq_calc_iter(bn, G, X, M, delta, start):
    """Calc IQ of the policy at bn, start in G from the given starting point

    :arg bn: the basename of the DN to be evaluated
    :type bn: str
    :arg G: the iterated semiNFG to be evaluated
    :type G: iterSemiNFG
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies with which to compare
    :type M: int
    :arg delta: the discount factor
    :type delta: float
    :arg start: the starting time step
    :type start: int

    """
    T0 = G.starttime
    npvreward = 0
    p = G.bn_part[bn][start-T0].player
    for x in xrange(1, X+1):
        G.sample() #sample from start to the end of the net
        #running mean of the npv reward for the player's real policy
        npvreward = (G.npv_reward(p, start, delta) + (x-1)*npvreward)/x
    altnpv = [0]*M
    G1 = copy.deepcopy(G)
    for m in xrange(0, M): #sample M alt policies for the player
        G1.bn_part[bn][start-T0].randomCPT(setCPT=True) #rand altpolicy at time start
        for n in G1.bn_part[bn][start-T0::]:
            n.CPT = G1.bn_part[bn][start-T0].CPT #apply altpolicy to future copies of the DN
        G1.sample() #sample altpolicy profile to the end of the net
        altnpv[m] = G1.npv_reward(p, start, delta) #get alt npvreward
    worse = [j for j in altnpv if j <= npvreward] #alts worse than the real policy
    return len(worse)/M #fraction of alts worse than the real policy is the IQ
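
# Example (a sketch; 'D1' is a hypothetical basename of a repeated DN in G):
#
#     iq0 = iq_calc_iter('D1', G, X=10, M=20, delta=0.9, start=G.starttime)
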
def mh_decision(p, q):
    """Decide to accept the new draw or keep the old one

    :arg p: the unnormalized likelihood of the new draw
    :type p: float
    :arg q: the unnormalized likelihood of the old draw
    :type q: float

    """
    if q <= 0:
        a = 1
    else:
        a = min([p/q, 1])
    u = np.random.rand()
    if a > u:
        verdict = True
    else:
        verdict = False
    return verdict
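
# Example (a self-contained sketch): mh_decision is just the accept/reject
# step of Metropolis-Hastings, so it can drive a standalone random-walk
# sampler of any unnormalized density; here a standard normal density with a
# symmetric proposal, for which the ratio p/q is the correct MH acceptance.
#
#     dens = lambda x: np.exp(-x**2/2) #unnormalized N(0,1) density
#     chain = [0.0]
#     for _ in xrange(1000):
#         prop = chain[-1] + np.random.normal(0, 0.5) #symmetric proposal
#         if mh_decision(dens(prop), dens(chain[-1])):
#             chain.append(prop) #accept w/ prob min(dens(prop)/dens(cur), 1)
#         else:
#             chain.append(chain[-1]) #reject: repeat the current state
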