# Source code for pynfg.pgtsolutions.intelligence.iq_policy

# -*- coding: utf-8 -*-
"""
Implements PGT intelligence for policies for SemiNFG objects

Created on Fri Mar 22 15:32:33 2013

Copyright (C) 2013 James Bono (jwbono@gmail.com)

GNU Affero General Public License

"""

from __future__ import division
import copy
import numpy as np
from pynfg import DecisionNode, iterSemiNFG

def iq_MC_policy(G, S, noise, X, M, innoise=1, delta=1, integrand=None,
                 mix=False, satisfice=None):
    """Run MC outer loop on random policies for SemiNFG IQ calcs

    :arg G: the semiNFG to be evaluated
    :type G: SemiNFG
    :arg S: number of policy profiles to sample
    :type S: int
    :arg noise: the degree of independence of the proposal distribution on
       the current value. 1 is independent, 0 returns no perturbation.
    :type noise: float
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies to compare
    :type M: int
    :arg innoise: the perturbation noise for the loop within iq_calc to
       draw alt CPTs to compare utilities to current CPT.
    :type innoise: float
    :arg delta: the discount factor (ignored if SemiNFG)
    :type delta: float
    :arg integrand: a user-supplied function of G that is evaluated for each
       s in S
    :type integrand: func
    :arg mix: False if restricting sampling to pure strategies. True if
       mixed strategies are included in sampling. Default is False.
    :type mix: bool
    :arg satisfice: game to use as the satisficing distribution for drawing
       alternatives in the inner IQ loop (passed through to iq_calc_policy).
       Default is None.
    :type satisfice: SemiNFG

    :returns: a 3-tuple ``(intel, funcout, weight)`` keyed by sample index s.

    .. note::

       This is the coordinated-approach because intelligence is assigned to
       a player instead of being assigned to a DecisionNode

    """
    intel = {}  # keys are s in S, vals are player-keyed iq dicts
    iq = {}  # scratch dict, player name -> iq for the current sample
    weight = {}  # keys are s in S, vals are player-keyed proposal weights
    w = {}  # scratch dict, player name -> proposal weight for current sample
    funcout = {}  # keys are s in S, vals are eval of integrand of G(s)
    bndict = {}
    T0 = G.starttime
    for p in G.players:  # getting player-keyed dict of basenames
        bndict[p] = [x.basename for x in G.partition[p] if x.time == T0]
    for s in range(1, S + 1):  # sampling S policy profiles
        print(s)
        GG = copy.deepcopy(G)
        for p in G.players:
            w[p] = 1
            for bn in bndict[p]:
                # perturb p's time-T0 CPT; accumulate the proposal weight
                w[p] *= GG.bn_part[bn][T0].perturbCPT(noise, mixed=mix,
                                                      returnweight=True)
        for p in G.players:  # find the iq of each player's policy in turn
            iq[p] = iq_calc_policy(p, GG, X, M, mix, delta, innoise,
                                   satisfice)
        if integrand is not None:
            funcout[s] = integrand(GG)  # eval integrand G(s), assign to funcout
        # deepcopy so later iterations don't mutate stored results
        intel[s] = copy.deepcopy(iq)
        weight[s] = copy.deepcopy(w)
    return intel, funcout, weight
def iq_MH_policy(G, S, density, noise, X, M, innoise=1, delta=1,
                 integrand=None, mix=False, satisfice=None):
    """Run MH for SemiNFG with IQ calcs

    :arg G: the SemiNFG to be evaluated
    :type G: SemiNFG
    :arg S: number of MH iterations
    :type S: int
    :arg density: the function that assigns weights to iq
    :type density: func
    :arg noise: the degree of independence of the proposal distribution on
       the current value. 1 is independent, 0 returns no perturbation.
    :type noise: float
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies to compare
    :type M: int
    :arg innoise: the perturbation noise for the loop within iq_calc to
       draw alt CPTs to compare utilities to current CPT.
    :type innoise: float
    :arg delta: the discount factor (ignored if SemiNFG)
    :type delta: float
    :arg integrand: a user-supplied function of G that is evaluated for each
       s in S
    :type integrand: func
    :arg mix: if true, proposal distribution is over mixed CPTs. Default is
       False.
    :type mix: bool
    :arg satisfice: game to use as the satisficing distribution for drawing
       alternatives in the inner IQ loop (passed through to iq_calc_policy).
       Default is None.
    :type satisfice: SemiNFG

    :returns: a 3-tuple ``(intel, funcout, dens)``.

    .. note::

       This is the coordinated-approach because intelligence is assigned to
       a player instead of being assigned to a DecisionNode

    """
    intel = {}  # keys are s in S, vals are iq dict (dict of dicts)
    iq = {}  # scratch dict, player name -> iq for the current iteration
    funcout = {}  # keys are s in S, vals are eval of integrand of G(s)
    dens = np.zeros(S + 1)  # storing densities for return
    bndict = {}
    T0 = G.starttime
    for p in G.players:  # getting player-keyed dict of basenames
        bndict[p] = [x.basename for x in G.partition[p] if x.time == T0]
    for s in range(1, S + 1):  # sampling S sequences of policy profiles
        print(s)
        GG = copy.deepcopy(G)
        for p in G.players:
            for bn in bndict[p]:
                GG.bn_part[bn][T0].perturbCPT(noise, mixed=mix)
        for p in GG.players:  # getting iq
            # BUG FIX: the original called iq_calc_coord, which is not
            # defined or imported in this module; the policy-approach
            # helper defined here is iq_calc_policy (same signature).
            iq[p] = iq_calc_policy(p, GG, X, M, mix, delta, innoise,
                                   satisfice)
        # The MH decision. At s=1, dens[0]==0, so mh_decision always
        # accepts -- the reject branch can never hit intel[0]/dens[0].
        current_dens = density(iq)
        verdict = mh_decision(current_dens, dens[s - 1])
        if verdict:  # accepting the proposed CPTs
            intel[s] = copy.deepcopy(iq)
            G = copy.deepcopy(GG)
            dens[s] = current_dens
        else:  # rejecting: carry the previous iteration forward
            intel[s] = intel[s - 1]
            dens[s] = dens[s - 1]
        if integrand is not None:
            funcout[s] = integrand(G)  # eval integrand G(s), assign to funcout
    return intel, funcout, dens[1::]
def iq_calc_policy(p, G, X, M, mix, delta, innoise, satisfice=None):
    """Calc IQ of player p in G across all of p's decision nodes

    :arg p: the name of the player whose intelligence is being evaluated.
    :type p: str
    :arg G: the semiNFG to be evaluated
    :type G: SemiNFG
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies with which to compare
    :type M: int
    :arg mix: if true, alternative CPTs drawn are mixed. If false, they are
       pure.
    :type mix: bool
    :arg delta: the discount factor (ignored if SemiNFG)
    :type delta: float
    :arg innoise: the perturbation noise for the inner loop to draw alt CPTs
    :type innoise: float
    :arg satisfice: game to use as the satisficing distribution for drawing
       alternative policies. Default is None.
    :type satisfice: SemiNFG

    :returns: the fraction (importance-weighted) of alternative policies
       that yield strictly lower utility than p's current policy.

    """
    # npv_reward only exists on iterated nets; fall back to one-shot utility
    if isinstance(G, iterSemiNFG):
        ufoo = G.npv_reward
        uargs = [p, G.starttime, delta]
    else:
        ufoo = G.utility
        uargs = [p]
    # running average of p's utility over X samples of the current profile
    util = 0
    for x in range(1, X + 1):
        G.sample()
        util = (ufoo(*uargs) + (x - 1) * util) / x
    altutil = [0] * M
    weight = np.ones(M)
    tick = 0
    T0 = G.starttime
    bnlist = [x.basename for x in G.partition[p] if x.time == T0]
    if satisfice:  # using the satisficing distribution for drawing alternatives
        G = satisfice
    for m in range(M):  # Sample M alt policies for the player
        GG = copy.deepcopy(G)
        denw = 1
        for bn in bnlist:  # rand CPT for the DN
            # density for the importance sampling distribution
            if innoise == 1 or satisfice:
                GG.bn_part[bn][T0].perturbCPT(innoise, mixed=mix)
            else:
                denw *= GG.bn_part[bn][T0].perturbCPT(innoise, mixed=mix,
                                                      returnweight=True)
            if not tick:
                numw = denw  # scaling constant num to ~ magnitude of den
            weight[m] *= (numw / denw)
            tick += 1
        GG.sample()  # sample altpolicy prof. to end of net
        if isinstance(GG, iterSemiNFG):
            altutil[m] = GG.npv_reward(p, GG.starttime, delta)
        else:
            altutil[m] = GG.utility(p)
    # weight of alts strictly worse than the current policy
    worse = [weight[m] for m in range(M) if altutil[m] < util]
    return np.sum(worse) / np.sum(weight)  # fraction of alts worse than G is IQ
def mh_decision(pnew, pold, qnew=1, qold=1):
    """Decide to accept the new draw or keep the old one

    :arg pnew: the unnormalized likelihood of the new draw
    :type pnew: float
    :arg pold: the unnormalized likelihood of the old draw
    :type pold: float
    :arg qnew: the probability of transitioning from the old draw to the
       new draw.
    :type qnew: float
    :arg qold: the probability of transitioning from the new draw to the
       old draw.
    :type qold: float

    :returns: True to accept the new draw, False to keep the old one.
    :rtype: bool
    """
    if pold <= 0 or qnew <= 0:
        # degenerate old state (zero likelihood / impossible transition):
        # always accept the new draw
        a = 1
    else:
        # standard Metropolis-Hastings acceptance ratio, capped at 1
        a = min((pnew * qold) / (pold * qnew), 1)
    # bool() keeps the return a native Python bool (comparison against
    # np.random.rand() would otherwise yield a numpy bool)
    return bool(a > np.random.rand())