Source code for pynfg.pgtsolutions.intelligence.iterated

# -*- coding: utf-8 -*-
"""
Implements PGT intelligence for iterSemiNFG objects

Part of: PyNFG - a Python package for modeling and solving Network Form Games

Created on Wed Jan  2 16:33:36 2013

Copyright (C) 2013 James Bono (jwbono@gmail.com)

GNU Affero General Public License

"""
from __future__ import division
import copy
import numpy as np
from pynfg import DecisionNode
from pynfg.utilities.utilities import mh_decision
import scipy.stats.distributions as randvars

def iterated_MC(G, S, noise, X, M, innoise=1, delta=1, integrand=None, \
                mix=False, satisfice=None):
    """Run MC outer loop on random policy sequences for iterSemiNFG IQ calcs

    :arg G: the iterated semiNFG to be evaluated
    :type G: iterSemiNFG
    :arg S: number of policy sequences to sample
    :type S: int
    :arg noise: the degree of perturbation applied to each CPT when drawing a
       candidate policy (passed to perturbCPT)
    :type noise: float
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies to compare
    :type M: int
    :arg delta: the discount factor
    :type delta: float
    :arg integrand: a user-supplied function of G that is evaluated for each
       s in S
    :type integrand: func

    .. warning::

       This will throw an error if there is a decision node in G.starttime
       that is not repeated throughout the net.

    .. note::

       This is an uncoordinated approach because intelligence is assigned to
       a DN instead of being assigned to a player.

    """
    T0 = G.starttime
    T = G.endtime
    bnlist = [d.basename for d in G.time_partition[T0] if \
              isinstance(d, DecisionNode)] #decision nodes in the start time step
    intel = {} #keys are MC iterations s, values are iq dicts
    iq = {} #keys are base names, vals are iq timestep series
    funcout = {} #keys are s in S, vals are eval of integrand of G(s)
    weight = {} #keys are s in S, vals are the weights of the drawn policies
    w = {} #keys are base names, vals are the weight of the current draw
    for bn in bnlist: #preallocating iq dict entries
        iq[bn] = np.zeros(T-T0+1)
    for s in xrange(1, S+1): #sampling S sequences of policy profiles
        print s
        GG = copy.deepcopy(G)
        for t in xrange(T0, T+1): #sampling a policy profile for each time step
            for bn in bnlist: #drawing current policy
                w[bn] = GG.bn_part[bn][t-T0].perturbCPT(noise, mixed=mix, \
                                                        returnweight=True)
                for dd in GG.bn_part[bn][t-T0+1::]:
                    dd.CPT = GG.bn_part[bn][t-T0].CPT #apply policy to future
            for bn in bnlist: #find the iq of each player's policy in turn
                iq[bn][t-T0] = iterated_calciq(bn, GG, X, M, mix, delta, t, \
                                               innoise, satisfice=satisfice) #getting iq
        if integrand is not None:
            funcout[s] = integrand(GG) #eval integrand G(s), assign to funcout
        intel[s] = copy.deepcopy(iq)
        weight[s] = copy.deepcopy(w)
    return intel, funcout, weight
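# Illustrative usage sketch (not part of the original module): a typical call
# to iterated_MC, assuming `G` is an iterSemiNFG built elsewhere and that
# G.players lists the player names. The names `G` and `welfare`, and all
# parameter values below, are placeholders rather than prescriptions.
#
#   def welfare(game):
#       """Sum the players' net-present-value rewards from the start time."""
#       return sum(game.npv_reward(p, game.starttime, 1) for p in game.players)
#
#   intel, funcout, weight = iterated_MC(G, S=50, noise=0.2, X=10, M=20,
#                                        innoise=0.2, delta=1,
#                                        integrand=welfare, mix=False)
#   # intel[s][bn][t-T0] is the IQ of basename bn at time t in sample s;
#   # weight[s][bn] is the weight returned by perturbCPT for that draw.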
def iterated_MH(G, S, density, noise, X, M, innoise=1, delta=1, \
                integrand=None, mix=False, satisfice=None):
    """Run MH for iterSemiNFG IQ calcs

    :arg G: the iterated semiNFG to be evaluated
    :type G: iterSemiNFG
    :arg S: number of MH iterations
    :type S: int
    :arg density: the function that assigns weights to iq
    :type density: func
    :arg noise: the degree of independence of the proposal distribution on
       the current value
    :type noise: float
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies to compare
    :type M: int
    :arg delta: the discount factor
    :type delta: float
    :arg integrand: a user-supplied function of G that is evaluated for each
       s in S
    :type integrand: func

    .. warning::

       This will throw an error if there is a decision node in G.starttime
       that is not repeated throughout the net.

    .. note::

       This is an uncoordinated approach because intelligence is assigned to
       a DN instead of being assigned to a player.

    """
    T0 = G.starttime
    T = G.endtime
    dnlist = [d.basename for d in G.time_partition[T0] if \
              isinstance(d, DecisionNode)] #decision nodes in the start time step
    intel = {} #keys are MH iterations s, values are iq dicts
    iq = {} #keys are base names, vals are iq timestep series
    for dn in dnlist:
        iq[dn] = np.zeros(T-T0+1) #preallocating iqs
    funcout = {} #keys are s in S, vals are eval of integrand of G(s)
    dens = np.zeros(S+1) #unnormalized densities of the kept draws
    for s in xrange(1, S+1): #sampling S sequences of policy profiles
        print s
        GG = copy.deepcopy(G)
        for t in xrange(T0, T+1):
            for dn in dnlist: #drawing a proposal policy for each DN
                GG.bn_part[dn][t-T0].CPT = G.bn_part[dn][t-T0].perturbCPT(\
                                           noise, mixed=mix, setCPT=False)
                for dd in GG.bn_part[dn][t-T0+1::]:
                    dd.CPT = GG.bn_part[dn][t-T0].CPT #apply policy to future
                iq[dn][t-T0] = iterated_calciq(dn, GG, X, M, mix, delta, t, \
                                               innoise, satisfice=satisfice) #getting iq
        # The MH decision
        current_dens = density(iq) #evaluating density of current draw's iq
        verdict = mh_decision(current_dens, dens[s-1]) #True if accept new draw
        if verdict: #accepting new CPT
            intel[s] = copy.deepcopy(iq)
            G = copy.deepcopy(GG)
            dens[s] = current_dens
        else: #rejecting new CPT and keeping the previous iteration's values
            intel[s] = intel[s-1]
            dens[s] = dens[s-1]
        if integrand is not None:
            funcout[s] = integrand(G) #eval integrand G(s), assign to funcout
    return intel, funcout, dens[1::]
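# Illustrative usage sketch (not part of the original module): iterated_MH
# additionally needs a density over the iq dict computed at each iteration.
# The choice below is an assumed example, not a canonical one: it rewards
# policy sequences whose IQs are uniformly high. `G` is again a pre-built
# iterSemiNFG placeholder.
#
#   def high_iq_density(iqdict):
#       """Unnormalized density: product of all IQ values across DNs and times."""
#       x = 1
#       for bn in iqdict:
#           x *= np.prod(iqdict[bn])
#       return x
#
#   intel, funcout, dens = iterated_MH(G, S=100, density=high_iq_density,
#                                      noise=0.2, X=10, M=20, innoise=0.2,
#                                      delta=1, mix=False)
#   # dens[s-1] is the (unnormalized) density of the draw kept at iteration s.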
def iterated_calciq(bn, G, X, M, mix, delta, start, innoise, satisfice=None):
    """Calc IQ of the policy at bn, start in G from the given starting point

    :arg bn: the basename of the DN to be evaluated
    :type bn: str
    :arg G: the iterated semiNFG to be evaluated
    :type G: iterSemiNFG
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies with which to compare
    :type M: int
    :arg mix: if True, the alternative policies drawn may be mixed CPTs
    :type mix: bool
    :arg delta: the discount factor
    :type delta: float
    :arg start: the starting time step
    :type start: int
    :arg innoise: the perturbation noise used to draw the alternative policies
    :type innoise: float
    :arg satisfice: an optional game whose CPTs give the satisficing
       distribution from which alternative policies are drawn
    :type satisfice: iterSemiNFG

    """
    T0 = G.starttime
    p = G.bn_part[bn][start-T0].player
    util = 0
    altutil = [0]*M
    weight = np.ones(M)
    tick = 0
    for x in xrange(1, X+1): #estimating the utility of the current policy
        G.sample()
        util += G.npv_reward(p, start, delta)/X
    if satisfice: #using the satisficing distribution for drawing alternatives
        G = satisfice
    for m in range(M): #sample M alt policies for the player
        GG = copy.deepcopy(G)
        denw = 1 #density for the importance sampling distribution
        if innoise == 1 or satisfice:
            GG.bn_part[bn][start-T0].perturbCPT(innoise, mixed=mix)
        else:
            denw = GG.bn_part[bn][start-T0].perturbCPT(innoise, mixed=mix, \
                                                       returnweight=True)
        if not tick:
            numw = denw #scaling constant num to ~ magnitude of den
        weight[m] = (numw/denw)
        tick += 1
        for dn in GG.bn_part[bn][start-T0+1::]:
            dn.CPT = GG.bn_part[bn][start-T0].CPT #apply alt policy to future
        GG.sample_timesteps(start) #sample altpolicy prof. to end of net
        altutil[m] = GG.npv_reward(p, start, delta)
    worse = [weight[m] for m in range(M) if altutil[m] < util] #weight of alts worse than G
    return np.sum(worse)/np.sum(weight) #fraction of alts worse than G is IQ
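# Illustrative usage sketch (not part of the original module): iterated_calciq
# is normally called from iterated_MC or iterated_MH, but it can be used
# directly to score one basename's current policy from a given time step.
# Here 'D1' is a hypothetical decision-node basename and `G` a pre-built
# iterSemiNFG placeholder.
#
#   iq_val = iterated_calciq('D1', G, X=10, M=20, mix=False, delta=1,
#                            start=G.starttime, innoise=0.2)
#   # iq_val is the weighted fraction of the M alternative policies that do
#   # worse than the current policy, i.e. the IQ estimate.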