# -*- coding: utf-8 -*-
"""
Implements PGT intelligence for iterSemiNFG objects
Part of: PyNFG - a Python package for modeling and solving Network Form Games
Created on Wed Jan 2 16:33:36 2013
Copyright (C) 2013 James Bono (jwbono@gmail.com)
GNU Affero General Public License
"""
from __future__ import division
import copy
import numpy as np
from pynfg import DecisionNode
from pynfg.utilities.utilities import mh_decision
import scipy.stats.distributions as randvars
def iterated_MC(G, S, noise, X, M, innoise=1, delta=1, integrand=None,
                mix=False, satisfice=None):
    """Run MC outer loop on random policy sequences for iterSemiNFG IQ calcs

    :arg G: the iterated semiNFG to be evaluated. It is deep-copied for each
       sampled sequence, so the draws are applied to the copies.
    :type G: iterSemiNFG
    :arg S: number of policy sequences to sample
    :type S: int
    :arg noise: noise level handed to ``perturbCPT`` when drawing the policy
       of each decision node at each time step
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies to compare
    :type M: int
    :arg innoise: noise level forwarded to :func:`iterated_calciq` for drawing
       the alternative policies
    :arg delta: the discount factor
    :type delta: float
    :arg integrand: a user-supplied function of G that is evaluated for each s
       in S
    :type integrand: func
    :arg mix: forwarded as ``mixed`` to ``perturbCPT`` and as ``mix`` to
       :func:`iterated_calciq`
    :type mix: bool
    :arg satisfice: forwarded to :func:`iterated_calciq`; when given, the
       alternative policies are drawn from it instead of from the sampled net
    :returns: a tuple ``(intel, funcout, weight)``. ``intel[s]`` maps each
       decision node basename to an array of IQ values, one per time step.
       ``funcout[s]`` is ``integrand`` evaluated on the s-th sampled net (the
       dict is empty when ``integrand`` is None). ``weight[s]`` maps each
       basename to the value returned by ``perturbCPT`` at the last time
       step. Keys ``s`` run from 1 to S.

    .. warning::

       This will throw an error if there is a decision node in G.starttime that
       is not repeated throughout the net.

    .. note::

       This is an uncoordinated-approach because intelligence is assigned to a
       DN instead of being assigned to a player.

    """
    T0 = G.starttime
    T = G.endtime
    bnlist = [d.basename for d in G.time_partition[T0]
              if isinstance(d, DecisionNode)]
    intel = {}  # keys are MC iterations s, values are iq dicts
    iq = {}  # keys are base names, values are iq time step series
    funcout = {}  # keys are s in S, vals are eval of integrand of G(s)
    weight = {}  # keys are MC iterations s, values are copies of w
    w = {}  # keys are base names, vals are the latest perturbCPT weights
    for bn in bnlist:  # preallocating iq dict entries
        iq[bn] = np.zeros(T-T0+1)
    for s in range(1, S+1):  # sampling S sequences of policy profiles
        print(s)  # progress indicator
        GG = copy.deepcopy(G)
        for t in range(T0, T+1):  # sampling a sequence of policy profiles
            for bn in bnlist:  # drawing current policy
                w[bn] = GG.bn_part[bn][t-T0].perturbCPT(noise, mixed=mix,
                                                        returnweight=True)
                for dd in GG.bn_part[bn][t-T0+1:]:
                    dd.CPT = GG.bn_part[bn][t-T0].CPT  # apply policy to future
            for bn in bnlist:  # find the iq of each player's policy in turn
                # forward the caller's satisfice argument so that it is used
                iq[bn][t-T0] = iterated_calciq(bn, GG, X, M, mix, delta, t,
                                               innoise, satisfice=satisfice)
        if integrand is not None:
            funcout[s] = integrand(GG)  # eval integrand G(s), assign to funcout
        # iq and w are reused across iterations, so store independent copies
        intel[s] = copy.deepcopy(iq)
        weight[s] = copy.deepcopy(w)
    return intel, funcout, weight
def iterated_MH(G, S, density, noise, X, M, innoise=1, delta=1,
                integrand=None, mix=False, satisfice=None):
    """Run MH for iterSemiNFG IQ calcs

    :arg G: the iterated semiNFG to be evaluated. The caller's object is not
       rebound; accepted proposals replace a local reference only.
    :type G: iterSemiNFG
    :arg S: number of MH iterations
    :type S: int
    :arg density: the function that assigns weights to iq. It is called with
       the dict mapping each decision node basename to its IQ time series.
    :type density: func
    :arg noise: the degree of independence of the proposal distribution on the
       current value.
    :type noise: float
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies to compare
    :type M: int
    :arg innoise: noise level forwarded to :func:`iterated_calciq` for drawing
       the alternative policies
    :arg delta: the discount factor
    :type delta: float
    :arg integrand: a user-supplied function of G that is evaluated for each s
       in S
    :type integrand: func
    :arg mix: forwarded as ``mixed`` to ``perturbCPT`` and as ``mix`` to
       :func:`iterated_calciq`
    :type mix: bool
    :arg satisfice: forwarded to :func:`iterated_calciq`; when given, the
       alternative policies are drawn from it instead of from the proposal
    :returns: a tuple ``(intel, funcout, dens)``. ``intel[s]`` is the IQ dict
       of the state held after iteration s. ``funcout[s]`` is ``integrand``
       evaluated on that state (the dict is empty when ``integrand`` is
       None). ``dens`` is an array of length S with the density of the state
       held after each iteration. Keys ``s`` run from 1 to S.

    .. warning::

       This will throw an error if there is a decision node in G.starttime that
       is not repeated throughout the net.

    .. note::

       This is an uncoordinated-approach because intelligence is assigned to a
       DN instead of being assigned to a player.

    """
    T0 = G.starttime
    T = G.endtime
    dnlist = [d.basename for d in G.time_partition[T0]
              if isinstance(d, DecisionNode)]
    intel = {}  # keys are MH iterations s, values are iq dicts
    iq = {}  # keys are base names, values are iq time step series
    for dn in dnlist:
        iq[dn] = np.zeros(T-T0+1)  # preallocating iqs
    funcout = {}  # keys are s in S, vals are eval of integrand of G(s)
    dens = np.zeros(S+1)  # dens[0] == 0 stands in for "no previous draw"
    for s in range(1, S+1):  # sampling S sequences of policy profiles
        print(s)  # progress indicator
        GG = copy.deepcopy(G)  # the proposal, built from the current state
        for t in range(T0, T+1):
            for dn in dnlist:
                # propose a policy around the current state's CPT without
                # modifying the current state itself (setCPT=False)
                GG.bn_part[dn][t-T0].CPT = G.bn_part[dn][t-T0].perturbCPT(
                    noise, mixed=mix, setCPT=False)
                for dd in GG.bn_part[dn][t-T0+1:]:
                    dd.CPT = GG.bn_part[dn][t-T0].CPT  # apply policy to future
                # score the proposal GG, not the current state G, so that the
                # acceptance density below reflects the new draw; also forward
                # the caller's satisfice argument
                iq[dn][t-T0] = iterated_calciq(dn, GG, X, M, mix, delta, t,
                                               innoise, satisfice=satisfice)
        # The MH decision
        current_dens = density(iq)  # evaluating density of current draw's iq
        verdict = mh_decision(current_dens, dens[s-1])  # True if accept new draw
        if verdict:  # accepting new CPT
            intel[s] = copy.deepcopy(iq)
            G = copy.deepcopy(GG)
            dens[s] = current_dens
        else:
            # NOTE(review): at s == 1 there is no intel[0]; this relies on
            # mh_decision accepting when the previous density is 0 - confirm.
            intel[s] = intel[s-1]
            dens[s] = dens[s-1]
        if integrand is not None:
            funcout[s] = integrand(G)  # eval integrand G(s), assign to funcout
    return intel, funcout, dens[1:]
def iterated_calciq(bn, G, X, M, mix, delta, start, innoise, satisfice=None):
    """Calc IQ of policy at bn,start in G from the given starting point

    The utility of the current policy is the average over X calls to
    ``G.sample()`` of ``G.npv_reward`` for the node's player. It is compared
    with the utility of M alternative policies, each obtained by perturbing
    the node's CPT on a deep copy of the net and sampling from ``start``
    onward. The IQ is the weighted fraction of alternatives whose utility is
    strictly lower.

    :arg bn: the basename of the DN to be evaluated
    :type bn: str
    :arg G: the iterated semiNFG to be evaluated
    :type G: iterSemiNFG
    :arg X: number of samples of each policy profile
    :type X: int
    :arg M: number of random alt policies with which to compare
    :type M: int
    :arg mix: forwarded as ``mixed`` to ``perturbCPT``
    :arg delta: the discount factor
    :type delta: float
    :arg start: the starting time step
    :type start: int
    :arg innoise: noise level handed to ``perturbCPT`` for the alternatives.
       Unless it equals 1 or ``satisfice`` is given, the weights returned by
       ``perturbCPT`` are used to weight the alternatives.
    :arg satisfice: if truthy, the net from which the alternatives are drawn
       in place of G; all alternatives then carry weight 1
    :returns: sum of the weights of the alternatives with lower utility
       divided by the sum of all weights

    """
    offset = start - G.starttime  # index of the start step within bn_part
    player = G.bn_part[bn][offset].player

    # average utility of the policy under evaluation
    util = 0
    for _ in range(X):
        G.sample()
        util += G.npv_reward(player, start, delta)/X

    if satisfice:  # using the satisficing distribution for drawing alternatives
        G = satisfice
    unweighted = innoise == 1 or satisfice

    altutil = []
    weight = np.ones(M)
    numw = None  # first importance weight drawn; keeps the ratios near 1
    for m in range(M):  # sample M alt policies for the player
        alt = copy.deepcopy(G)
        node = alt.bn_part[bn][offset]
        if unweighted:
            node.perturbCPT(innoise, mixed=mix)
        else:
            # density for the importance sampling distribution
            denw = node.perturbCPT(innoise, mixed=mix, returnweight=True)
            if numw is None:
                numw = denw
            weight[m] = numw/denw
        for later in alt.bn_part[bn][offset+1:]:
            later.CPT = node.CPT  # the alt policy is kept for later steps
        alt.sample_timesteps(start)  # sample altpolicy prof. to end of net
        altutil.append(alt.npv_reward(player, start, delta))

    # weights of the alternatives that do worse than the evaluated policy
    worse = [wt for wt, u in zip(weight, altutil) if u < util]
    return np.sum(worse)/np.sum(weight)