---
title: Replay Agents
keywords: fastai
sidebar: home_sidebar
summary: "Replay Agents."
description: "Replay Agents."
nb_path: "nbs/rl/agents/rl.agents.replay_agents.ipynb"
---
# !pip install --upgrade --force-reinstall --no-deps kaggle
# !mkdir ~/.kaggle
# !cp /content/drive/MyDrive/kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d saurav9786/amazon-product-reviews
!unzip amazon-product-reviews.zip
import pandas as pd
header_list = ["User_ID", "Product_ID", "Rating", "Time_Stamp"]
rating_df = pd.read_csv('ratings_Electronics (1).csv', names=header_list)
reward_threshold = 4
rating_df['reward'] = rating_df.eval('Rating > @reward_threshold').astype(int)
n_visits = 500
n_iterations = 1
n_test_visits = 100
reward_history = rating_df[:1000]
item_col_name = 'Product_ID'
visitor_col_name = 'User_ID'
reward_col_name = 'reward'
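The replayer classes used below are defined earlier in this notebook, and they all share the same offline replay loop: at each simulated visit the agent proposes an item, and a logged interaction counts toward the reward estimate only when the proposal matches the item the visitor actually saw. A minimal sketch of that loop, with hypothetical method names (`select_item`, `record_result`), might look like this:

```python
def replay_simulation(agent, reward_history, n_visits,
                      item_col, visitor_col, reward_col):
    """Sketch of offline replay evaluation (names are illustrative).

    At each visit we sample a logged interaction; the agent's pick only
    counts when it matches the item the visitor actually interacted with.
    """
    results = []
    n_matches = 0
    total_reward = 0
    for visit in range(n_visits):
        row = reward_history.sample(1).iloc[0]      # a logged interaction
        proposed = agent.select_item()              # agent's recommendation
        if proposed == row[item_col]:               # replay match
            reward = row[reward_col]
            agent.record_result(proposed, reward)   # update the agent's estimates
            n_matches += 1
            total_reward += reward
        results.append({'visit': visit,
                        'fraction_relevant': total_reward / max(n_matches, 1)})
    return results
```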
print("A/B Test Simulations...starts...!!!")
ab_results = ABTestReplayer(n_visits, n_test_visits, reward_history,
item_col_name, visitor_col_name, reward_col_name,
n_iterations=n_iterations).simulator()
ab_results_df = pd.DataFrame(ab_results)
ab_results_df.to_csv('ab_results_df.csv')
print("Epsilon - Greedy Simulations...starts...!!!")
epsilon = 0.05
epsilon_results = EpsilonGreedyReplayer(epsilon, n_visits, reward_history,
item_col_name, visitor_col_name, reward_col_name,
n_iterations=n_iterations).simulator()
epsilon_results_df = pd.DataFrame(epsilon_results)
epsilon_results_df.to_csv('epsilon_results_df.csv')
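For reference, the decision rule that `epsilon` controls is simple: with probability `epsilon` the agent picks a random item (explore), otherwise it picks the item with the highest estimated reward so far (exploit). A hedged sketch, assuming the agent keeps per-item mean-reward estimates in a dict called `estimates`:

```python
import random

def epsilon_greedy_select(estimates, epsilon=0.05):
    """Sketch of the epsilon-greedy rule; `estimates` maps item -> mean reward."""
    if random.random() < epsilon:
        # explore: any item, uniformly at random
        return random.choice(list(estimates))
    # exploit: the item with the best estimate so far
    return max(estimates, key=estimates.get)
```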
print("Thompson Sampling Simulations...starts...!!!")
thompson_results = ThompsonSamplingReplayer(n_visits, reward_history,
item_col_name, visitor_col_name, reward_col_name,
n_iterations=n_iterations).simulator()
thompson_results_df = pd.DataFrame(thompson_results)
thompson_results_df.to_csv('thompson_results_df.csv')
print("Upper Confidence Bounds Simulations...starts...!!!")
ucb = 2
ucb_results = UCBSamplingReplayer(ucb, n_visits, reward_history,
item_col_name, visitor_col_name, reward_col_name,
n_iterations=n_iterations).simulator()
ucb_results_df = pd.DataFrame(ucb_results)
ucb_results_df.to_csv('ucb_results_df.csv')
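The `ucb` value passed above is presumably the exploration constant: a UCB agent scores each item by its mean reward plus a confidence bonus that shrinks as the item is tried more often, and a larger constant means more exploration. A minimal sketch of a UCB1-style score (variable names are illustrative, not the replayer's actual API):

```python
import math

def ucb_score(mean_reward, n_pulls, total_pulls, c=2):
    """UCB1-style score: empirical mean plus an exploration bonus.

    Items tried rarely (small n_pulls) get a large bonus, so they keep
    being revisited until their estimates become trustworthy.
    """
    if n_pulls == 0:
        return float('inf')  # untried items are always explored first
    return mean_reward + c * math.sqrt(math.log(total_pulls) / n_pulls)
```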
ucb_results_df = pd.read_csv('ucb_results_df.csv').drop('Unnamed: 0', axis=1)
thompson_results_df = pd.read_csv('thompson_results_df.csv').drop('Unnamed: 0', axis=1)
epsilon_results_df = pd.read_csv('epsilon_results_df.csv').drop('Unnamed: 0', axis=1)
ab_results_df = pd.read_csv('ab_results_df.csv').drop('Unnamed: 0', axis=1)
# Average each algorithm's results over iterations, grouped by visit number
ucb_avg_results_df = ucb_results_df.groupby('visit', as_index=False).mean()
thompson_avg_results_df = thompson_results_df.groupby('visit', as_index=False).mean()
epsilon_avg_results_df = epsilon_results_df.groupby('visit', as_index=False).mean()
ab_avg_results_df = ab_results_df.groupby('visit', as_index=False).mean()
# using a color-blind friendly palette with 10 colors
color_blind_palette_10 = ['#cfcfcf', '#ffbc79', '#a2c8ec', '#898989', '#c85200',
'#5f9ed1', '#595959', '#ababab', '#ff800e', '#006ba4']
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(12,10))
for (avg_results_df, style) in [(ucb_avg_results_df, 'r-'),
                                (thompson_avg_results_df, 'g--'),
                                (epsilon_avg_results_df, 'b-'),
                                (ab_avg_results_df, 'y--')]:
    ax.plot(avg_results_df.visit, avg_results_df.fraction_relevant, style, linewidth=3.5)
ax.set_title('Percentage of Liked Recommendations')
ax.set_xlabel('Recommendation #')
ax.set_ylabel('% of Recs Clicked')
#ax.set_xticks(range(0,22000,5000))
#ax.set_ylim(0.2, 0.6)
#ax.set_yticks(np.arange(0.2, 0.7, 0.1))
#rescale the y-axis tick labels to show them as a percentage
ax.set_yticklabels((ax.get_yticks()*100).astype(int))
ax.legend(['UCB',
           'Thompson Sampling',
           r'$\epsilon$-Greedy',
           'A/B Test'],
          loc='lower right')
plt.tight_layout()
plt.show()
From the plot above it is clear that Thompson Sampling, a multi-armed bandit algorithm, outperforms A/B testing. With few samples the A/B test performs better than the other algorithms, but as the number of samples grows, Thompson Sampling performs better and better.
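This behaviour follows from Thompson Sampling's posterior updates: each item keeps a Beta(α, β) posterior over its click probability, one sample is drawn from each posterior, and the item with the highest sample is recommended. Early on the posteriors are wide, so the agent explores; as evidence accumulates they concentrate, so it exploits. A hedged sketch of the core mechanism (not the replayer's actual implementation):

```python
import numpy as np

def thompson_select(alphas, betas):
    """Sketch of Thompson Sampling with Beta-Bernoulli posteriors.

    alphas[i] - 1 counts item i's successes, betas[i] - 1 its failures
    (both start at 1, i.e. a uniform prior).
    """
    samples = np.random.beta(alphas, betas)   # one draw per item
    return int(np.argmax(samples))            # recommend the best draw

def thompson_update(alphas, betas, item, reward):
    """Bayesian update after observing a binary reward for `item`."""
    alphas[item] += reward        # success
    betas[item] += 1 - reward     # failure
```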