Connect++ 0.4.0
A fast, readable connection prover for first-order logic.
Loading...
Searching...
No Matches
EXP3 Class Reference

Implementation of the EXP3 algorithm for multiarmed bandits. More...

#include <EXP3.hpp>

Collaboration diagram for EXP3:

Public Member Functions

 EXP3 (size_t, double)
 
size_t get_choice () const
 
void set_gamma (double)
 Set gamma to a new value and recompute the members derived from it (one_minus_gamma and gamma_over_K).
 
size_t choose ()
 Choose using the current state.
 
void reward (double)
 Provide reward for the most recent choice.
 

Private Attributes

vector< double > omega
 
vector< double > p_values
 
double gamma
 
size_t K
 
double one_minus_gamma
 
double gamma_over_K
 
bool choose_next
 Belt-and-braces: warn if choose/reward happens in the wrong order.
 
size_t choice
 Store the last choice made.
 

Static Private Attributes

static boost::random::mt19937 random_generator
 Random source.
 

Friends

ostream & operator<< (ostream &, const EXP3 &)
 

Detailed Description

Implementation of the EXP3 algorithm for multiarmed bandits.

It's easy enough to find a description of this algorithm. The implementation here is pretty much straight from:

https://en.wikipedia.org/wiki/Multi-armed_bandit

Definition at line 51 of file EXP3.hpp.

Constructor & Destructor Documentation

◆ EXP3()

EXP3::EXP3 ( size_t _K,
double _gamma )

Definition at line 36 of file EXP3.cpp.

37: K(_K)
38, gamma(_gamma)
39, omega(_K, 1.0)
40, p_values(_K, 0.0)
41, choose_next(true)
42, one_minus_gamma(1 - _gamma)
43, gamma_over_K(_gamma / static_cast<double>(_K))
44, choice(0)
45{}
size_t choice
Store the last choice made.
Definition EXP3.hpp:74
bool choose_next
Belt-and-braces: warn if choose/reward happens in the wrong order.
Definition EXP3.hpp:70

Member Function Documentation

◆ choose()

size_t EXP3::choose ( )

Choose using the current state.

Definition at line 53 of file EXP3.cpp.

53 {
54 if (!choose_next) {
55 cerr << "STOP IT!! EXP3 should be receiving reward..." << endl;
56 }
57 choose_next = false;
58 double omega_sum = 0.0;
59 for (size_t i = 0; i < K; i++) {
60 omega_sum += omega[i];
61 p_values[i] = gamma_over_K;
62 }
63 for (size_t i = 0; i < K; i++) {
64 p_values[i] += (one_minus_gamma * (omega[i]/omega_sum));
65 }
66 boost::random::discrete_distribution<> p(p_values.begin(), p_values.end());
67 choice = p(random_generator);
68 return choice;
69}
static boost::random::mt19937 random_generator
Random source.
Definition EXP3.hpp:59

◆ get_choice()

size_t EXP3::get_choice ( ) const
inline

Definition at line 79 of file EXP3.hpp.

79 {
80 return choice;
81 }

◆ reward()

void EXP3::reward ( double r)

Provide reward for the most recent choice.

Definition at line 71 of file EXP3.cpp.

71 {
72 if (choose_next) {
73 cerr << "STOP IT!! EXP3 should be choosing..." << endl;
74 }
75 choose_next = true;
76 if (r < 0.0 || r > 1.0) {
77 cerr << "STOP IT!! The reward needs to be in [0,1]." << endl;
78 }
79 double x = r / p_values[choice];
80 omega[choice] = omega[choice] * exp(x * gamma_over_K);
81}

◆ set_gamma()

void EXP3::set_gamma ( double _gamma)

Set gamma to a new value and recompute the members derived from it (one_minus_gamma and gamma_over_K).

Definition at line 47 of file EXP3.cpp.

47 {
48 gamma = _gamma;
49 one_minus_gamma = 1 - _gamma;
50 gamma_over_K = _gamma / static_cast<double>(K);
51}

Friends And Related Symbol Documentation

◆ operator<<

ostream & operator<< ( ostream & out,
const EXP3 & exp3 )
friend

Definition at line 83 of file EXP3.cpp.

83 {
84 out << "Omegas:" << endl;
85 for (size_t i = 0; i < exp3.K; i++)
86 out << exp3.omega[i] << " ";
87 out << endl << "p values:" << endl;
88 for (size_t i = 0; i < exp3.K; i++)
89 out << exp3.p_values[i] << " ";
90 out << endl;
91 return out;
92}

Member Data Documentation

◆ choice

size_t EXP3::choice
private

Store the last choice made.

Definition at line 74 of file EXP3.hpp.

◆ choose_next

bool EXP3::choose_next
private

Belt-and-braces: warn if choose/reward happens in the wrong order.

Definition at line 70 of file EXP3.hpp.

◆ gamma

double EXP3::gamma
private

Definition at line 62 of file EXP3.hpp.

◆ gamma_over_K

double EXP3::gamma_over_K
private

Definition at line 65 of file EXP3.hpp.

◆ K

size_t EXP3::K
private

Definition at line 63 of file EXP3.hpp.

◆ omega

vector<double> EXP3::omega
private

Definition at line 60 of file EXP3.hpp.

◆ one_minus_gamma

double EXP3::one_minus_gamma
private

Definition at line 64 of file EXP3.hpp.

◆ p_values

vector<double> EXP3::p_values
private

Definition at line 61 of file EXP3.hpp.

◆ random_generator

boost::random::mt19937 EXP3::random_generator
static private

Random source.

Underlying random number generator for boost::random::discrete_distribution.

Definition at line 59 of file EXP3.hpp.


The documentation for this class was generated from the following files: