Implementation of the EXP3 algorithm for multi-armed bandits.
More...
#include <EXP3.hpp>
|
| EXP3 (size_t, double) |
|
size_t | get_choice () const |
|
void | set_gamma (double) |
| Reset gamma and associated members to something different.
|
|
size_t | choose () |
| Choose using the current state.
|
|
void | reward (double) |
| Provide reward for the most recent choice.
|
|
Implementation of the EXP3 algorithm for multi-armed bandits.
Descriptions of this algorithm are easy to find; the implementation here closely follows the one given at:
https://en.wikipedia.org/wiki/Multi-armed_bandit
Definition at line 51 of file EXP3.hpp.
◆ EXP3()
EXP3::EXP3 |
( |
size_t | _K, |
|
|
double | _gamma ) |
Definition at line 36 of file EXP3.cpp.
37: K(_K)
38, gamma(_gamma)
39, omega(_K, 1.0)
40, p_values(_K, 0.0)
42, one_minus_gamma(1 - _gamma)
43, gamma_over_K(_gamma / static_cast<double>(_K))
45{}
size_t choice
Store the last choice made.
bool choose_next
Belt-and-braces: warn if choose/reward happens in the wrong order.
◆ choose()
Choose using the current state.
Definition at line 53 of file EXP3.cpp.
53 {
55 cerr << "STOP IT!! EXP3 should be receiving reward..." << endl;
56 }
58 double omega_sum = 0.0;
59 for (size_t i = 0; i < K; i++) {
60 omega_sum += omega[i];
61 p_values[i] = gamma_over_K;
62 }
63 for (size_t i = 0; i < K; i++) {
64 p_values[i] += (one_minus_gamma * (omega[i]/omega_sum));
65 }
66 boost::random::discrete_distribution<> p(p_values.begin(), p_values.end());
69}
static boost::random::mt19937 random_generator
Random source.
◆ get_choice()
size_t EXP3::get_choice |
( |
| ) |
const |
|
inline |
◆ reward()
void EXP3::reward |
( |
double | r | ) |
|
Provide reward for the most recent choice.
Definition at line 71 of file EXP3.cpp.
71 {
73 cerr << "STOP IT!! EXP3 should be choosing..." << endl;
74 }
76 if (r < 0.0 || r > 1.0) {
77 cerr << "STOP IT!! The reward needs to be in [0,1]." << endl;
78 }
79 double x = r / p_values[
choice];
81}
◆ set_gamma()
void EXP3::set_gamma |
( |
double | _gamma | ) |
|
Reset gamma and associated members to something different.
Definition at line 47 of file EXP3.cpp.
47 {
48 gamma = _gamma;
49 one_minus_gamma = 1 - _gamma;
50 gamma_over_K = _gamma / static_cast<double>(K);
51}
◆ operator<<
ostream & operator<< |
( |
ostream & | out, |
|
|
const EXP3 & | exp3 ) |
|
friend |
Definition at line 83 of file EXP3.cpp.
83 {
84 out << "Omegas:" << endl;
85 for (size_t i = 0; i < exp3.K; i++)
86 out << exp3.omega[i] << " ";
87 out << endl << "p values:" << endl;
88 for (size_t i = 0; i < exp3.K; i++)
89 out << exp3.p_values[i] << " ";
90 out << endl;
91 return out;
92}
◆ choice
Store the last choice made.
Definition at line 74 of file EXP3.hpp.
◆ choose_next
Belt-and-braces: warn if choose/reward happens in the wrong order.
Definition at line 70 of file EXP3.hpp.
◆ gamma
◆ gamma_over_K
double EXP3::gamma_over_K |
|
private |
◆ omega
vector<double> EXP3::omega |
|
private |
◆ one_minus_gamma
double EXP3::one_minus_gamma |
|
private |
◆ p_values
vector<double> EXP3::p_values |
|
private |
◆ random_generator
boost::random::mt19937 EXP3::random_generator |
|
static private |
Random source.
Underlying random number generator for boost::random::discrete_distribution.
Definition at line 59 of file EXP3.hpp.
The documentation for this class was generated from the following files:
- /Users/sbh11/Desktop/connection-prover/c++/connect++/source/bandits/EXP3.hpp
- /Users/sbh11/Desktop/connection-prover/c++/connect++/source/bandits/EXP3.cpp