Implementation of the ERWA algorithm for multiarmed bandits. More...

#include <ERWA.hpp>

Collaboration diagram for ERWA:

Public Member Functions
	ERWA (size_t, bool=true, bool=true)

size_t	get_choice () const

void	set_epsilon (double _e)
	Reset epsilon to something different.

void	set_alpha (double _a)
	Reset alpha to something different.

size_t	choose ()
	Choose using the current state.

void	reward (double)
	Provide reward for the most recent choice.

Private Member Functions
size_t	find_max () const
	Find the index of the currently maximum r_hat.

Private Attributes
boost::random::bernoulli_distribution	p

boost::random::uniform_int_distribution	p2

vector< double >	r_hat

double	epsilon

double	alpha

size_t	K

size_t	n

bool	epsilon_greedy

bool	alpha_is_1_over_n

bool	choose_next
	Belt-and-braces: warn if choose/reward happens in the wrong order.

size_t	choice
	Store the last choice made.

Static Private Attributes
static boost::random::mt19937	random_generator
	Random source.

Friends
ostream &	operator<< (ostream &, const ERWA &)

Detailed Description

Implementation of the ERWA algorithm for multiarmed bandits.

It's easy enough to find a description of this algorithm. The implementation here is pretty much straight from my book, with additions based on Sutton and Barto. In the latter, they (1) have alpha(n) = 1/n and (2) allow epsilon-greediness. So those possibilities have been added.

Definition at line 53 of file ERWA.hpp.

Constructor & Destructor Documentation

◆ ERWA()

ERWA::ERWA	(	size_t	_K,
		bool	_eg = true,
		bool	_a = true )

Definition at line 36 of file ERWA.cpp.

// Initialisers are listed in member declaration order (p, p2, r_hat, epsilon,
// alpha, K, n, epsilon_greedy, alpha_is_1_over_n, choose_next, choice), which
// is the order in which they are actually executed.
: p(0.0)          // exploration probability agrees with epsilon == 0.0 below;
                  // a default-constructed Bernoulli sampler would use 0.5
, p2(0, _K - 1)   // arm indices 0 .. _K-1; _K == 0 would wrap in size_t arithmetic
, r_hat(_K, 0.0)  // one reward estimate per arm, all starting at zero
, epsilon(0.0)
, alpha(0.0)
, K(_K)
, n(0)            // no choose() calls made yet
, epsilon_greedy(_eg)
, alpha_is_1_over_n(_a)
, choose_next(true)  // the first expected call is choose()
, choice(0)
{}

Member Function Documentation

◆ choose()

size_t ERWA::choose ( )

Choose using the current state.

Definition at line 63 of file ERWA.cpp.

                    {
    // Belt-and-braces: choose() and reward() are expected to alternate, so
    // warn (but carry on) when two choose() calls arrive back to back.
    if (!choose_next) {
        cerr << "STOP IT!! ERWA should be receiving reward..." << endl;
    }
    choose_next = false;
    // When epsilon-greedy is enabled, a Bernoulli draw from p decides whether
    // to explore. The draw is only made if epsilon_greedy is set, so the
    // random stream is untouched in pure-greedy mode.
    const bool explore = epsilon_greedy && p(random_generator);
    if (explore) {
        // Exploration: pick an arm from the uniform distribution p2.
        choice = p2(random_generator);
    }
    else {
        // Exploitation: pick the arm with the largest current estimate.
        choice = find_max();
    }
    ++n;  // count this selection; reward() uses n for the 1/n step size
    return choice;
}

◆ find_max()

size_t ERWA::find_max ( ) const

private

Find the index of the currently maximum r_hat.

Definition at line 50 of file ERWA.cpp.

                            {
    // Seed with the most negative finite double. numeric_limits<double>::min()
    // is the smallest *positive* normal value, so seeding with it would stop
    // any zero or negative estimate from ever winning the comparison.
    double best = std::numeric_limits<double>::lowest();
    size_t result = 0;  // also the answer when r_hat is empty
    for (size_t i = 0; i < r_hat.size(); ++i) {
        // Strict '>' keeps the lowest index among equal maxima.
        if (r_hat[i] > best) {
            best = r_hat[i];
            result = i;
        }
    }
    return result;
}

◆ get_choice()

size_t ERWA::get_choice ( ) const

inline

Definition at line 89 of file ERWA.hpp.

                                     {
        // Read-only accessor for the stored index of the last choice made.
        return this->choice;
    }

◆ reward()

void ERWA::reward ( double reward )

Provide reward for the most recent choice.

Definition at line 78 of file ERWA.cpp.

                               {
    // Belt-and-braces: choose() and reward() are expected to alternate, so
    // warn (but carry on) when a reward arrives without a pending choice.
    if (choose_next) {
        cerr << "STOP IT!! ERWA should be choosing..." << endl;
    }
    choose_next = true;
    // Step size is either 1/n (n = number of choose() calls so far) or the
    // fixed alpha. n is still 0 if reward() arrives before any choose(); use
    // 1 in that case so the division cannot put inf/NaN into r_hat.
    const double step = alpha_is_1_over_n
        ? 1.0 / static_cast<double>(n > 0 ? n : 1)
        : alpha;
    // Move the chosen arm's estimate towards the observed reward.
    const double previous = r_hat[choice];
    r_hat[choice] = previous + (step * (reward - previous));
}

◆ set_alpha()

void ERWA::set_alpha ( double _a )

inline

Reset alpha to something different.

Definition at line 103 of file ERWA.hpp.

                                     {
        // Overwrite the fixed step size; reward() only consults it when
        // alpha_is_1_over_n is false.
        this->alpha = _a;
    }

◆ set_epsilon()

void ERWA::set_epsilon ( double _e )

inline

Reset epsilon to something different.

Definition at line 95 of file ERWA.hpp.

                                       {
        // Record the new value, then rebuild the Bernoulli sampler so that
        // choose() draws with the same probability.
        epsilon = _e;
        p = boost::random::bernoulli_distribution<>(_e);
    }

Friends And Related Symbol Documentation

◆ operator<<

ostream & operator<<	(	ostream &	out,
		const ERWA &	erwa )

friend

Definition at line 92 of file ERWA.cpp.

                                                    {
    // Header line, then the K reward estimates separated by single spaces,
    // then a terminating newline.
    out << "r_hats:" << endl;
    size_t arm = 0;
    while (arm < erwa.K) {
        out << erwa.r_hat[arm] << " ";
        ++arm;
    }
    out << endl;
    return out;
}

Member Data Documentation

◆ alpha

double ERWA::alpha

private

Definition at line 66 of file ERWA.hpp.

◆ alpha_is_1_over_n

bool ERWA::alpha_is_1_over_n

private

Definition at line 70 of file ERWA.hpp.

◆ choice

size_t ERWA::choice

private

Store the last choice made.

Definition at line 79 of file ERWA.hpp.

◆ choose_next

bool ERWA::choose_next

private

Belt-and-braces: warn if choose/reward happens in the wrong order.

Definition at line 75 of file ERWA.hpp.

◆ epsilon

double ERWA::epsilon

private

Definition at line 65 of file ERWA.hpp.

◆ epsilon_greedy

bool ERWA::epsilon_greedy

private

Definition at line 69 of file ERWA.hpp.

◆ K

size_t ERWA::K

private

Definition at line 67 of file ERWA.hpp.

◆ n

size_t ERWA::n

private

Definition at line 68 of file ERWA.hpp.

◆ p

boost::random::bernoulli_distribution ERWA::p

private

Definition at line 62 of file ERWA.hpp.

◆ p2

boost::random::uniform_int_distribution ERWA::p2

private

Definition at line 63 of file ERWA.hpp.

◆ r_hat

vector<double> ERWA::r_hat

private

Definition at line 64 of file ERWA.hpp.

◆ random_generator

boost::random::mt19937 ERWA::random_generator

staticprivate

Random source.

Underlying random number generator for epsilon-greediness.

Definition at line 61 of file ERWA.hpp.

The documentation for this class was generated from the following files:

/Users/sbh11/Desktop/connection-prover/c++/connect++/source/bandits/ERWA.hpp
/Users/sbh11/Desktop/connection-prover/c++/connect++/source/bandits/ERWA.cpp

Public Member Functions

Private Member Functions

Private Attributes

Static Private Attributes

Friends

Detailed Description

Constructor & Destructor Documentation

◆ ERWA()

Member Function Documentation

◆ choose()

◆ find_max()

◆ get_choice()

◆ reward()

◆ set_alpha()

◆ set_epsilon()

Friends And Related Symbol Documentation

◆ operator<<

Member Data Documentation

◆ alpha

◆ alpha_is_1_over_n

◆ choice

◆ choose_next

◆ epsilon

◆ epsilon_greedy

◆ K

◆ n

◆ p

◆ p2

◆ r_hat

◆ random_generator