v0.5.0/html/_e_r_w_a_8cpp_source.html

/*


Copyright © 2023-24 Sean Holden. All rights reserved.


*/

/*


This file is part of Connect++.


Connect++ is free software: you can redistribute it and/or modify it

under the terms of the GNU General Public License as published by the

Free Software Foundation, either version 3 of the License, or (at your

option) any later version.


Connect++ is distributed in the hope that it will be useful, but WITHOUT

ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for

more details.


You should have received a copy of the GNU General Public License along

with Connect++. If not, see <https://www.gnu.org/licenses/>.


*/


#include "ERWA.hpp"


// Definition of the static random source declared in ERWA. It is
// constructed with params::boost_random_seed as its seed, and choose()
// draws from it for both the explore/exploit decision and the random arm.
boost::random::mt19937 ERWA::random_generator(params::boost_random_seed);


//------------------------------------------------------------------

//------------------------------------------------------------------

//------------------------------------------------------------------

// ERWA

//------------------------------------------------------------------

//------------------------------------------------------------------

//------------------------------------------------------------------

// Build a bandit over _K arms.
//
//   _K  : number of arms. Stored in K, used as the length of r_hat
//         (every estimate starts at 0.0) and to form the upper bound
//         _K - 1 handed to p2.
//   _eg : stored in epsilon_greedy; choose() only consults p when this
//         is true.
//   _a  : stored in alpha_is_1_over_n; reward() uses a 1/n step size
//         instead of alpha when this is true.
//
// epsilon and alpha both start at 0.0, p is default-constructed, and the
// object starts in the "choose next" state with choice == 0 and n == 0.
//
// NOTE(review): _K - 1 wraps around when _K is 0 because size_t is
// unsigned -- presumably callers always pass _K >= 1; confirm.
ERWA::ERWA(size_t _K, bool _eg, bool _a) :
    K(_K),
    epsilon(0.0),
    alpha(0.0),
    p(),
    p2(0, _K - 1),
    r_hat(_K, 0.0),
    choose_next(true),
    epsilon_greedy(_eg),
    alpha_is_1_over_n(_a),
    choice(0),
    n(0)
{
}

//------------------------------------------------------------------


// Return the index of the largest entry in r_hat.
//
// Ties are resolved in favour of the lowest index, because a later entry
// only replaces the current best when it is strictly greater. If r_hat
// is empty the result is 0.
size_t ERWA::find_max() const {
    // Start from lowest(), the most negative finite double.
    // numeric_limits<double>::min() is the smallest *positive* value, so
    // using it as the starting point would make every estimate <= 0
    // lose the comparison and the function would always return index 0.
    double best = std::numeric_limits<double>::lowest();
    size_t result = 0;
    for (size_t i = 0; i < r_hat.size(); i++) {
        const double candidate = r_hat[i];
        if (candidate > best) {
            best = candidate;
            result = i;
        }
    }
    return result;
}


//------------------------------------------------------------------


// Pick the next arm, store it in 'choice' and return it.
//
// If epsilon_greedy is set and the draw from p yields true, the arm is
// drawn from p2; otherwise the arm with the largest r_hat is used (see
// find_max()). p is only sampled when epsilon_greedy is set. The counter
// n is incremented on every call.
//
// choose() and reward() are meant to alternate. If choose() is called
// twice in a row a warning is written to cerr, but the call still
// proceeds.
size_t ERWA::choose() {
    if (!choose_next) {
        // The diagnostic names this class so that the warning can be
        // traced to the right bandit implementation.
        cerr << "STOP IT!! ERWA should be receiving reward..." << endl;
    }
    choose_next = false;
    // && short-circuits, so the random source is untouched in the
    // purely greedy configuration.
    const bool explore = epsilon_greedy && p(random_generator);
    if (explore) {
        choice = p2(random_generator);
    }
    else {
        choice = find_max();
    }
    n++;
    return choice;
}


//------------------------------------------------------------------


// Feed back the reward obtained for the most recent choice and move that
// arm's estimate towards it:
//
//     r_hat[choice] <- r_hat[choice] + step * (reward - r_hat[choice])
//
// where step is 1/n when alpha_is_1_over_n is set (n being the number of
// choose() calls so far) and alpha otherwise.
//
// choose() and reward() are meant to alternate. If reward() is called
// when a choose() was expected a warning is written to cerr, but the
// update is still applied.
void ERWA::reward(double reward) {
    if (choose_next) {
        // The diagnostic names this class so that the warning can be
        // traced to the right bandit implementation.
        cerr << "STOP IT!! ERWA should be choosing..." << endl;
    }
    choose_next = true;

    double step = alpha;
    if (alpha_is_1_over_n) {
        // n is still 0 if reward() arrives before any choose(). Dividing
        // by it would give an infinite step and leave inf/NaN in r_hat,
        // so treat that case as a first sample (step 1).
        step = (n > 0) ? (1 / static_cast<double>(n)) : 1.0;
    }

    const double r = r_hat[choice];
    r_hat[choice] = r + (step * (reward - r));
}


//------------------------------------------------------------------

// Write the current reward estimates to 'out': a "r_hats:" header line,
// then the first K entries of r_hat on one line, each followed by a
// single space, then a line end. Returns 'out' so calls can be chained.
ostream& operator<<(ostream& out, const ERWA& erwa) {
    out << "r_hats:" << endl;
    size_t arm = 0;
    while (arm < erwa.K) {
        out << erwa.r_hat[arm] << " ";
        ++arm;
    }
    out << endl;
    return out;
}

ERWA
Implementation of the ERWA algorithm for multi-armed bandits.
Definition ERWA.hpp:53

ERWA::random_generator
static boost::random::mt19937 random_generator
Random source.
Definition ERWA.hpp:61

ERWA::choice
size_t choice
Store the last choice made.
Definition ERWA.hpp:79

ERWA::choose
size_t choose()
Choose using the current state.
Definition ERWA.cpp:63

ERWA::choose_next
bool choose_next
Belt-and-braces: warn if choose/reward happens in the wrong order.
Definition ERWA.hpp:75

ERWA::find_max
size_t find_max() const
Find the index of the currently maximum r_hat.
Definition ERWA.cpp:50

ERWA::reward
void reward(double)
Provide reward for the most recent choice.
Definition ERWA.cpp:78