v0.5.0/html/_e_x_p3_8cpp_source.html

/*


Copyright © 2023-24 Sean Holden. All rights reserved.


*/

/*


This file is part of Connect++.


Connect++ is free software: you can redistribute it and/or modify it

under the terms of the GNU General Public License as published by the

Free Software Foundation, either version 3 of the License, or (at your

option) any later version.


Connect++ is distributed in the hope that it will be useful, but WITHOUT

ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for

more details.


You should have received a copy of the GNU General Public License along

with Connect++. If not, see <https://www.gnu.org/licenses/>.


*/


#include "EXP3.hpp"


// Out-of-class definition of the static random source used by EXP3::choose().
// It is constructed from params::boost_random_seed; being a static member,
// the one generator is shared by all EXP3 objects.
boost::random::mt19937 EXP3::random_generator(params::boost_random_seed);


//------------------------------------------------------------------

//------------------------------------------------------------------

//------------------------------------------------------------------

// EXP3

//------------------------------------------------------------------

//------------------------------------------------------------------

//------------------------------------------------------------------

// Build an EXP3 instance over _K arms with parameter _gamma.
//
// Every weight starts at 1.0 and every stored probability at 0.0 (the
// probabilities are only filled in by choose()). The object starts out
// expecting a call to choose(), and the quantities (1 - gamma) and
// gamma / K are cached so that they need not be recomputed on each call.
//
// The initializers are deliberately kept in their existing order.
EXP3::EXP3(size_t _K, double _gamma) :
    K(_K),
    gamma(_gamma),
    omega(_K, 1.0),
    p_values(_K, 0.0),
    choose_next(true),
    one_minus_gamma(1.0 - _gamma),
    gamma_over_K(_gamma / static_cast<double>(_K)),
    choice(0)
{
}

//------------------------------------------------------------------


// Replace gamma and refresh the two cached values derived from it,
// (1 - gamma) and gamma / K. Weights and probabilities are left untouched.
void EXP3::set_gamma(double _gamma) {
    // The three assignments are independent: each reads only _gamma and K.
    gamma_over_K = _gamma / static_cast<double>(K);
    one_minus_gamma = 1.0 - _gamma;
    gamma = _gamma;
}


//------------------------------------------------------------------


size_t EXP3::choose() {

    if (!choose_next) {

        cerr << "STOP IT!! EXP3 should be receiving reward..." << endl;

    }

    choose_next = false;

    double omega_sum = 0.0;

    for (size_t i = 0; i < K; i++) {

        omega_sum += omega[i];

        p_values[i] = gamma_over_K;

    }

    for (size_t i = 0; i < K; i++) {

        p_values[i] += (one_minus_gamma * (omega[i]/omega_sum));

    }

    boost::random::discrete_distribution<> p(p_values.begin(), p_values.end());

    choice = p(random_generator);

    return choice;

}


//------------------------------------------------------------------


void EXP3::reward(double r) {

    if (choose_next) {

        cerr << "STOP IT!! EXP3 should be choosing..." << endl;

    }

    choose_next = true;

    if (r < 0.0 || r > 1.0) {

        cerr << "STOP IT!! The reward needs to be in [0,1]." << endl;

    }

    double x = r / p_values[choice];

    omega[choice] = omega[choice] * exp(x * gamma_over_K);

}


//------------------------------------------------------------------

// Print the state of an EXP3 object: the K weights on one line under the
// heading "Omegas:", then the K stored probabilities on one line under the
// heading "p values:". Each value is followed by a single space.
ostream& operator<<(ostream& out, const EXP3& exp3) {
    const size_t arm_count = exp3.K;

    out << "Omegas:" << endl;
    for (size_t arm = 0; arm < arm_count; ++arm) {
        out << exp3.omega[arm] << " ";
    }
    out << endl;

    out << "p values:" << endl;
    for (size_t arm = 0; arm < arm_count; ++arm) {
        out << exp3.p_values[arm] << " ";
    }
    out << endl;

    return out;
}

EXP3
Implementation of the EXP3 algorithm for multiarmed bandits.
Definition EXP3.hpp:51

EXP3::choice
size_t choice
Store the last choice made.
Definition EXP3.hpp:74

EXP3::choose_next
bool choose_next
Belt-and-braces: warn if choose/reward happens in the wrong order.
Definition EXP3.hpp:70

EXP3::set_gamma
void set_gamma(double)
Reset gamma and associated members to something different.
Definition EXP3.cpp:47

EXP3::reward
void reward(double)
Provide reward for the most recent choice.
Definition EXP3.cpp:71

EXP3::choose
size_t choose()
Choose using the current state.
Definition EXP3.cpp:53

EXP3::random_generator
static boost::random::mt19937 random_generator
Random source.
Definition EXP3.hpp:59