Connect++ 0.6.0
A fast, readable connection prover for first-order logic.
Loading...
Searching...
No Matches
EXP3.hpp
1/*
2
3Copyright © 2023-24 Sean Holden. All rights reserved.
4
5*/
6/*
7
8This file is part of Connect++.
9
10Connect++ is free software: you can redistribute it and/or modify it
11under the terms of the GNU General Public License as published by the
12Free Software Foundation, either version 3 of the License, or (at your
13option) any later version.
14
15Connect++ is distributed in the hope that it will be useful, but WITHOUT
16ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18more details.
19
20You should have received a copy of the GNU General Public License along
21with Connect++. If not, see <https://www.gnu.org/licenses/>.
22
23*/
24
25#ifndef EXP3_HPP
26#define EXP3_HPP
27
28#include <iostream>
29#include <vector>
30#include <math.h>
31
32#include "Parameters.hpp"
33
34#include <boost/random/mersenne_twister.hpp>
35#include <boost/random/discrete_distribution.hpp>
36
37using std::vector;
38using std::cerr;
39using std::endl;
40using std::ostream;
41
51class EXP3 {
52private:
59 static boost::random::mt19937 random_generator;
60 vector<double> omega;
61 vector<double> p_values;
62 double gamma;
63 size_t K;
64 double one_minus_gamma;
65 double gamma_over_K;
74 size_t choice;
75public:
76 EXP3() = delete;
77 EXP3(size_t, double);
78
79 inline size_t get_choice() const {
80 return choice;
81 }
86 void set_gamma(double);
87
91 size_t choose();
95 void reward(double);
96
97 friend ostream& operator<<(ostream&, const EXP3&);
98};
99
100#endif
Implementation of the EXP3 algorithm for multiarmed bandits.
Definition EXP3.hpp:51
size_t choice
Store the last choice made.
Definition EXP3.hpp:74
bool choose_next
Belt-and-braces: warn if choose/reward happens in the wrong order.
Definition EXP3.hpp:70
void set_gamma(double)
Reset gamma and associated members to something different.
Definition EXP3.cpp:47
void reward(double)
Provide reward for the most recent choice.
Definition EXP3.cpp:71
size_t choose()
Choose using the current state.
Definition EXP3.cpp:53
static boost::random::mt19937 random_generator
Random source.
Definition EXP3.hpp:59