Skip to content

Separate Rcpp from main logic #57

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion src/Makevars
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SOURCES=lib/cpf.cpp lib/grid.cpp lib/helper.cpp lib/rpf.cpp lib/trees.cpp randomPlantedForest.cpp RcppExports.cpp
SOURCES=lib/cpf.cpp lib/grid.cpp lib/helper.cpp lib/rpf.cpp lib/trees.cpp lib/rcpp_interface.cpp lib/random_utils.cpp randomPlantedForest.cpp RcppExports.cpp

OBJECTS = $(SOURCES:.cpp=.o)

Expand Down
50 changes: 32 additions & 18 deletions src/include/cpf.hpp
Original file line number Diff line number Diff line change
@@ -1,34 +1,48 @@
#ifndef CPF_H
#define CPF_H

#include <vector>
#include "rpf.hpp"
// Selector for the loss variant used by the classification forest.
// CPFParams::loss stores a value of this type. The enumerators carry no
// explicit values, so they are numbered 0..9 in declaration order; keep the
// order stable if anything relies on the numeric value.
// NOTE(review): the numbered suffixes (_2, _3, _4) name further variants of the
// logit / exponential losses; their exact definitions are not visible here.
enum LossType
{
L1,
L2,
median,
logit,
logit_2,
logit_3,
logit_4,
exponential,
exponential_2,
exponential_3
};
// Plain bundle of the classification-forest settings; this is the return type
// of ClassificationRPF::get_parameters().
// None of the members has a default initializer, so a default-initialized
// CPFParams (e.g. `CPFParams p;`) holds indeterminate values until every
// field has been assigned.
// NOTE(review): the field meanings below are inferred from the names only --
// confirm against the implementation.
struct CPFParams
{
int max_interaction; // presumably the maximum interaction order
int n_trees;         // presumably the number of trees
int n_splits;        // presumably the number of splits per tree
int split_try;       // presumably the number of split candidates tried
double t_try;        // presumably a fraction-type tuning parameter
bool purify_forest;
bool deterministic;
int nthreads;        // presumably the number of worker threads
bool cross_validate;
LossType loss;       // selected loss variant, see LossType
};

class ClassificationRPF : public RandomPlantedForest
{

public:
using RandomPlantedForest::calcOptimalSplit;
ClassificationRPF(const NumericMatrix &samples_Y, const NumericMatrix &samples_X,
const String loss = "L2", const NumericVector parameters = {1, 50, 30, 10, 0.4, 0, 0, 0, 0, 0, 0.1});
void set_parameters(StringVector keys, NumericVector values);
~ClassificationRPF(){};
ClassificationRPF(const std::vector<std::vector<double>> &samples_Y, const std::vector<std::vector<double>> &samples_X,
const std::string loss = "L2", const std::vector<double> parameters = {1, 50, 30, 10, 0.4, 0, 0, 0, 0, 0, 0.1});
~ClassificationRPF() {};
CPFParams get_parameters();

private:
double delta;
double epsilon;
enum LossType
{
L1,
L2,
median,
logit,
logit_2,
logit_3,
logit_4,
exponential,
exponential_2,
exponential_3
};
LossType loss;
void (ClassificationRPF::*calcLoss)(Split &);
void create_tree_family(std::vector<Leaf> initial_leaves, size_t n) override;
Expand All @@ -47,4 +61,4 @@ class ClassificationRPF : public RandomPlantedForest
void exponential_loss_3(Split &split);
};

#endif
#endif
3 changes: 2 additions & 1 deletion src/include/grid.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define GRID_H

#include "helper.hpp"
#include <vector>

using namespace utils;

Expand Down Expand Up @@ -39,4 +40,4 @@ namespace grid
};
};

#endif
#endif
27 changes: 11 additions & 16 deletions src/include/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#include <memory>
#include <vector>
#include <utility>
#include <Rcpp.h>
#include <thread>
#include <assert.h>
#include "random_utils.hpp"

#ifndef UTILS_H
#define UTILS_H
Expand Down Expand Up @@ -101,26 +101,21 @@ namespace utils
template <typename Iter>
void shuffle_vector(Iter first, Iter last)
{
int n = std::distance(first, last);
while (n > 1)
{
int k = random_index(n--);
std::swap(*(first + n), *(first + k));
}
RandomGenerator::shuffle(first, last);
};

std::vector<int> to_std_vec(Rcpp::IntegerVector rv);
std::vector<double> to_std_vec(Rcpp::NumericVector rv);
std::vector<std::vector<double>> to_std_vec(Rcpp::NumericMatrix rv);
std::set<int> to_std_set(Rcpp::NumericVector rv);
std::set<int> to_std_set(Rcpp::IntegerVector rv);
std::vector<int> to_std_vec(std::vector<int> rv);
std::vector<double> to_std_vec(std::vector<double> rv);
std::vector<std::vector<double>> to_std_vec(std::vector<std::vector<double>> rv);
std::set<int> to_std_set(std::vector<double> rv);
std::set<int> to_std_set(std::vector<int> rv);

// functions for converting R and Cpp types

Rcpp::IntegerVector from_std_set(std::set<int> v);
Rcpp::IntegerVector from_std_vec(std::vector<int> v);
Rcpp::NumericVector from_std_vec(std::vector<double> v);
Rcpp::NumericMatrix from_std_vec(std::vector<std::vector<double>> v);
std::vector<int> from_std_set(std::set<int> v);
std::vector<int> from_std_vec(std::vector<int> v);
std::vector<double> from_std_vec(std::vector<double> v);
std::vector<std::vector<double>> from_std_vec(std::vector<std::vector<double>> v);

// ----------------- overload of vector operators -----------------

Expand Down
144 changes: 144 additions & 0 deletions src/include/random_utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#ifndef RANDOM_UTILS_H
#define RANDOM_UTILS_H

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <random>
#include <stdexcept>
#include <type_traits>
#include <utility>
#include <vector>

#include <Rcpp.h>

using namespace Rcpp;

namespace utils
{

/**
 * Interface implemented by every random-number source in this header.
 *
 * The class is abstract: both drawing functions are pure virtual and must be
 * provided by a concrete backend.
 */
class RNGBackend
{
public:
    /// Draws an integer for the bound n (RandomGenerator documents the range as [0, n)).
    virtual int random_int(int n) = 0;

    /// Draws a double (RandomGenerator documents the range as [0, 1)).
    virtual double random_double() = 0;

    /// Virtual, so a backend may be destroyed through an RNGBackend pointer.
    virtual ~RNGBackend() = default;
};

/**
 * Backend that draws from the C++ standard library engine std::mt19937.
 *
 * The engine is declared thread_local (one engine per thread), whereas the
 * `seeded` flag is a single flag shared by all threads.
 *
 * NOTE(review): because of that mismatch, once seed()/initialize() has run on
 * one thread, every other thread skips seeding and draws from its own engine
 * in whatever state its definition (not shown in this header) gives it; the
 * flag is also read and written without synchronization. Confirm this is
 * intended before drawing from worker threads. Changing it requires touching
 * the static member definitions, which live outside this header.
 */
class StdRNGBackend : public RNGBackend
{
private:
    static thread_local std::mt19937 generator;
    static bool seeded;

public:
    /// Re-seeds the calling thread's engine with `seed` and marks the backend as seeded.
    void seed(uint32_t seed)
    {
        generator.seed(seed);
        seeded = true;
    }

    /// Seeds the calling thread's engine from the high-resolution clock,
    /// unless the backend has already been marked as seeded.
    void initialize()
    {
        if (seeded)
        {
            return;
        }
        const auto since_epoch = std::chrono::high_resolution_clock::now().time_since_epoch();
        const auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch).count();
        // The explicit cast keeps only the low 32 bits of the nanosecond count.
        generator.seed(static_cast<uint32_t>(nanos));
        seeded = true;
    }

    /**
     * Draws a uniformly distributed integer from the closed range [0, n - 1].
     *
     * @param n exclusive upper bound; must be positive.
     * @return the drawn integer.
     * @throws std::invalid_argument if n <= 0. std::uniform_int_distribution
     *         requires its lower bound not to exceed its upper bound, so the
     *         bounds (0, n - 1) would otherwise be undefined behaviour.
     */
    int random_int(int n) override
    {
        if (n <= 0)
        {
            throw std::invalid_argument("StdRNGBackend::random_int: n must be positive");
        }
        initialize();
        std::uniform_int_distribution<int> dist(0, n - 1);
        return dist(generator);
    }

    /// Draws a double from std::uniform_real_distribution<double>(0.0, 1.0).
    double random_double() override
    {
        initialize();
        std::uniform_real_distribution<double> dist(0.0, 1.0);
        return dist(generator);
    }
};

/**
 * Backend that obtains its randomness from R through R::runif.
 *
 * Every call constructs an RNGScope object that lives for the duration of
 * that call.
 */
class RcppRNGBackend : public RNGBackend
{
public:
    /// Scales a single R::runif(0, 1) draw by n and truncates the product to int.
    int random_int(int n) override
    {
        RNGScope rng_guard;
        const double draw = R::runif(0, 1);
        return static_cast<int>(draw * n);
    }

    /// Returns a single R::runif(0, 1) draw.
    double random_double() override
    {
        RNGScope rng_guard;
        const double draw = R::runif(0, 1);
        return draw;
    }
};

/**
 * Static facade through which callers draw random numbers.
 *
 * All draws are forwarded to the backend that `backend` currently points to;
 * use_r_random() and use_std_random() switch between the two backend objects
 * owned by this class. The static members are defined outside this header, so
 * the initially selected backend is not visible here.
 */
class RandomGenerator
{
private:
    static RNGBackend *backend;
    static StdRNGBackend std_backend;
    static RcppRNGBackend rcpp_backend;

public:
    /// Generate random integer in range [0, n) by forwarding to the active backend.
    static int random_index(int n)
    {
        return backend->random_int(n);
    }

    /// Generate random double in range [0, 1) by forwarding to the active backend.
    static double random_double()
    {
        return backend->random_double();
    }

    /**
     * Shuffles the range [first, last) in place.
     *
     * Walks from the last element down to the second one and swaps each
     * element with one at a randomly drawn position not after it. Requires
     * random-access iterators, because positions are formed with `first + i`.
     * Ranges with fewer than two elements are left untouched.
     */
    template <typename Iter>
    static void shuffle(Iter first, Iter last)
    {
        const auto count = std::distance(first, last);
        for (auto i = count - 1; i > 0; --i)
        {
            // The backend takes an int bound; make the narrowing from the
            // iterator difference type explicit.
            const int j = backend->random_int(static_cast<int>(i + 1));
            std::swap(*(first + i), *(first + j));
        }
    }

    /**
     * Samples n elements from `population` with replacement.
     *
     * @param population values to draw from.
     * @param n          number of draws.
     * @return a vector of n elements, each copied from a randomly drawn
     *         position of `population`; empty when n is 0.
     * @throws std::invalid_argument if n > 0 and `population` is empty, since
     *         there is no valid index to draw in that case.
     */
    template <typename T>
    static std::vector<T> sample_with_replacement(const std::vector<T> &population, size_t n)
    {
        std::vector<T> result;
        if (n == 0)
        {
            return result;
        }
        if (population.empty())
        {
            throw std::invalid_argument("RandomGenerator::sample_with_replacement: population is empty");
        }
        // random_index() takes an int bound; convert once instead of implicitly on every draw.
        // NOTE(review): populations larger than INT_MAX are not supported by this conversion.
        const int population_size = static_cast<int>(population.size());
        result.reserve(n);
        for (size_t i = 0; i < n; ++i)
        {
            result.push_back(population[random_index(population_size)]);
        }
        return result;
    }

    /// Switch to using R's RNG.
    static void use_r_random()
    {
        backend = &rcpp_backend;
    }

    /// Switch to using the C++ standard RNG, re-seeding it with `seed` (42 when omitted).
    static void use_std_random(uint32_t seed = 42)
    {
        std_backend.seed(seed);
        backend = &std_backend;
    }
};

} // namespace utils

#endif // RANDOM_UTILS_H
59 changes: 59 additions & 0 deletions src/include/rcpp_interface.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#ifndef RCPP_INTERFACE_H
#define RCPP_INTERFACE_H

#include "cpf.hpp"
#include "rpf.hpp"
#include <Rcpp.h>

using namespace Rcpp;

/**
 * Abstract interface of the operations exposed with Rcpp types.
 *
 * RcppRPF and RcppCPF both derive from it and override every function below.
 */
class RcppInterface
{
public:
    /// Virtual, so that destroying a derived wrapper through an RcppInterface
    /// pointer is well-defined.
    virtual ~RcppInterface() = default;

    /// Prediction for a matrix X, restricted to the given components.
    virtual NumericMatrix predict_matrix(const NumericMatrix &X, const NumericVector components) = 0;

    /// Prediction for a vector X, restricted to the given components.
    virtual NumericMatrix predict_vector(const NumericVector &X, const NumericVector components) = 0;

    /// Cross-validation over n_sets sets and the supplied candidate values.
    virtual void cross_validation(int n_sets, IntegerVector splits, NumericVector t_tries, IntegerVector split_tries) = 0;

    /// Error measure between predicted and true responses.
    virtual double MSE(const NumericMatrix &Y_predicted, const NumericMatrix &Y_true) = 0;

    /// Returns the model as an Rcpp List.
    virtual List get_model() = 0;
};

// Rcpp-facing wrapper: derives from RandomPlantedForest and implements
// RcppInterface. Only declarations are given here; the definitions are not
// part of this header.
class RcppRPF : public RandomPlantedForest, public RcppInterface
{

public:
// Constructs from response samples (samples_Y), feature samples (samples_X)
// and a numeric parameter vector whose default holds nine values.
// NOTE(review): the meaning of each position in `parameters` is not visible
// here -- confirm against the implementation.
RcppRPF(const NumericMatrix &samples_Y, const NumericMatrix &samples_X,
const NumericVector parameters = {1, 50, 30, 10, 0.4, 0, 0, 0, 0});
// Default constructor with an empty body.
RcppRPF() {};
// Overrides of RcppInterface. The `components = {0}` defaults exist only on
// these declarations: default arguments are bound by the static type of the
// call, and RcppInterface declares none, so calls through an RcppInterface
// reference must pass `components` explicitly.
NumericMatrix predict_matrix(const NumericMatrix &X, const NumericVector components = {0}) override;
NumericMatrix predict_vector(const NumericVector &X, const NumericVector components = {0}) override;
void cross_validation(int n_sets, IntegerVector splits, NumericVector t_tries, IntegerVector split_tries) override;
double MSE(const NumericMatrix &Y_predicted, const NumericMatrix &Y_true) override;
List get_model() override;

// Additional non-virtual members declared by this wrapper.
void purify_3();
void print();
// Returns the parameters as an Rcpp List.
List get_parameters();
bool is_purified();

protected:
// Vector counterpart of MSE().
double MSE_vec(const NumericVector &Y_predicted, const NumericVector &Y_true);
};

// Rcpp-facing wrapper for the classification forest: derives from
// ClassificationRPF and implements RcppInterface. Only declarations are given
// here; the definitions are not part of this header. No default constructor
// is declared for this class.
class RcppCPF : public ClassificationRPF, public RcppInterface
{
public:
// Constructs from response samples, feature samples, a loss name (default
// "L2") and a numeric parameter vector whose default holds nine values.
// NOTE(review): the ClassificationRPF constructor declares a default with
// eleven values; confirm how the shorter default used here is mapped onto it.
RcppCPF(const NumericMatrix &samples_Y, const NumericMatrix &samples_X,
const std::string loss = "L2", const NumericVector parameters = {1, 50, 30, 10, 0.4, 0, 0, 0, 0});
// Overrides of RcppInterface. The `components = {0}` defaults exist only on
// these declarations: default arguments are bound by the static type of the
// call, and RcppInterface declares none, so calls through an RcppInterface
// reference must pass `components` explicitly.
NumericMatrix predict_matrix(const NumericMatrix &X, const NumericVector components = {0}) override;
NumericMatrix predict_vector(const NumericVector &X, const NumericVector components = {0}) override;
void cross_validation(int n_sets, IntegerVector splits, NumericVector t_tries, IntegerVector split_tries) override;
double MSE(const NumericMatrix &Y_predicted, const NumericMatrix &Y_true) override;
List get_model() override;

// Additional non-virtual members declared by this wrapper.
void purify_3();
void print();
// Returns the parameters as an Rcpp List. This has the same name as
// ClassificationRPF::get_parameters(), which returns CPFParams, and hides it
// within this class.
List get_parameters();
bool is_purified();
};

#endif // RCPP_INTERFACE_H
Loading
Loading