Skip to content

Commit a7da1b6

Browse files
committed
Merge pull request #5 from graehl/master
c++11
2 parents 28bdadf + c461c4a commit a7da1b6

20 files changed

+3304
-3145
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,5 @@ src/prepareNeuralTM
88
src/testNeuralLM
99
src/testNeuralNetwork
1010
src/trainNeuralNetwork
11+
.history
12+
src/make.sh

src/Activation_function.h

+65-64
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
#include <cmath>
55
#include <string>
6-
//#include <../3rdparty/Eigen/Dense>
76
#include <Eigen/Dense>
87

98
#include "util.h"
@@ -19,28 +18,28 @@ enum activation_function_type { Tanh, HardTanh, Rectifier, Identity, InvalidFunc
1918

2019
inline activation_function_type string_to_activation_function (const std::string &s)
2120
{
22-
if (s == "identity")
23-
return Identity;
24-
else if (s == "rectifier")
25-
return Rectifier;
26-
else if (s == "tanh")
27-
return Tanh;
28-
else if (s == "hardtanh")
29-
return HardTanh;
30-
else
31-
return InvalidFunction;
21+
if (s == "identity")
22+
return Identity;
23+
else if (s == "rectifier")
24+
return Rectifier;
25+
else if (s == "tanh")
26+
return Tanh;
27+
else if (s == "hardtanh")
28+
return HardTanh;
29+
else
30+
return InvalidFunction;
3231
}
3332

3433
inline std::string activation_function_to_string (activation_function_type f)
3534
{
36-
if (f == Identity)
37-
return "identity";
38-
else if (f == Rectifier)
39-
return "rectifier";
40-
else if (f == Tanh)
41-
return "tanh";
42-
else if (f == HardTanh)
43-
return "hardtanh";
35+
if (f == Identity)
36+
return "identity";
37+
else if (f == Rectifier)
38+
return "rectifier";
39+
else if (f == Tanh)
40+
return "tanh";
41+
else if (f == HardTanh)
42+
return "hardtanh";
4443
}
4544

4645
struct hardtanh_functor {
@@ -69,51 +68,53 @@ struct drectifier_functor {
6968

7069
class Activation_function
7170
{
72-
int size;
73-
activation_function_type f;
74-
75-
public:
76-
Activation_function() : size(0), f(Rectifier) { }
77-
78-
void resize(int size) { this->size = size; }
79-
void set_activation_function(activation_function_type f) { this->f = f; }
80-
81-
template <typename Engine>
82-
void initialize(Engine &engine, bool init_normal, double init_range) { }
83-
84-
int n_inputs () const { return size; }
85-
int n_outputs () const { return size; }
86-
87-
template <typename DerivedIn, typename DerivedOut>
88-
void fProp(const MatrixBase<DerivedIn> &input, const MatrixBase<DerivedOut> &output) const
89-
{
90-
UNCONST(DerivedOut, output, my_output);
91-
92-
switch (f)
93-
{
94-
case Identity: my_output = input; break;
95-
case Rectifier: my_output = input.unaryExpr(rectifier_functor()); break;
96-
case Tanh: my_output = input.unaryExpr(tanh_functor()); break;
97-
case HardTanh: my_output = input.unaryExpr(hardtanh_functor()); break;
98-
}
99-
}
100-
101-
template <typename DerivedGOut, typename DerivedGIn, typename DerivedIn, typename DerivedOut>
102-
void bProp(const MatrixBase<DerivedGOut> &input,
103-
MatrixBase<DerivedGIn> &output,
104-
const MatrixBase<DerivedIn> &finput,
105-
const MatrixBase<DerivedOut> &foutput) const
106-
{
107-
UNCONST(DerivedGIn, output, my_output);
108-
109-
switch (f)
110-
{
111-
case Identity: my_output = input; break;
112-
case Rectifier: my_output = finput.array().unaryExpr(drectifier_functor()) * input.array(); break;
113-
case Tanh: my_output = foutput.array().unaryExpr(tanh_functor()) * input.array(); break;
114-
case HardTanh: my_output = finput.array().unaryExpr(hardtanh_functor()) * input.array(); break;
115-
}
116-
}
71+
int size;
72+
activation_function_type f;
73+
74+
public:
75+
Activation_function() : size(0), f(Rectifier) { }
76+
77+
void resize(int size) { this->size = size; }
78+
void set_activation_function(activation_function_type f) { this->f = f; }
79+
80+
template <typename Engine>
81+
void initialize(Engine &engine, bool init_normal, double init_range) { }
82+
83+
int n_inputs () const { return size; }
84+
int n_outputs () const { return size; }
85+
86+
template <typename DerivedIn, typename DerivedOut>
87+
void fProp(const MatrixBase<DerivedIn> &input, const MatrixBase<DerivedOut> &output) const
88+
{
89+
UNCONST(DerivedOut, output, my_output);
90+
91+
switch (f)
92+
{
93+
case Identity: my_output = input; break;
94+
case Rectifier: my_output = input.unaryExpr(rectifier_functor()); break;
95+
case Tanh: my_output = input.unaryExpr(tanh_functor()); break;
96+
case HardTanh: my_output = input.unaryExpr(hardtanh_functor()); break;
97+
case InvalidFunction: std::abort();
98+
}
99+
}
100+
101+
template <typename DerivedGOut, typename DerivedGIn, typename DerivedIn, typename DerivedOut>
102+
void bProp(const MatrixBase<DerivedGOut> &input,
103+
MatrixBase<DerivedGIn> &output,
104+
const MatrixBase<DerivedIn> &finput,
105+
const MatrixBase<DerivedOut> &foutput) const
106+
{
107+
UNCONST(DerivedGIn, output, my_output);
108+
109+
switch (f)
110+
{
111+
case Identity: my_output = input; break;
112+
case Rectifier: my_output = finput.array().unaryExpr(drectifier_functor()) * input.array(); break;
113+
case Tanh: my_output = foutput.array().unaryExpr(tanh_functor()) * input.array(); break;
114+
case HardTanh: my_output = finput.array().unaryExpr(hardtanh_functor()) * input.array(); break;
115+
case InvalidFunction: std::abort();
116+
}
117+
}
117118
};
118119

119120
} // namespace nplm

src/Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
### Compilation options.
22

33
# C++ compiler. Tested with g++ and Intel icpc.
4-
CXX=/usr/bin/g++
4+
CXX=g++
55
#CXX=icpc
66

77
# Compiler options. Note that -DEIGEN_NO_DEBUG is essential for good performance!
88
#CFLAGS=-g
9-
CFLAGS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
9+
CFLAGS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG $(CXXFLAGS)
1010

1111
# Architecture. Set to x86_64 or i686 to override.
1212
ARCH:=$(shell uname -m)

src/SoftmaxLoss.h

+79-80
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
#ifndef SOFTMAXLOSS_H
1+
#ifndef SOFTMAXLOSS_H
22
#define SOFTMAXLOSS_H
33

4-
//#include <../3rdparty/Eigen/Dense>
54
#include <Eigen/Dense>
65
#include "multinomial.h"
76
#include "util.h"
@@ -20,55 +19,55 @@ enum loss_function_type { LogLoss, NCELoss, InvalidLoss };
2019

2120
inline loss_function_type string_to_loss_function (const std::string &s)
2221
{
23-
if (s == "log")
24-
return LogLoss;
25-
else if (s == "nce")
26-
return NCELoss;
27-
else
28-
return InvalidLoss;
22+
if (s == "log")
23+
return LogLoss;
24+
else if (s == "nce")
25+
return NCELoss;
26+
else
27+
return InvalidLoss;
2928
}
3029

3130
inline std::string loss_function_to_string (loss_function_type f)
3231
{
33-
if (f == LogLoss)
34-
return "log";
35-
else if (f == NCELoss)
36-
return "nce";
32+
if (f == LogLoss)
33+
return "log";
34+
else if (f == NCELoss)
35+
return "nce";
3736
}
3837

3938
/// Note: Outputs log-probabilities.
4039

4140
struct SoftmaxLogLoss
4241
{
43-
template <typename DerivedI, typename DerivedW, typename DerivedO>
44-
void fProp(const MatrixBase<DerivedI> &input, const MatrixBase<DerivedW> &output_words, const MatrixBase<DerivedO> &output_const, double &loss)
42+
template <typename DerivedI, typename DerivedW, typename DerivedO>
43+
void fProp(const MatrixBase<DerivedI> &input, const MatrixBase<DerivedW> &output_words, const MatrixBase<DerivedO> &output_const, double &loss)
44+
{
45+
UNCONST(DerivedO, output_const, output);
46+
47+
double log_likelihood = 0.0;
48+
49+
#pragma omp parallel for reduction(+:log_likelihood)
50+
for (int train_id = 0; train_id < input.cols(); train_id++)
4551
{
46-
UNCONST(DerivedO, output_const, output);
47-
48-
double log_likelihood = 0.0;
49-
50-
#pragma omp parallel for reduction(+:log_likelihood)
51-
for (int train_id = 0; train_id < input.cols(); train_id++)
52-
{
53-
double normalization = logsum(input.col(train_id));
54-
output.col(train_id).array() = input.col(train_id).array() - normalization;
55-
log_likelihood += output(output_words(train_id), train_id);
56-
}
57-
loss = log_likelihood;
52+
double normalization = logsum(input.col(train_id));
53+
output.col(train_id).array() = input.col(train_id).array() - normalization;
54+
log_likelihood += output(output_words(train_id), train_id);
5855
}
59-
60-
template <typename DerivedW, typename DerivedO, typename DerivedI>
61-
void bProp(const MatrixBase<DerivedW> &output_words, const MatrixBase<DerivedO> &output, const MatrixBase<DerivedI> &grad_input_const)
56+
loss = log_likelihood;
57+
}
58+
59+
template <typename DerivedW, typename DerivedO, typename DerivedI>
60+
void bProp(const MatrixBase<DerivedW> &output_words, const MatrixBase<DerivedO> &output, const MatrixBase<DerivedI> &grad_input_const)
61+
{
62+
UNCONST(DerivedI, grad_input_const, grad_input);
63+
grad_input.setZero();
64+
#pragma omp parallel for
65+
for (int train_id = 0; train_id < output.cols(); train_id++)
6266
{
63-
UNCONST(DerivedI, grad_input_const, grad_input);
64-
grad_input.setZero();
65-
#pragma omp parallel for
66-
for (int train_id = 0; train_id < output.cols(); train_id++)
67-
{
68-
grad_input(output_words(train_id), train_id) += 1.;
69-
grad_input.col(train_id) -= output.col(train_id).array().exp().matrix();
70-
}
67+
grad_input(output_words(train_id), train_id) += 1.;
68+
grad_input.col(train_id) -= output.col(train_id).array().exp().matrix();
7169
}
70+
}
7271
};
7372

7473
///// Softmax layer plus NCE loss function.
@@ -81,55 +80,55 @@ struct SoftmaxLogLoss
8180
template <typename Multinomial>
8281
class SoftmaxNCELoss
8382
{
84-
const Multinomial &unigram;
83+
const Multinomial &unigram;
8584

86-
public:
87-
SoftmaxNCELoss(const Multinomial &unigram)
85+
public:
86+
SoftmaxNCELoss(const Multinomial &unigram)
8887
: unigram(unigram)
88+
{
89+
}
90+
91+
template <typename DerivedI, typename DerivedW, typename DerivedO>
92+
void fProp(const MatrixBase<DerivedI> &scores,
93+
const MatrixBase<DerivedW> &minibatch_samples,
94+
const MatrixBase<DerivedO> &output_const, double &loss)
95+
{
96+
UNCONST(DerivedO, output_const, output);
97+
double log_likelihood = 0.0;
98+
int num_noise_samples = minibatch_samples.rows()-1;
99+
double log_num_noise_samples = std::log(num_noise_samples);
100+
#pragma omp parallel for reduction(+:log_likelihood) schedule(static)
101+
for (int train_id = 0; train_id < scores.cols(); train_id++)
89102
{
103+
for (int sample_id = 0;sample_id < minibatch_samples.rows(); sample_id++)
104+
{
105+
int sample = minibatch_samples(sample_id, train_id);
106+
// To avoid zero or infinite probabilities,
107+
// never take exp of score without normalizing first,
108+
// even if it's a little slower...
109+
double score = scores(sample_id, train_id);
110+
double score_noise = log_num_noise_samples + unigram.logprob(sample);
111+
double z = logadd(score, score_noise);
112+
double logprob = score - z;
113+
double logprob_noise = score_noise - z;
114+
output(sample_id, train_id) = std::exp(logprob);
115+
log_likelihood += sample_id == 0 ? logprob : logprob_noise;
116+
}
90117
}
91-
92-
template <typename DerivedI, typename DerivedW, typename DerivedO>
93-
void fProp(const MatrixBase<DerivedI> &scores,
94-
const MatrixBase<DerivedW> &minibatch_samples,
95-
const MatrixBase<DerivedO> &output_const, double &loss)
96-
{
97-
UNCONST(DerivedO, output_const, output);
98-
double log_likelihood = 0.0;
99-
int num_noise_samples = minibatch_samples.rows()-1;
100-
double log_num_noise_samples = std::log(num_noise_samples);
101-
#pragma omp parallel for reduction(+:log_likelihood) schedule(static)
102-
for (int train_id = 0; train_id < scores.cols(); train_id++)
103-
{
104-
for (int sample_id = 0;sample_id < minibatch_samples.rows(); sample_id++)
105-
{
106-
int sample = minibatch_samples(sample_id, train_id);
107-
// To avoid zero or infinite probabilities,
108-
// never take exp of score without normalizing first,
109-
// even if it's a little slower...
110-
double score = scores(sample_id, train_id);
111-
double score_noise = log_num_noise_samples + unigram.logprob(sample);
112-
double z = logadd(score, score_noise);
113-
double logprob = score - z;
114-
double logprob_noise = score_noise - z;
115-
output(sample_id, train_id) = std::exp(logprob);
116-
log_likelihood += sample_id == 0 ? logprob : logprob_noise;
117-
}
118-
}
119-
loss = log_likelihood;
120-
}
121-
122-
template <typename DerivedO, typename DerivedI>
123-
void bProp(const MatrixBase<DerivedO> &probs, const MatrixBase<DerivedI> &output_const)
118+
loss = log_likelihood;
119+
}
120+
121+
template <typename DerivedO, typename DerivedI>
122+
void bProp(const MatrixBase<DerivedO> &probs, const MatrixBase<DerivedI> &output_const)
123+
{
124+
UNCONST(DerivedI, output_const, output);
125+
#pragma omp parallel for schedule(static)
126+
for (int train_id = 0; train_id < probs.cols(); train_id++)
124127
{
125-
UNCONST(DerivedI, output_const, output);
126-
#pragma omp parallel for schedule(static)
127-
for (int train_id = 0; train_id < probs.cols(); train_id++)
128-
{
129-
output.col(train_id) = -probs.col(train_id);
130-
output(0, train_id) += 1.0;
131-
}
128+
output.col(train_id) = -probs.col(train_id);
129+
output(0, train_id) += 1.0;
132130
}
131+
}
133132
};
134133

135134
} // namespace nplm

0 commit comments

Comments (0)