Skip to content

Commit ca9c48c

Browse files
committed
update CPP files
1 parent 1e70eb1 commit ca9c48c

35 files changed

+9233
-228
lines changed

.DS_Store

2 KB
Binary file not shown.

src/.DS_Store

6 KB
Binary file not shown.

src/.Rapp.history

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
load("/Users/mingzhiye/CppProjects/cit/src/symbols.rds")

src/Makevars

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
CXX_STD=CXX11
2-
PKG_LIBS = `gsl-config --libs`
3-
PKG_CFLAGS = `gsl-config --cflags`
1+
CXX_STD=CXX14
2+
PKG_LIBS = $(shell gsl-config --libs)
3+
PKG_CPPFLAGS = $(shell gsl-config --cflags)

src/Makevars.win

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
CXX_STD=CXX11
1+
CXX_STD=CXX14
22
PKG_CPPFLAGS=-I$(LIB_GSL)/include
33
PKG_LIBS=-L$(LIB_GSL)/lib -lgsl -lgslcblas

src/cit_init.c

-27
This file was deleted.

src/citbin_v1.cpp

+315
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
#include <R.h>
2+
#include <Rmath.h>
3+
#include <vector>
4+
#include <algorithm>
5+
#include <gsl/gsl_fit.h>
6+
#include <gsl/gsl_multifit.h>
7+
#include <gsl/gsl_statistics.h>
8+
#include <gsl/gsl_math.h>
9+
#include <gsl/gsl_cdf.h>
10+
#include <iostream>
11+
#include <random> // std::default_random_engine
12+
#include "logisticfunc.h"
13+
#include <Rcpp.h>
14+
#include "maxElementWithNan.h"
15+
16+
using namespace Rcpp;
17+
using namespace std;
18+
19+
20+
/*
21+
L: matrix of continuous instrumental variables
22+
G: matrix of candidate causal mediators
23+
T: matrix of 0/1 variables
24+
Programmer: Joshua Millstein
25+
*/
26+
27+
28+
29+
// [[Rcpp::export]]
30+
void citbin( Rcpp::NumericVector L, Rcpp::NumericVector G, Rcpp::NumericVector T, int &maxit, int &nrow,
31+
int &ncol, Rcpp::NumericVector pval1, Rcpp::NumericVector pval2, Rcpp::NumericVector pval3, Rcpp::NumericVector pval4 , Rcpp::NumericVector pval3nc, int &rseed)
32+
{
33+
unsigned seed = rseed;
34+
int rw, cl, i, rind, df, df1, df2, nobs, ip, npos, nperm, nmiss, stride;
35+
double rss2, rss3, rss5, F, pv, pvp, maxp, tmp, rhs, testval;
36+
bool aa, bb, cc, dd, converged;
37+
const int firstloop = 1000;
38+
const int posno = 20;
39+
const double alpha = .1;
40+
vector<vector<double> > LL;
41+
vector<double> pvec;
42+
vector<double> gpred;
43+
vector<double> gresid;
44+
45+
gsl_matrix *Lm, *cov, *X;
46+
gsl_vector *Gm, *Tm, *Gp, *c;
47+
48+
double *designmat = new double[ nrow * (ncol + 2) ];
49+
double *phenovec = new double[ nrow ];
50+
51+
LL.resize( nrow );
52+
GetRNGstate();
53+
54+
for(rw = 0; rw < nrow; rw++) {
55+
LL[rw].resize( ncol );
56+
}
57+
58+
for(cl = 0; cl < ncol; cl++) {
59+
for(rw = 0; rw < nrow; rw++) {
60+
LL[rw][cl] = L[rw + nrow * cl];
61+
}
62+
}
63+
64+
65+
// create analysis vectors w/no missing data
66+
nobs = 0;
67+
for(rw = 0; rw < nrow; rw++) {
68+
nmiss = 0;
69+
for(cl = 0; cl < ncol; cl++) {
70+
if( LL[rw][cl] == -9999 ) {
71+
nmiss++;
72+
}
73+
}
74+
aa = nmiss == 0;
75+
bb = G[rw] != -9999;
76+
cc = T[rw] != -9999;
77+
if(aa && bb && cc) {
78+
nobs++;
79+
}
80+
}
81+
82+
Lm = gsl_matrix_alloc (nobs, ncol);
83+
Gm = gsl_vector_alloc (nobs);
84+
Tm = gsl_vector_alloc (nobs);
85+
rind = 0;
86+
for(rw = 0; rw < nrow; rw++) {
87+
nmiss = 0;
88+
for(cl = 0; cl < ncol; cl++) {
89+
if( LL[rw][cl] == -9999 ) {
90+
nmiss++;
91+
}
92+
}
93+
aa = nmiss == 0;
94+
bb = G[rw] != -9999;
95+
cc = T[rw] != -9999;
96+
97+
if(aa && bb && cc) {
98+
for(cl = 0; cl < ncol; cl++) {
99+
gsl_matrix_set(Lm, rind, cl, LL[rw][cl]);
100+
}
101+
gsl_vector_set(Gm, rind, G[rw]);
102+
gsl_vector_set(Tm, rind, T[rw]);
103+
rind++;
104+
}
105+
}
106+
// fit model T ~ L
107+
ip = 1 + ncol; // intercept + multiple L variable
108+
for(rw = 0; rw < nobs; rw++) {
109+
phenovec[ rw ] = gsl_vector_get(Tm, rw );
110+
designmat[ rw * ip ] = 1; // intercept
111+
for(cl = 0; cl < ncol; cl++) {
112+
designmat[ rw * ip + 1 + cl ] = gsl_matrix_get (Lm, rw, cl);
113+
}
114+
}
115+
df = ncol;
116+
converged = logisticReg( pv, phenovec, designmat, nobs, ip, df );
117+
if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl;
118+
pv = ( converged ) ? pv : std::numeric_limits<double>::quiet_NaN();
119+
pvec.push_back( pv ); // pval for T ~ L, 9 if it did not converge, p1
120+
121+
// fit model T ~ L + G
122+
stride = ip + 1;
123+
for(rw = 0; rw < nobs; rw++) {
124+
designmat[ rw * stride ] = 1; // intercept
125+
for(cl = 0; cl < ncol; cl++) {
126+
designmat[ rw * stride + 1 + cl ] = gsl_matrix_get (Lm, rw, cl);
127+
}
128+
designmat[ rw * stride + 1 + ncol ] = gsl_vector_get(Gm, rw );
129+
}
130+
131+
df = 1;
132+
converged = logisticReg( pv, phenovec, designmat, nobs, stride, df );
133+
if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl;
134+
pv = ( converged ) ? pv : std::numeric_limits<double>::quiet_NaN();
135+
pvec.push_back( pv ); // pval for T ~ G|L, 9 if it did not converge, p2
136+
137+
// fit model G ~ T
138+
X = gsl_matrix_alloc (nobs,2);
139+
for(rw = 0; rw < nobs; rw++) {
140+
gsl_matrix_set(X, rw, 0, 1.0); // intercept
141+
gsl_matrix_set(X, rw, 1, gsl_vector_get (Tm, rw));
142+
}
143+
c = gsl_vector_alloc (2);
144+
cov = gsl_matrix_alloc (2, 2);
145+
gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (nobs, 2);
146+
gsl_multifit_linear (X, Gm, c, cov, &rss2, work);
147+
gsl_multifit_linear_free (work);
148+
gsl_matrix_free (X);
149+
gsl_matrix_free (cov);
150+
gsl_vector_free (c);
151+
152+
// fit model G ~ L + T
153+
X = gsl_matrix_alloc (nobs, ip + 1);
154+
for(rw = 0; rw < nobs; rw++) {
155+
gsl_matrix_set(X, rw, 0, 1.0); // intercept
156+
for(cl = 0; cl < ncol; cl++) {
157+
gsl_matrix_set(X, rw, cl + 1, gsl_matrix_get (Lm, rw, cl));
158+
}
159+
gsl_matrix_set(X, rw, ip, gsl_vector_get (Tm, rw));
160+
}
161+
c = gsl_vector_alloc (ip + 1);
162+
cov = gsl_matrix_alloc (ip + 1, ip + 1);
163+
work = gsl_multifit_linear_alloc (nobs, ip + 1);
164+
gsl_multifit_linear (X, Gm, c, cov, &rss3, work);
165+
gsl_multifit_linear_free (work);
166+
gsl_matrix_free (X);
167+
gsl_matrix_free (cov);
168+
gsl_vector_free (c);
169+
df1 = ncol;
170+
df2 = nobs - ip -1;
171+
F = df2*(rss2-rss3)/(rss3*df1);
172+
pv = gsl_cdf_fdist_Q(F, df1, df2);
173+
pvec.push_back( pv ); // pval for G ~ L|T, p3
174+
175+
// fit model T ~ G + L to test L
176+
stride = ip + 1;
177+
for(rw = 0; rw < nobs; rw++) {
178+
designmat[ rw * stride ] = 1; // intercept
179+
designmat[ rw * stride + 1 ] = gsl_vector_get(Gm, rw );
180+
for(cl = 0; cl < ncol; cl++) {
181+
designmat[ rw * stride + 2 + cl ] = gsl_matrix_get (Lm, rw, cl);
182+
}
183+
}
184+
df = ncol;
185+
converged = logisticReg( pv, phenovec, designmat, nobs, stride, df );
186+
if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl;
187+
pv = ( converged ) ? pv : std::numeric_limits<double>::quiet_NaN(); // p-value for T ~ L|G
188+
pval3nc[0] = pv; // pvalue to be used for non-centrality parameter
189+
190+
// fit model G ~ L
191+
X = gsl_matrix_alloc (nobs, ip );
192+
for(rw = 0; rw < nobs; rw++) {
193+
gsl_matrix_set(X, rw, 0, 1.0); // intercept
194+
for(cl = 0; cl < ncol; cl++) {
195+
gsl_matrix_set(X, rw, cl + 1, gsl_matrix_get (Lm, rw, cl));
196+
}
197+
}
198+
c = gsl_vector_alloc (ip);
199+
cov = gsl_matrix_alloc (ip, ip);
200+
work = gsl_multifit_linear_alloc (nobs, ip);
201+
gsl_multifit_linear (X, Gm, c, cov, &rss5, work);
202+
gsl_multifit_linear_free (work);
203+
gsl_matrix_free (cov);
204+
205+
// residuals for G ~ L
206+
for(rw = 0; rw < nobs; rw++) {
207+
rhs = 0;
208+
for(cl = 0; cl < ip; cl++) {
209+
rhs += gsl_vector_get (c, cl) * gsl_matrix_get (X, rw, cl);
210+
}
211+
212+
gpred.push_back(rhs);
213+
tmp = gsl_vector_get (Gm, rw) - rhs;
214+
gresid.push_back(tmp);
215+
}
216+
gsl_vector_free (c);
217+
218+
// Conduct an initial set of permutations
219+
220+
Gp = gsl_vector_alloc (nobs);
221+
npos = 0;
222+
for(i = 0; i < firstloop; i++){
223+
// randomly permute residuals
224+
225+
shuffle( gresid.begin(), gresid.end(), std::default_random_engine(seed) );
226+
227+
// compute G* based on marginal L effects and permuted residuals
228+
for(rw = 0; rw < nobs; rw++) {
229+
gsl_vector_set(Gp, rw, gpred[rw] + gresid[rw] );
230+
}
231+
232+
// Recompute p-value for T ~ L|G based on G*
233+
// fit model T ~ G* + L to test L
234+
stride = ip + 1;
235+
for(rw = 0; rw < nobs; rw++) {
236+
designmat[ rw * stride ] = 1; // intercept
237+
designmat[ rw * stride + 1 ] = gsl_vector_get(Gp, rw );
238+
for(cl = 0; cl < ncol; cl++) {
239+
designmat[ rw * stride + 2 + cl ] = gsl_matrix_get (Lm, rw, cl);
240+
}
241+
}
242+
243+
df = ncol;
244+
converged = logisticReg( pvp, phenovec, designmat, nobs, stride, df );
245+
if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl;
246+
pvp = ( converged ) ? pvp : std::numeric_limits<double>::quiet_NaN(); // p-value for T ~ L|G*
247+
if( pvp > pv ) npos++;
248+
249+
} // end initial permutation loop
250+
251+
// Conduct additional permutations if there is some indication of statistical significance
252+
maxp = maxElementWithNan(pvec);
253+
nperm = firstloop;
254+
aa = npos < posno;
255+
bb = maxp < alpha;
256+
cc = nperm < maxit;
257+
maxp = maxElementWithNan(pvec);
258+
testval = (double) (npos + 1) / nperm ;
259+
dd = maxp < testval; // check that other component p-values are small
260+
261+
if(aa && bb && cc && dd){
262+
while(aa && cc) {
263+
264+
// randomly permute residuals
265+
266+
shuffle( gresid.begin(), gresid.end(), std::default_random_engine(seed) );
267+
// compute G* based on marginal L effects and permuted residuals
268+
for(rw = 0; rw < nobs; rw++) {
269+
gsl_vector_set(Gp, rw, gpred[rw] + gresid[rw] );
270+
}
271+
272+
// Recompute p-value for T ~ L|G based on G*
273+
// fit model T ~ G* + L to test L
274+
stride = ip + 1;
275+
for(rw = 0; rw < nobs; rw++) {
276+
designmat[ rw * stride ] = 1; // intercept
277+
designmat[ rw * stride + 1 ] = gsl_vector_get(Gp, rw );
278+
for(cl = 0; cl < ncol; cl++) {
279+
designmat[ rw * stride + 2 + cl ] = gsl_matrix_get (Lm, rw, cl);
280+
}
281+
}
282+
283+
df = ncol;
284+
converged = logisticReg( pvp, phenovec, designmat, nobs, stride, df );
285+
if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl;
286+
pvp = ( converged ) ? pvp : std::numeric_limits<double>::quiet_NaN(); // p-value for T ~ L|G*
287+
if( pvp > pv ) npos++;
288+
289+
aa = npos < posno;
290+
cc = nperm < ( maxit - 1 );
291+
nperm++;
292+
} // end 'while' permutation loop
293+
} // End if
294+
pv = 1.0 * npos / nperm;
295+
pvec.push_back(pv); // pval for L ind T|G
296+
297+
pval1[0] = pvec[0]; // pval for T ~ L
298+
pval2[0] = pvec[1]; // pval for T ~ G|L
299+
pval3[0] = pvec[2]; // pval for G ~ L|T
300+
pval4[0] = pvec[3]; // pval for L ind T|G
301+
302+
pvec.clear();
303+
gresid.clear();
304+
gpred.clear();
305+
gsl_matrix_free (Lm);
306+
gsl_vector_free (Gm);
307+
gsl_vector_free (Tm);
308+
gsl_vector_free (Gp);
309+
310+
delete [] designmat;
311+
delete [] phenovec;
312+
313+
LL.clear();
314+
315+
} // End citbin

0 commit comments

Comments
 (0)