|
| 1 | +#include <R.h> |
| 2 | +#include <Rmath.h> |
| 3 | +#include <vector> |
| 4 | +#include <algorithm> |
| 5 | +#include <gsl/gsl_fit.h> |
| 6 | +#include <gsl/gsl_multifit.h> |
| 7 | +#include <gsl/gsl_statistics.h> |
| 8 | +#include <gsl/gsl_math.h> |
| 9 | +#include <gsl/gsl_cdf.h> |
| 10 | +#include <iostream> |
| 11 | +#include <random> // std::default_random_engine |
| 12 | +#include "logisticfunc.h" |
| 13 | +#include <Rcpp.h> |
| 14 | +#include "maxElementWithNan.h" |
| 15 | + |
| 16 | +using namespace Rcpp; |
| 17 | +using namespace std; |
| 18 | + |
| 19 | + |
| 20 | +/* |
| 21 | +L: matrix of continuous instrumental variables |
| 22 | +G: matrix of candidate causal mediators |
| 23 | +T: matrix of 0/1 variables |
| 24 | +Programmer: Joshua Millstein |
| 25 | +*/ |
| 26 | + |
| 27 | + |
| 28 | + |
| 29 | +// [[Rcpp::export]] |
| 30 | +void citbin( Rcpp::NumericVector L, Rcpp::NumericVector G, Rcpp::NumericVector T, int &maxit, int &nrow, |
| 31 | + int &ncol, Rcpp::NumericVector pval1, Rcpp::NumericVector pval2, Rcpp::NumericVector pval3, Rcpp::NumericVector pval4 , Rcpp::NumericVector pval3nc, int &rseed) |
| 32 | +{ |
| 33 | + unsigned seed = rseed; |
| 34 | + int rw, cl, i, rind, df, df1, df2, nobs, ip, npos, nperm, nmiss, stride; |
| 35 | + double rss2, rss3, rss5, F, pv, pvp, maxp, tmp, rhs, testval; |
| 36 | + bool aa, bb, cc, dd, converged; |
| 37 | + const int firstloop = 1000; |
| 38 | + const int posno = 20; |
| 39 | + const double alpha = .1; |
| 40 | + vector<vector<double> > LL; |
| 41 | + vector<double> pvec; |
| 42 | + vector<double> gpred; |
| 43 | + vector<double> gresid; |
| 44 | + |
| 45 | + gsl_matrix *Lm, *cov, *X; |
| 46 | + gsl_vector *Gm, *Tm, *Gp, *c; |
| 47 | + |
| 48 | + double *designmat = new double[ nrow * (ncol + 2) ]; |
| 49 | + double *phenovec = new double[ nrow ]; |
| 50 | + |
| 51 | + LL.resize( nrow ); |
| 52 | + GetRNGstate(); |
| 53 | + |
| 54 | + for(rw = 0; rw < nrow; rw++) { |
| 55 | + LL[rw].resize( ncol ); |
| 56 | + } |
| 57 | + |
| 58 | + for(cl = 0; cl < ncol; cl++) { |
| 59 | + for(rw = 0; rw < nrow; rw++) { |
| 60 | + LL[rw][cl] = L[rw + nrow * cl]; |
| 61 | + } |
| 62 | + } |
| 63 | + |
| 64 | + |
| 65 | +// create analysis vectors w/no missing data |
| 66 | + nobs = 0; |
| 67 | + for(rw = 0; rw < nrow; rw++) { |
| 68 | + nmiss = 0; |
| 69 | + for(cl = 0; cl < ncol; cl++) { |
| 70 | + if( LL[rw][cl] == -9999 ) { |
| 71 | + nmiss++; |
| 72 | + } |
| 73 | + } |
| 74 | + aa = nmiss == 0; |
| 75 | + bb = G[rw] != -9999; |
| 76 | + cc = T[rw] != -9999; |
| 77 | + if(aa && bb && cc) { |
| 78 | + nobs++; |
| 79 | + } |
| 80 | + } |
| 81 | + |
| 82 | + Lm = gsl_matrix_alloc (nobs, ncol); |
| 83 | + Gm = gsl_vector_alloc (nobs); |
| 84 | + Tm = gsl_vector_alloc (nobs); |
| 85 | + rind = 0; |
| 86 | + for(rw = 0; rw < nrow; rw++) { |
| 87 | + nmiss = 0; |
| 88 | + for(cl = 0; cl < ncol; cl++) { |
| 89 | + if( LL[rw][cl] == -9999 ) { |
| 90 | + nmiss++; |
| 91 | + } |
| 92 | + } |
| 93 | + aa = nmiss == 0; |
| 94 | + bb = G[rw] != -9999; |
| 95 | + cc = T[rw] != -9999; |
| 96 | + |
| 97 | + if(aa && bb && cc) { |
| 98 | + for(cl = 0; cl < ncol; cl++) { |
| 99 | + gsl_matrix_set(Lm, rind, cl, LL[rw][cl]); |
| 100 | + } |
| 101 | + gsl_vector_set(Gm, rind, G[rw]); |
| 102 | + gsl_vector_set(Tm, rind, T[rw]); |
| 103 | + rind++; |
| 104 | + } |
| 105 | + } |
| 106 | + // fit model T ~ L |
| 107 | + ip = 1 + ncol; // intercept + multiple L variable |
| 108 | + for(rw = 0; rw < nobs; rw++) { |
| 109 | + phenovec[ rw ] = gsl_vector_get(Tm, rw ); |
| 110 | + designmat[ rw * ip ] = 1; // intercept |
| 111 | + for(cl = 0; cl < ncol; cl++) { |
| 112 | + designmat[ rw * ip + 1 + cl ] = gsl_matrix_get (Lm, rw, cl); |
| 113 | + } |
| 114 | + } |
| 115 | + df = ncol; |
| 116 | + converged = logisticReg( pv, phenovec, designmat, nobs, ip, df ); |
| 117 | + if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl; |
| 118 | + pv = ( converged ) ? pv : std::numeric_limits<double>::quiet_NaN(); |
| 119 | + pvec.push_back( pv ); // pval for T ~ L, 9 if it did not converge, p1 |
| 120 | + |
| 121 | + // fit model T ~ L + G |
| 122 | + stride = ip + 1; |
| 123 | + for(rw = 0; rw < nobs; rw++) { |
| 124 | + designmat[ rw * stride ] = 1; // intercept |
| 125 | + for(cl = 0; cl < ncol; cl++) { |
| 126 | + designmat[ rw * stride + 1 + cl ] = gsl_matrix_get (Lm, rw, cl); |
| 127 | + } |
| 128 | + designmat[ rw * stride + 1 + ncol ] = gsl_vector_get(Gm, rw ); |
| 129 | + } |
| 130 | + |
| 131 | + df = 1; |
| 132 | + converged = logisticReg( pv, phenovec, designmat, nobs, stride, df ); |
| 133 | + if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl; |
| 134 | + pv = ( converged ) ? pv : std::numeric_limits<double>::quiet_NaN(); |
| 135 | + pvec.push_back( pv ); // pval for T ~ G|L, 9 if it did not converge, p2 |
| 136 | + |
| 137 | + // fit model G ~ T |
| 138 | + X = gsl_matrix_alloc (nobs,2); |
| 139 | + for(rw = 0; rw < nobs; rw++) { |
| 140 | + gsl_matrix_set(X, rw, 0, 1.0); // intercept |
| 141 | + gsl_matrix_set(X, rw, 1, gsl_vector_get (Tm, rw)); |
| 142 | + } |
| 143 | + c = gsl_vector_alloc (2); |
| 144 | + cov = gsl_matrix_alloc (2, 2); |
| 145 | + gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (nobs, 2); |
| 146 | + gsl_multifit_linear (X, Gm, c, cov, &rss2, work); |
| 147 | + gsl_multifit_linear_free (work); |
| 148 | + gsl_matrix_free (X); |
| 149 | + gsl_matrix_free (cov); |
| 150 | + gsl_vector_free (c); |
| 151 | + |
| 152 | + // fit model G ~ L + T |
| 153 | + X = gsl_matrix_alloc (nobs, ip + 1); |
| 154 | + for(rw = 0; rw < nobs; rw++) { |
| 155 | + gsl_matrix_set(X, rw, 0, 1.0); // intercept |
| 156 | + for(cl = 0; cl < ncol; cl++) { |
| 157 | + gsl_matrix_set(X, rw, cl + 1, gsl_matrix_get (Lm, rw, cl)); |
| 158 | + } |
| 159 | + gsl_matrix_set(X, rw, ip, gsl_vector_get (Tm, rw)); |
| 160 | + } |
| 161 | + c = gsl_vector_alloc (ip + 1); |
| 162 | + cov = gsl_matrix_alloc (ip + 1, ip + 1); |
| 163 | + work = gsl_multifit_linear_alloc (nobs, ip + 1); |
| 164 | + gsl_multifit_linear (X, Gm, c, cov, &rss3, work); |
| 165 | + gsl_multifit_linear_free (work); |
| 166 | + gsl_matrix_free (X); |
| 167 | + gsl_matrix_free (cov); |
| 168 | + gsl_vector_free (c); |
| 169 | + df1 = ncol; |
| 170 | + df2 = nobs - ip -1; |
| 171 | + F = df2*(rss2-rss3)/(rss3*df1); |
| 172 | + pv = gsl_cdf_fdist_Q(F, df1, df2); |
| 173 | + pvec.push_back( pv ); // pval for G ~ L|T, p3 |
| 174 | + |
| 175 | + // fit model T ~ G + L to test L |
| 176 | + stride = ip + 1; |
| 177 | + for(rw = 0; rw < nobs; rw++) { |
| 178 | + designmat[ rw * stride ] = 1; // intercept |
| 179 | + designmat[ rw * stride + 1 ] = gsl_vector_get(Gm, rw ); |
| 180 | + for(cl = 0; cl < ncol; cl++) { |
| 181 | + designmat[ rw * stride + 2 + cl ] = gsl_matrix_get (Lm, rw, cl); |
| 182 | + } |
| 183 | + } |
| 184 | + df = ncol; |
| 185 | + converged = logisticReg( pv, phenovec, designmat, nobs, stride, df ); |
| 186 | + if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl; |
| 187 | + pv = ( converged ) ? pv : std::numeric_limits<double>::quiet_NaN(); // p-value for T ~ L|G |
| 188 | + pval3nc[0] = pv; // pvalue to be used for non-centrality parameter |
| 189 | + |
| 190 | + // fit model G ~ L |
| 191 | + X = gsl_matrix_alloc (nobs, ip ); |
| 192 | + for(rw = 0; rw < nobs; rw++) { |
| 193 | + gsl_matrix_set(X, rw, 0, 1.0); // intercept |
| 194 | + for(cl = 0; cl < ncol; cl++) { |
| 195 | + gsl_matrix_set(X, rw, cl + 1, gsl_matrix_get (Lm, rw, cl)); |
| 196 | + } |
| 197 | + } |
| 198 | + c = gsl_vector_alloc (ip); |
| 199 | + cov = gsl_matrix_alloc (ip, ip); |
| 200 | + work = gsl_multifit_linear_alloc (nobs, ip); |
| 201 | + gsl_multifit_linear (X, Gm, c, cov, &rss5, work); |
| 202 | + gsl_multifit_linear_free (work); |
| 203 | + gsl_matrix_free (cov); |
| 204 | + |
| 205 | + // residuals for G ~ L |
| 206 | + for(rw = 0; rw < nobs; rw++) { |
| 207 | + rhs = 0; |
| 208 | + for(cl = 0; cl < ip; cl++) { |
| 209 | + rhs += gsl_vector_get (c, cl) * gsl_matrix_get (X, rw, cl); |
| 210 | + } |
| 211 | + |
| 212 | + gpred.push_back(rhs); |
| 213 | + tmp = gsl_vector_get (Gm, rw) - rhs; |
| 214 | + gresid.push_back(tmp); |
| 215 | + } |
| 216 | + gsl_vector_free (c); |
| 217 | + |
| 218 | + // Conduct an initial set of permutations |
| 219 | + |
| 220 | + Gp = gsl_vector_alloc (nobs); |
| 221 | + npos = 0; |
| 222 | + for(i = 0; i < firstloop; i++){ |
| 223 | + // randomly permute residuals |
| 224 | + |
| 225 | + shuffle( gresid.begin(), gresid.end(), std::default_random_engine(seed) ); |
| 226 | + |
| 227 | + // compute G* based on marginal L effects and permuted residuals |
| 228 | + for(rw = 0; rw < nobs; rw++) { |
| 229 | + gsl_vector_set(Gp, rw, gpred[rw] + gresid[rw] ); |
| 230 | + } |
| 231 | + |
| 232 | + // Recompute p-value for T ~ L|G based on G* |
| 233 | + // fit model T ~ G* + L to test L |
| 234 | + stride = ip + 1; |
| 235 | + for(rw = 0; rw < nobs; rw++) { |
| 236 | + designmat[ rw * stride ] = 1; // intercept |
| 237 | + designmat[ rw * stride + 1 ] = gsl_vector_get(Gp, rw ); |
| 238 | + for(cl = 0; cl < ncol; cl++) { |
| 239 | + designmat[ rw * stride + 2 + cl ] = gsl_matrix_get (Lm, rw, cl); |
| 240 | + } |
| 241 | + } |
| 242 | + |
| 243 | + df = ncol; |
| 244 | + converged = logisticReg( pvp, phenovec, designmat, nobs, stride, df ); |
| 245 | + if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl; |
| 246 | + pvp = ( converged ) ? pvp : std::numeric_limits<double>::quiet_NaN(); // p-value for T ~ L|G* |
| 247 | + if( pvp > pv ) npos++; |
| 248 | + |
| 249 | + } // end initial permutation loop |
| 250 | + |
| 251 | + // Conduct additional permutations if there is some indication of statistical significance |
| 252 | + maxp = maxElementWithNan(pvec); |
| 253 | + nperm = firstloop; |
| 254 | + aa = npos < posno; |
| 255 | + bb = maxp < alpha; |
| 256 | + cc = nperm < maxit; |
| 257 | + maxp = maxElementWithNan(pvec); |
| 258 | + testval = (double) (npos + 1) / nperm ; |
| 259 | + dd = maxp < testval; // check that other component p-values are small |
| 260 | + |
| 261 | + if(aa && bb && cc && dd){ |
| 262 | + while(aa && cc) { |
| 263 | + |
| 264 | + // randomly permute residuals |
| 265 | + |
| 266 | + shuffle( gresid.begin(), gresid.end(), std::default_random_engine(seed) ); |
| 267 | + // compute G* based on marginal L effects and permuted residuals |
| 268 | + for(rw = 0; rw < nobs; rw++) { |
| 269 | + gsl_vector_set(Gp, rw, gpred[rw] + gresid[rw] ); |
| 270 | + } |
| 271 | + |
| 272 | + // Recompute p-value for T ~ L|G based on G* |
| 273 | + // fit model T ~ G* + L to test L |
| 274 | + stride = ip + 1; |
| 275 | + for(rw = 0; rw < nobs; rw++) { |
| 276 | + designmat[ rw * stride ] = 1; // intercept |
| 277 | + designmat[ rw * stride + 1 ] = gsl_vector_get(Gp, rw ); |
| 278 | + for(cl = 0; cl < ncol; cl++) { |
| 279 | + designmat[ rw * stride + 2 + cl ] = gsl_matrix_get (Lm, rw, cl); |
| 280 | + } |
| 281 | + } |
| 282 | + |
| 283 | + df = ncol; |
| 284 | + converged = logisticReg( pvp, phenovec, designmat, nobs, stride, df ); |
| 285 | + if(!converged)Rcpp::Rcout<< "Warning: Cannot Converge when doing regression for calculating P-value." << std::endl; |
| 286 | + pvp = ( converged ) ? pvp : std::numeric_limits<double>::quiet_NaN(); // p-value for T ~ L|G* |
| 287 | + if( pvp > pv ) npos++; |
| 288 | + |
| 289 | + aa = npos < posno; |
| 290 | + cc = nperm < ( maxit - 1 ); |
| 291 | + nperm++; |
| 292 | + } // end 'while' permutation loop |
| 293 | + } // End if |
| 294 | + pv = 1.0 * npos / nperm; |
| 295 | + pvec.push_back(pv); // pval for L ind T|G |
| 296 | + |
| 297 | + pval1[0] = pvec[0]; // pval for T ~ L |
| 298 | + pval2[0] = pvec[1]; // pval for T ~ G|L |
| 299 | + pval3[0] = pvec[2]; // pval for G ~ L|T |
| 300 | + pval4[0] = pvec[3]; // pval for L ind T|G |
| 301 | + |
| 302 | + pvec.clear(); |
| 303 | + gresid.clear(); |
| 304 | + gpred.clear(); |
| 305 | + gsl_matrix_free (Lm); |
| 306 | + gsl_vector_free (Gm); |
| 307 | + gsl_vector_free (Tm); |
| 308 | + gsl_vector_free (Gp); |
| 309 | + |
| 310 | + delete [] designmat; |
| 311 | + delete [] phenovec; |
| 312 | + |
| 313 | + LL.clear(); |
| 314 | + |
| 315 | +} // End citbin |
0 commit comments