@@ -6,11 +6,11 @@ calculate.integrative.similarity.matrix <- function(
66 filter.to.common.patients = FALSE ,
77 patients.to.return = NULL ,
88 patients.for.correlations = NULL
9- ) {
9+ ) {
1010
1111 # pull out the patients to use
1212 patients <- NULL ;
13- for (data.type in data.types ) {
13+ for (data.type in data.types ) {
1414 if (filter.to.common.patients ) {
1515 # assume patient IDs have at least one number in them and annotation columns don't
1616 if (is.null(patients )) {
@@ -36,81 +36,86 @@ calculate.integrative.similarity.matrix <- function(
3636 }
3737 else {
3838 patients.to.return <- intersect(patients.to.return , patients );
39- }
39+ }
4040 if (is.null(patients.for.correlations )) {
4141 patients.for.correlations <- patients ;
4242 }
4343 else {
4444 patients.for.correlations <- intersect(patients.for.correlations , patients );
4545 }
46- patient.pairs <- as.character(sapply(1 : (length(patients.for.correlations )), function (x ) {paste0(patients.for.correlations [x ], ' :' , patients.to.return )}));
46+ patient.pairs <- as.character(sapply(1 : (length(patients.for.correlations )), function (x ) {
47+ paste0(patients.for.correlations [x ], ' :' , patients.to.return )
48+ }
49+ ));
4750 patients <- sort(unique(c(patients.to.return , patients.for.correlations )));
4851
4952 # calculate pair-wise distances
5053 patient.paired.dists <- list ();
5154 patient.paired.dists.matrix <- matrix (NA , ncol = length(data.types ), nrow = length(patient.pairs ));
5255 colnames(patient.paired.dists.matrix ) <- data.types ;
5356 rownames(patient.paired.dists.matrix ) <- patient.pairs ;
54- for (data.type in data.types ) {
57+ for (data.type in data.types ) {
5558 # filter to required patients
56- data.matrices [[data.type ]] <- data.matrices [[data.type ]][,sort(colnames(data.matrices [[data.type ]])[colnames(data.matrices [[data.type ]]) %in% patients ])];
59+ data.matrices [[data.type ]] <- data.matrices [[data.type ]][, sort(colnames(data.matrices [[data.type ]])[colnames(data.matrices [[data.type ]]) %in% patients ])];
5760
5861 # set up the matrix of distance pairs that are required
5962 dist.matrix <- matrix (NA , ncol = length(patients.for.correlations ), nrow = length(patients.to.return ));
6063 colnames(dist.matrix ) <- patients.for.correlations ;
6164 rownames(dist.matrix ) <- patients.to.return ;
6265
6366 # determine the most efficient approach for calculating the distances
64- # we need pr.num x pc.num distances but calculating distances creates
67+ # we need pr.num x pc.num distances but calculating distances creates
6568 # matrix with the same columns and rows which would be (pr.num + pc.num)^2
6669 # calculating distances in sets can mean less unnecessary calculations are done
6770 # comparing the each of the patients.to.return to the patients.for.correlation
6871 # select the number of patients.to.return to compare to patients.for.correlation at a time
6972 # (pc.num + k) is the patients per comparison
70- # then there will be (ceiling(pr.num/k)- 1) comparisons
73+ # then there will be (ceiling(pr.num/k) - 1) comparisons
7174 # and then an additional (pc.num + j) where j is the remainder not calculated
7275 dist.calc.operations <- list ();
7376 pr.num <- length(patients.to.return );
7477 pc.num <- length(patients.for.correlations );
7578 opt.num.of.return.to.calc.at.once <- order(sapply(
7679 1 : pr.num ,
77- function (k ) {(pc.num + k )^ 2 * (ceiling(pr.num / k )- 1 ) + (pc.num + ifelse((pr.num %% k ) == 0 , k , pr.num %% k ))^ 2 }
78- ))[1 ];
80+ function (k ) {
81+ (pc.num + k )^ 2 * (ceiling(pr.num / k ) - 1 ) + (pc.num + ifelse((pr.num %% k ) == 0 , k , pr.num %% k ))^ 2
82+ }
83+ ))[1 ];
7984 pr.tracker <- 0 ;
80- while (pr.tracker < pr.num ) {
85+ while (pr.tracker < pr.num ) {
8186 if ((pr.tracker + opt.num.of.return.to.calc.at.once ) < pr.num ) {
82- dist.calc.operations [[as.character(pr.tracker )]] <- patients.to.return [(pr.tracker + 1 ): (pr.tracker + opt.num.of.return.to.calc.at.once )];
87+ dist.calc.operations [[as.character(pr.tracker )]] <- patients.to.return [(pr.tracker + 1 ): (pr.tracker + opt.num.of.return.to.calc.at.once )];
8388 pr.tracker <- pr.tracker + opt.num.of.return.to.calc.at.once ;
8489 }
8590 else {
86- dist.calc.operations [[as.character(pr.tracker )]] <- patients.to.return [(pr.tracker + 1 ): pr.num ];
91+ dist.calc.operations [[as.character(pr.tracker )]] <- patients.to.return [(pr.tracker + 1 ): pr.num ];
8792 pr.tracker <- pr.num ;
8893 }
8994 }
9095
9196 # calculate distances and fill in patient by patient distance matrix
92- for (dist.op in 1 : length(dist.calc.operations )) {
97+ for (dist.op in 1 : length(dist.calc.operations )) {
9398 if (class(dist.metrics [[data.type ]]) == ' character' ) {
9499
95- if (dist.metrics [[data.type ]] %in% c(' pearson' ,' spearman' )) {
100+ if (dist.metrics [[data.type ]] %in% c(' pearson' , ' spearman' )) {
96101 # if the distance metric is a correlation, convert the correlation into a distance
97102 dist.result <- as.dist(
98- 1 - cor(data.matrices [[data.type ]][,intersect(colnames(data.matrices [[data.type ]]),unique(c(dist.calc.operations [dist.op ][[1 ]],patients.for.correlations )))],
103+ 1 - cor(data.matrices [[data.type ]][, intersect(colnames(data.matrices [[data.type ]]), unique(c(dist.calc.operations [dist.op ][[1 ]], patients.for.correlations )))],
99104 use = ' pairwise' ,
100105 method = dist.metrics [[data.type ]])
101106 );
102107 }
103108 else {
104109 # for distances other than correlations use the distance function
105110 dist.result <- distance(
106- t(data.matrices [[data.type ]][,intersect(colnames(data.matrices [[data.type ]]),unique(c(dist.calc.operations [dist.op ][[1 ]],patients.for.correlations )))]),
111+ t(data.matrices [[data.type ]][, intersect(colnames(data.matrices [[data.type ]]), unique(c(dist.calc.operations [dist.op ][[1 ]], patients.for.correlations )))]),
107112 method = dist.metrics [[data.type ]],
108113 use.row.names = TRUE
109114 );
110115 }
111116 }
112117 else if (class(dist.metrics [[data.type ]]) == ' function' ) {
113- dist.result <- as.dist((dist.metrics [[data.type ]])(t(data.matrices [[data.type ]][,intersect(colnames(dist.metrics [[data.type ]]), unique(c(dist.calc.operations [dist.op ][[1 ]], patients.for.correlations )))])));
118+ dist.result <- as.dist((dist.metrics [[data.type ]])(t(data.matrices [[data.type ]][, intersect(colnames(dist.metrics [[data.type ]]), unique(c(dist.calc.operations [dist.op ][[1 ]], patients.for.correlations )))])));
114119 }
115120 else {
116121 stop(paste0(' invalid option for ' , data.type , ' distance metric: ' , dist.metrics [[data.type ]]));
@@ -125,29 +130,39 @@ calculate.integrative.similarity.matrix <- function(
125130 }
126131
127132 patient.paired.dists [[data.type ]] <- dist.matrix ;
128- patient.pairs <- as.character(sapply(1 : (ncol(dist.matrix )),function (x ) {paste0(colnames(dist.matrix )[x ], ' :' , rownames(dist.matrix ))}));
133+ patient.pairs <- as.character(sapply(1 : (ncol(dist.matrix )),function (x ) {
134+ paste0(colnames(dist.matrix )[x ], ' :' , rownames(dist.matrix ))
135+ }
136+ ));
129137 patient.paired.dists.matrix [patient.pairs ,data.type ] <- as.numeric(dist.matrix );
130138 }
131139 # find the rows with at least one value (not na) beyond the patient by patient comparison
132- patient.paired.dists.matrix <- patient.paired.dists.matrix [apply(patient.paired.dists.matrix , 1 , function (x ) {sum(! is.na(x ))}) > 1 ,];
140+ patient.paired.dists.matrix <- patient.paired.dists.matrix [apply(patient.paired.dists.matrix , 1 , function (x ) {
141+ sum(! is.na(x ))
142+ }
143+ ) > 1 ,];
133144
134145 split.rownames <- strsplit(rownames(patient.paired.dists.matrix ), ' :' );
135- pair.patient1 <- sapply(1 : length(split.rownames ), function (i ) {split.rownames [[i ]][1 ]});
136- pair.patient2 <- sapply(1 : length(split.rownames ), function (i ) {split.rownames [[i ]][2 ]});
146+ pair.patient1 <- sapply(1 : length(split.rownames ), function (i ) {
147+ split.rownames [[i ]][1 ]
148+ });
149+ pair.patient2 <- sapply(1 : length(split.rownames ), function (i ) {
150+ split.rownames [[i ]][2 ]
151+ });
137152
138153 # calculate correlations (or integrative similarity) between data types
139- per.patient.data.type.corr <- matrix (NA , nrow = length(patients.to.return ), ncol = length(data.types )* (length(data.types )- 1 ) / 2 );
154+ per.patient.data.type.corr <- matrix (NA , nrow = length(patients.to.return ), ncol = length(data.types ) * (length(data.types ) - 1 ) / 2 );
140155 rownames(per.patient.data.type.corr ) <- patients.to.return ;
141156 colnames(per.patient.data.type.corr ) <- seq(1 , ncol(per.patient.data.type.corr ));
142157 data.type.pair.counter <- 0 ;
143- for (i in 1 : (length(data.types )- 1 )) {
144- for (j in (i + 1 ): length(data.types )) {
158+ for (i in 1 : (length(data.types ) - 1 )) {
159+ for (j in (i + 1 ): length(data.types )) {
145160
146161 data.type.pair.counter <- data.type.pair.counter + 1 ;
147162 colnames(per.patient.data.type.corr )[data.type.pair.counter ] <- paste0(data.types [i ], ' :' , data.types [j ]);
148163 not.na.rows <- (! is.na(patient.paired.dists.matrix [,i ])) & (! is.na(patient.paired.dists.matrix [,j ]));
149164
150- for (patient in rownames(per.patient.data.type.corr )) {
165+ for (patient in rownames(per.patient.data.type.corr )) {
151166 rows.to.use <- which(pair.patient2 == patient & not.na.rows );
152167 if (length(rows.to.use ) > 1 ) {
153168 per.patient.data.type.corr [patient ,data.type.pair.counter ] <- cor(
@@ -159,7 +174,10 @@ calculate.integrative.similarity.matrix <- function(
159174 }
160175 }
161176 }
162- per.patient.data.type.corr <- per.patient.data.type.corr [which(apply(per.patient.data.type.corr , 1 , function (x ) { sum(! is.na(x )) }) > 0 ), , drop = FALSE ];
177+ per.patient.data.type.corr <- per.patient.data.type.corr [which(apply(per.patient.data.type.corr , 1 , function (x ) {
178+ sum(! is.na(x ))
179+ }
180+ ) > 0 ), , drop = FALSE ];
163181
164182 return (per.patient.data.type.corr );
165183 }
0 commit comments