Commit 7f9bf54

Upd fimp and index

1 parent ce546d8

7 files changed: +407 -166 lines

index.html (+45 -27)
@@ -10,17 +10,18 @@
   <link rel="icon" type="image/png" href="https://statsim.com/app/images/favicon-16x16.png" sizes="16x16">
   <style>
     a { color: #3030B7 }
+    .logo { width: 75px; padding: 0; margin: 8px 0 0 0}
     .btn, .port-btn { background: #3030B7 }
     .btn:hover, .port-btn:hover { background: #21218B }
     .file-field .btn { background: #BBB }
     .file-field .btn:hover { background: #AAA }
-    .status-bar { background: #f5f5f5 }
+    .grey-bar { background: #f5f5f5 }
     .spinner-green, .spinner-green-only { border-color: #3030B7 }
   </style>
 </head>
 <body>
 
-  <div class="status-bar">
+  <div class="grey-bar">
     <div class="container">
       <div class="row">
         <div class="col s11" style="font-size: 14px;">
@@ -36,28 +37,31 @@
   <div class="container">
     <div class="row">
       <div class="col m12">
-        <h4>Visualize high-dimensional data</h4>
-        <p>Feature extraction is the process of reducing the number of variables (columns) in a dataset by obtaining a smaller representative set of variables using various methods of dimensionality reduction (<a href="https://en.wikipedia.org/wiki/Dimensionality_reduction">Wiki</a>). Try dimensionality reduction methods in the browser using Vis, a 100% free open-source tool for feature extraction and visualization.</p>
-        <p>Currently supported methods: <i>PCA, t-SNE, UMAP, SOM, Autoencoder</i></p>
+        <img src="vis.png" class="logo" alt="StatSim.Vis">
       </div>
     </div>
     <div class="row">
       <div id="port-container"></div>
     </div>
-    <div class="row">
-      <div class="col m12">
-        <small>
-          All processing and visualization happens in your browser. We don't see, collect or sell data you explore <br>
-          Based on <a href="https://github.com/statsim/port">port</a> and some awesome npm packages:
-          <a href="https://www.npmjs.com/package/ml-js">ml-js</a>,
-          <a href="https://www.npmjs.com/package/tsne">tsne</a>,
-          <a href="https://www.npmjs.com/package/umap-js">umap-js</a>,
-          <a href="https://www.npmjs.com/package/plotly.js">plotly.js</a> and <a href="https://github.com/statsim/vis/blob/master/package.json">other</a>
-        </small>
-        <p>
-          <a class="github-button" href="https://github.com/statsim/vis" data-icon="octicon-star" data-show-count="true" aria-label="Star statsim/vis on GitHub">Star</a>
-          <a class="github-button" href="https://github.com/statsim/vis/issues" data-icon="octicon-issue-opened" data-show-count="true" aria-label="Issue statsim/vis on GitHub">Issue</a>
-        </p>
+  </div>
+  <div class="grey-bar">
+    <div class="container">
+      <div class="row">
+        <div class="col m12">
+          <h4>Visualize high-dimensional data online</h4>
+          <p>
+            Feature extraction is the process of reducing the number of variables (columns) in a dataset by obtaining a smaller representative set of variables using various methods of dimensionality reduction (<a href="https://en.wikipedia.org/wiki/Dimensionality_reduction">Wiki</a>). Try dimensionality reduction methods in the browser using Vis, a 100% free open-source tool for feature extraction and visualization.
+            Supported methods: <i>PCA, t-SNE, UMAP, SOM, Autoencoder</i>
+          </p>
+
+          <small>
+            All processing and visualization happens in your browser. We don't see, collect or sell data you explore <br>
+          </small>
+          <p>
+            <a class="github-button" href="https://github.com/statsim/vis" data-icon="octicon-star" data-show-count="true" aria-label="Star statsim/vis on GitHub">Star</a>
+            <a class="github-button" href="https://github.com/statsim/vis/issues" data-icon="octicon-issue-opened" data-show-count="true" aria-label="Issue statsim/vis on GitHub">Issue</a>
+          </p>
+        </div>
       </div>
     </div>
   </div>
@@ -87,18 +91,32 @@ <h4>Visualize high-dimensional data</h4>
   },
   "inputs": [
     { "type": "file", "name": "File", "reactive": true },
-    { "type": "select", "name": "Dimensions", "options": [2, 3], "default": 2},
-    { "type": "select", "name": "Target variable" },
-    { "type": "select", "name": "Transform", "options": ['None', 'Scale', 'Log'], "default": 'None' },
-    { "type": "select", "name": "Method", "options": ['PCA', 'SOM', 't-SNE', 'UMAP', 'Autoencoder'], "default": "PCA", "onchange": (value) => {
-      if (value === 'PCA') {
-        return {'Steps': {'className': 'hidden'}}
+    { "type": "select", "name": "Target variable", "options": ['None'], "default": 'None' },
+    { "type": "select", "name": "Projection method", "options": ['None', 'PCA', 'SOM', 't-SNE', 'UMAP', 'Autoencoder'], "default": "None", "onchange": (value) => {
+      if (value === 'None') {
+        return {
+          'Steps': {'className': 'hidden'},
+          'Dimensions': {'className': 'hidden'},
+          'Transform': {'className': 'hidden'}
+        }
+      } else if (value === 'PCA') {
+        return {
+          'Steps': {'className': 'hidden'},
+          'Dimensions': {'className': ''},
+          'Transform': {'className': ''}
+        }
       } else {
-        return {'Steps': {'className': ''}}
+        return {
+          'Steps': {'className': ''},
+          'Dimensions': {'className': ''},
+          'Transform': {'className': ''}
+        }
       }
     }},
-    { "type": "select", "name": "Feature importance", "options": ['None', 'Random Forest'], "default": 'None' },
+    { "type": "select", "name": "Dimensions", "options": [2, 3], "default": 2},
+    { "type": "select", "name": "Transform", "options": ['None', 'Scale', 'Log'], "default": 'None' },
     { "type": "int", "name": "Steps", "default": 200},
+    { "type": "select", "name": "Feature importance", "options": ['None', 'Random Forest'], "default": 'None' },
   ]
 }
 })
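For context, the reworked `onchange` handler above returns a map from input names to `{ 'className': ... }` updates, which the Port UI presumably uses to show or hide the corresponding fields. A minimal sketch of that pattern, using a hypothetical `applyFieldUpdates` helper and `field-<name>` element ids rather than Port's actual internals:

// Illustrative only: apply a map like { 'Steps': { 'className': 'hidden' } }
// to wrapper elements assumed to have ids of the form `field-<name>`.
function applyFieldUpdates (updates) {
  Object.keys(updates).forEach(name => {
    const el = document.getElementById('field-' + name)
    if (el) {
      el.className = updates[name]['className']
    }
  })
}

// Example: choosing the 'None' projection hides Steps, Dimensions and Transform
applyFieldUpdates({
  'Steps': { 'className': 'hidden' },
  'Dimensions': { 'className': 'hidden' },
  'Transform': { 'className': 'hidden' }
})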

package-lock.json (+46 -1)

Generated file; diff not rendered.

package.json (+3 -2)
@@ -1,6 +1,6 @@
 {
   "name": "vis",
-  "version": "0.2.0",
+  "version": "0.3.0",
   "description": "Vis. Visualize high-dimensional data",
   "main": "main.js",
   "scripts": {
@@ -34,6 +34,7 @@
     "plotly.js": "^1.53.0",
     "random-forest": "0.0.7",
     "tsne": "^1.0.1",
-    "umap-js": "^1.3.2"
+    "umap-js": "^1.3.2",
+    "vis-network": "^8.5.4"
   }
 }

src/process.js (+90 -70)
@@ -105,78 +105,82 @@ module.exports = class Process {
     console.log('[Vis] Embedding method:', params.method)
 
     let Y
-    if (params.method === 'PCA') {
-      console.log('[Vis] Fitting PCA')
-      const pca = new PCA(X)
-      Y = pca.predict(X, { 'nComponents': nDims}).to2DArray()
-    } else if (params.method === 'SOM') {
-      const som = new SOM(100, 100, { 'iterations': Math.round(params.steps / 10), 'fields': X[0].length})
-      som.train(X)
-      Y = som.predict(X)
-      if (nDims === 3) {
-        Y = Y.map(y => y.concat([0]))
-      }
-    } else if (params.method === 'UMAP') {
-      console.log('[Vis] Fitting UMAP')
-      const umap = new UMAP({ 'nComponents': nDims, 'nEpochs': params.steps })
-      umap.initializeFit(X)
-      for (let i = 0; i < params.steps; i++) {
-        umap.step()
-      }
-      Y = umap.getEmbedding()
-    } else if (params.method === 'Autoencoder') {
-      console.log('[Vis] Fitting Autoencoder')
-      // const ae = new Autoencoder({'nInputs': cols.length, 'nHidden': nDims, 'nLayers': 3, 'activation': 'tanh'})
-      const ae = new Autoencoder({
-        'encoder': [
-          {'nOut': 20, 'activation': 'tanh'},
-          {'nOut': nDims, 'activation': 'sigmoid'}
-        ],
-        'decoder': [
-          {'nOut': 20, 'activation': 'tanh'},
-          {'nOut': cols.length}
-        ]
-      })
-      ae.fit(X, {
-        'iterations': params.steps * 50,
-        'stepSize': 0.005,
-        'batchSize': 20,
-        'method': 'adam'
-      })
-      Y = ae.encode(X)
-
-      impMatrix = []
-
-      console.log('[Vis] Generate importance matrix with Autoencoder')
-      featuresFiltered.forEach((f, fi) => {
-        const impTemp = []
-        const Xr = []
-        X.forEach(x => Xr.push(x.slice(0)))
-        for (let i = Xr.length - 1; i > 0; i--) {
-          const j = Math.floor(Math.random() * (i + 1))
-          const x = Xr[i][fi]
-          Xr[i][fi] = Xr[j][fi]
-          Xr[j][fi] = x
+
+    // Projection
+    if (params.method !== 'None') {
+      if (params.method === 'PCA') {
+        console.log('[Vis] Fitting PCA')
+        const pca = new PCA(X)
+        Y = pca.predict(X, { 'nComponents': nDims}).to2DArray()
+      } else if (params.method === 'SOM') {
+        const som = new SOM(100, 100, { 'iterations': Math.round(params.steps / 10), 'fields': X[0].length})
+        som.train(X)
+        Y = som.predict(X)
+        if (nDims === 3) {
+          Y = Y.map(y => y.concat([0]))
         }
-        const Xp = ae.predict(Xr)
-        featuresFiltered.forEach((ff, ffi) => {
-          const mse = Xp.reduce((a, x, xi) => Math.pow(x[ffi] - X[xi][ffi], 2) + a, 0) / Xp.length
-          impTemp.push(mse)
+      } else if (params.method === 'UMAP') {
+        console.log('[Vis] Fitting UMAP')
+        const umap = new UMAP({ 'nComponents': nDims, 'nEpochs': params.steps })
+        umap.initializeFit(X)
+        for (let i = 0; i < params.steps; i++) {
+          umap.step()
+        }
+        Y = umap.getEmbedding()
+      } else if (params.method === 'Autoencoder') {
+        console.log('[Vis] Fitting Autoencoder')
+        // const ae = new Autoencoder({'nInputs': cols.length, 'nHidden': nDims, 'nLayers': 3, 'activation': 'tanh'})
+        const ae = new Autoencoder({
+          'encoder': [
+            {'nOut': 20, 'activation': 'tanh'},
+            {'nOut': nDims, 'activation': 'sigmoid'}
+          ],
+          'decoder': [
+            {'nOut': 20, 'activation': 'tanh'},
+            {'nOut': cols.length}
+          ]
         })
-        impMatrix.push(impTemp)
-      })
-      console.log('[Vis] Autoencoder importance matrix:', impMatrix)
-      impMatrix = new Matrix(impMatrix).scaleColumns().to2DArray()
-    } else {
-      console.log('[Vis] Fitting t-SNE')
-      const tsne = new TSNE({ 'epsilon': 10, 'dim': nDims })
-      tsne.initDataRaw(X)
-      const steps = params.steps || 100
-      for (let k = 0; k <= steps; k++) {
-        tsne.step()
+        ae.fit(X, {
+          'iterations': params.steps * 50,
+          'stepSize': 0.005,
+          'batchSize': 20,
+          'method': 'adam'
+        })
+        Y = ae.encode(X)
+
+        impMatrix = []
+
+        console.log('[Vis] Generate importance matrix with Autoencoder')
+        featuresFiltered.forEach((f, fi) => {
+          const impTemp = []
+          const Xr = []
+          X.forEach(x => Xr.push(x.slice(0)))
+          for (let i = Xr.length - 1; i > 0; i--) {
+            const j = Math.floor(Math.random() * (i + 1))
+            const x = Xr[i][fi]
+            Xr[i][fi] = Xr[j][fi]
+            Xr[j][fi] = x
+          }
+          const Xp = ae.predict(Xr)
+          featuresFiltered.forEach((ff, ffi) => {
+            const mse = Xp.reduce((a, x, xi) => Math.pow(x[ffi] - X[xi][ffi], 2) + a, 0) / Xp.length
+            impTemp.push(mse)
+          })
+          impMatrix.push(impTemp)
+        })
+        console.log('[Vis] Autoencoder importance matrix:', impMatrix)
+        impMatrix = new Matrix(impMatrix).scaleColumns().to2DArray()
+      } else {
+        console.log('[Vis] Fitting t-SNE')
+        const tsne = new TSNE({ 'epsilon': 10, 'dim': nDims })
+        tsne.initDataRaw(X)
+        const steps = params.steps || 100
+        for (let k = 0; k <= steps; k++) {
+          tsne.step()
+        }
+        Y = tsne.getSolution()
       }
-      Y = tsne.getSolution()
-    }
+    } // End projection
 
     let target
     let colorscale
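Two things happen in this hunk: the embedding code is wrapped in `if (params.method !== 'None')` so projection can be skipped entirely, and the Autoencoder branch keeps building a permutation-style importance matrix (shuffle one input column, re-run the reconstruction, record the per-feature MSE). A standalone sketch of that permutation idea, with a generic `predict` callback standing in for `ae.predict` (illustrative, not the project's API):

// Permutation-style importance sketch: shuffle column `fi`, re-run a model's
// predict function, and measure how much each output column's error grows.
// `predict` is a stand-in for something like ae.predict; X is an array of rows.
function permutationImportance (X, predict) {
  const nFeatures = X[0].length
  const matrix = []
  for (let fi = 0; fi < nFeatures; fi++) {
    const Xr = X.map(row => row.slice(0))
    // Fisher-Yates shuffle of column fi only
    for (let i = Xr.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1))
      const tmp = Xr[i][fi]
      Xr[i][fi] = Xr[j][fi]
      Xr[j][fi] = tmp
    }
    const Xp = predict(Xr)
    const row = []
    for (let fo = 0; fo < nFeatures; fo++) {
      const mse = Xp.reduce((a, x, xi) => a + Math.pow(x[fo] - X[xi][fo], 2), 0) / Xp.length
      row.push(mse)
    }
    matrix.push(row)
  }
  return matrix // matrix[fi][fo]: reconstruction error in feature fo when feature fi is shuffled
}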
@@ -210,7 +214,7 @@ module.exports = class Process {
       }
     } else {
       console.log('[Vis] No target variable specified')
-      target = Array(Y.length).fill(0)
+      target = Array(X.length).fill(0)
       colorscale = [
         [0, '#8A8DA1'],
         [1, '#8A8DA1']
@@ -234,6 +238,22 @@ module.exports = class Process {
         impTemp.splice(i, 0, 0)
         return impTemp
       })
+    } else if (params.importance === 'Mutual Information') {
+      // impMatrix = []
+      // for (let i = 0; i < featuresFiltered.length; i++) {
+      //   console.log(`Calculating ${i} of ${featuresFiltered.length}`)
+      //   const impMatrixRow = []
+      //   impMatrix.push(impMatrixRow)
+      //   for (let j = 0; j < featuresFiltered.length; j++) {
+      //     if (i === j) {
+      //       impMatrixRow.push(0)
+      //     } else {
+      //       const x = X.map(row => row[i])
+      //       const y = X.map(row => row[j])
+      //       impMatrixRow.push(Funzo(x).map().joint(Funzo(y).map()).mi(2)) //)mid(x, y))
+      //     }
+      //   }
+      // }
     }
   }

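The new 'Mutual Information' importance branch ships commented out and leans on a `Funzo`-based estimator. For orientation only, a self-contained histogram-based MI estimate between two numeric columns might look like the sketch below (the bin count of 10 and natural-log units are arbitrary assumptions, not what the commented-out code computes):

// Illustrative histogram-based mutual information between two numeric arrays.
// Not the Funzo estimator referenced in process.js; bin count is an assumption.
function mutualInformation (x, y, bins = 10) {
  const n = x.length
  const bin = (v, arr) => {
    const min = Math.min(...arr)
    const max = Math.max(...arr)
    const b = Math.floor((v - min) / (max - min + 1e-12) * bins)
    return Math.min(b, bins - 1)
  }
  const pxy = {}
  const px = {}
  const py = {}
  for (let i = 0; i < n; i++) {
    const bx = bin(x[i], x)
    const by = bin(y[i], y)
    pxy[bx + ',' + by] = (pxy[bx + ',' + by] || 0) + 1 / n
    px[bx] = (px[bx] || 0) + 1 / n
    py[by] = (py[by] || 0) + 1 / n
  }
  let mi = 0
  Object.keys(pxy).forEach(key => {
    const [bx, by] = key.split(',')
    mi += pxy[key] * Math.log(pxy[key] / (px[bx] * py[by]))
  })
  return mi // in nats
}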