-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbigwig2array.py
More file actions
67 lines (58 loc) · 2.41 KB
/
bigwig2array.py
File metadata and controls
67 lines (58 loc) · 2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import sys
import os
import pyBigWig
import pickle
import numpy as np
def bigwig2array(input_bw, output_pkl, resolution):
bw = pyBigWig.open(input_bw)
chroms = bw.chroms()
signal_dict = {}
for chrom in chroms:
print("Processing", chrom)
chrom_size = chroms[chrom]
print("Chrom size:", chrom_size)
signal = bw.values(chrom, 0, chrom_size, numpy=True)
#each resolution interval should sum to get the overall signal
cutoff_length = chrom_size // resolution * resolution
if cutoff_length == 0:
print("Chrom size is smaller than resolution, skipping")
continue
signal = signal[:cutoff_length]
signal = np.nan_to_num(signal)
if resolution > 1:
signal = signal.reshape(-1, resolution).mean(axis=1)
signal_dict[chrom] = signal
# the following problem is nan will have very big impact on overall calculation
# value_list = []
# for i in list(range(0, cutoff_length, resolution)):
# value_list.append(bw.stats(chrom, i, i + resolution)[0])
# value_list = [0 if v is None else v for v in value_list]
# signal = np.array(value_list)
# signal_dict[chrom] = signal
print("Finished procssing! Signal shape:", signal.shape)
#output signal stats
print("Signal stats: mean ",np.mean(signal), "std ", np.std(signal), "max ", np.max(signal), "min ", np.min(signal))
bw.close()
with open(output_pkl, 'wb') as f:
pickle.dump(signal_dict, f)
"""
This script converts bigwig file to array format specified by resolution.
```
python3 bigwig2array.py [input_bw] [output_pkl] [resolution]
```
[input_bw]: the input bigwig file. <br>
[output_pkl]: the output pkl file with [chrom]:[signal] format. <br>
[resolution]: the output resolution of the signal. <br>
"""
if __name__ == '__main__':
if len(sys.argv)!=4:
print("Usage: python bigwig2array.py [input_bw] [output_pkl] [resolution]")
print("input_bw: the input bigwig file")
print("output_pkl: the output pkl file with [chrom]:[signal] format")
print("resolution: the resolution the signal should be converted to")
sys.exit(1)
input_bw = os.path.abspath(sys.argv[1])
output_pkl = os.path.abspath(sys.argv[2])
resolution = int(sys.argv[3])
#convert bigwig to array
bigwig2array(input_bw, output_pkl, resolution)