Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip paths for which path.to_obg() returns False. #7

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
FROM ubuntu:20.04
# Image bundling graph_peak_caller (installed from source) and the vg binary.

# Use the key=value ENV form for both variables; the space-separated
# form is legacy syntax.
ENV PIP_BREAK_SYSTEM_PACKAGES=1
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

# Refresh the package index and install in the same layer so a cached
# `apt-get update` layer is never combined with a later install step.
# DEBIAN_FRONTEND/TZ are set only for the tzdata install, as in the original.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata parallel \
    && apt-get -y install git python3 python3-pip pkg-config build-essential wget
RUN pip3 install matplotlib

# Clone and install as two separate instructions. A trailing backslash
# after the clone URL would fold the next RUN line into the git command
# (`git clone <url> RUN cd ...`) and break the build.
RUN git clone https://github.com/cgroza/graph_peak_caller.git
RUN cd graph_peak_caller && pip3 install .

# Fetch the vg release binary and make it executable.
RUN wget https://github.com/vgteam/vg/releases/download/v1.58.0/vg -O /usr/bin/vg
RUN chmod +x /usr/bin/vg

ENTRYPOINT [ "/bin/bash", "-l", "-c" ]
3 changes: 3 additions & 0 deletions graph_peak_caller/analysis/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def get_linear_paths_in_graph(ob_graph, vg_graph, write_to_file_name=None):
intervals = {}
for path in vg_graph.paths:
obg_interval = path.to_obg(ob_graph=ob_graph)
if not obg_interval:
logging.info("OBG interval for path " + path.name + " is False. Skipping.")
continue
obg_interval.name = path.name
print("Path name: %s" % path.name)
intervals[obg_interval.name] = obg_interval
Expand Down
2 changes: 1 addition & 1 deletion graph_peak_caller/callpeaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def __get_max_paths(self):
assert max_path.length() >= 0, "Max path %s has negative length" % max_path
if max_path.length() == 0:
logging.warning("Max path has 0 length: %s" % max_path)
max_path.set_score(0)
max_path.set_score(float(0.0))
continue

score = np.max(self.q_values.get_interval_values(max_path))
Expand Down
10 changes: 8 additions & 2 deletions graph_peak_caller/control/linearmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ def to_file(self, filename):
def find_starts(graph, node_ids=None):
if node_ids is None:
node_ids = list(graph.get_topological_sorted_node_ids())
max_dists = np.zeros(len(node_ids))
# this does not work for node id spaces that are not compacted
# max_dists = np.zeros(len(node_ids))
# find size of array from the largest named node instead
max_dists = np.zeros(int(graph.get_sorted_node_ids()[-1]))
n_processed = 0
for node_id in node_ids:
if n_processed % 500000 == 0:
Expand All @@ -139,7 +142,10 @@ def find_ends(graph, node_ids=None):
adj_list = graph.reverse_adj_list
if node_ids is None:
node_ids = list(graph.get_sorted_node_ids(reverse=True))
max_dists = np.zeros(len(node_ids))
# this does not work for node id spaces that are not compacted
# max_dists = np.zeros(len(node_ids))
# find size of array from the largest named node instead
max_dists = np.zeros(int(graph.get_sorted_node_ids()[-1]))
n_processed = 0
for node_id in node_ids:
if n_processed % 500000 == 0:
Expand Down
2 changes: 1 addition & 1 deletion graph_peak_caller/legacy/pvalues.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,6 @@ def translation(x):
logging.error("P value not found in mapping dict. Could be due to rounding errors.")
return self.p_to_q_values[x]

trans = np.vectorize(translation, otypes=[np.float])
trans = np.vectorize(translation, otypes=[float])
new_values = trans(p_values)
return new_values
2 changes: 1 addition & 1 deletion graph_peak_caller/sparsediffs.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def threshold_copy(self, cutoff):
return new

def to_dense_pileup(self, size):
if self.values.dtype == np.bool:
if self.values.dtype == bool:
values = self.values.astype("int")
else:
values = self.values
Expand Down
2 changes: 1 addition & 1 deletion graph_peak_caller/sparsepvalues.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,5 +121,5 @@ def get_q_values(self):

def get_q_array_from_p_array(self, p_values):
assert isinstance(p_values, np.ndarray)
trans = np.vectorize(self.p_to_q_values.get, otypes=[np.float])
trans = np.vectorize(self.p_to_q_values.get, otypes=[float])
return trans(p_values)