HKU-BAL
diff --git a/‎.idea/Clai3_gpu_github.iml‎
Lines changed: 12 additions & 0 deletions b/‎.idea/Clai3_gpu_github.iml‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎Dockerfile.gpu‎
Lines changed: 44 additions & 0 deletions b/‎Dockerfile.gpu‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 45 additions & 13 deletions b/‎Makefile‎
Lines changed: 45 additions & 13 deletions
diff --git a/‎README.md‎
Lines changed: 8 additions & 5 deletions b/‎README.md‎
Lines changed: 8 additions & 5 deletions
diff --git a/‎clair3.py‎
Lines changed: 3 additions & 2 deletions b/‎clair3.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎clair3/CallVariants.py‎
Lines changed: 54 additions & 56 deletions b/‎clair3/CallVariants.py‎
Lines changed: 54 additions & 56 deletions
@@ -0,0 +1,44 @@
+# GPU image for Clair3, built on top of the TensorFlow 2.15.0 GPU image.
+FROM tensorflow/tensorflow:2.15.0-gpu
+
+# System packages used to build the native helpers and to run the pipeline.
+# The apt lists are removed in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y \
+    wget \
+    git \
+    cmake \
+    build-essential \
+    automake \
+    xz-utils \
+    pigz \
+    zlib1g-dev \
+    libbz2-dev \
+    liblzma-dev \
+    libcurl4-openssl-dev \
+    samtools \
+    parallel \
+    libboost-graph-dev \
+    libssl-dev \
+    libdeflate-dev \
+    time \
+    tabix \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+# Append /opt/bin to the inherited PATH instead of replacing PATH with a fixed
+# list, so directories exported by the base image (e.g. CUDA tool directories,
+# if the base image sets them) stay reachable.
+ENV PATH=${PATH}:/opt/bin
+
+WORKDIR /opt/bin
+
+# Python packages installed on top of the base image.
+RUN pip3 install --no-cache-dir whatshap cffi
+
+# Copy the build context (the Clair3 source tree) into /opt/bin.
+COPY . .
+
+# In one layer: compile the realigner and de Bruijn graph shared objects,
+# download and unpack the models into /opt/models, run the project Makefile,
+# remove the samtools/longphase build directories, and unpack PyPy 3.9 with a
+# /opt/bin/pypy3 symlink pointing at it.
+RUN cd /opt/bin/preprocess/realign && \
+    g++ -std=c++14 -O1 -shared -fPIC -o realigner ssw_cpp.cpp ssw.c realigner.cpp && \
+    g++ -std=c++11 -shared -fPIC -o debruijn_graph -O3 debruijn_graph.cpp && \
+    wget http://www.bio8.cs.hku.hk/clair3/clair3_models/clair3_models.tar.gz -P /opt/models && \
+    tar -zxvf /opt/models/clair3_models.tar.gz -C /opt/models && \
+    rm /opt/models/clair3_models.tar.gz && \
+    cd /opt/bin && make PREFIX=/usr/local PYTHON=/usr/bin/python3 && \
+    rm -rf /opt/bin/samtools-* /opt/bin/longphase-* && \
+    cd /opt/bin && wget -q https://downloads.python.org/pypy/pypy3.9-v7.3.8-linux64.tar.bz2 && \
+    tar -xjf pypy3.9-v7.3.8-linux64.tar.bz2 && \
+    rm pypy3.9-v7.3.8-linux64.tar.bz2 && \
+    ln -sf /opt/bin/pypy3.9-v7.3.8-linux64/bin/pypy3 /opt/bin/pypy3
@@ -3,37 +3,66 @@ ARCH := $(shell arch)
 
 PYTHON ?= python3
 
-all : libhts.a longphase libclair3.so
+all : libhts.a longphase libclair3.so models
 clean : clean_htslib clean_longphase clean_libclair3
 
 SAMVER	=	1.15.1
-LPVER	=	1.7.3
-GCC	?=	gcc
-GXX	?=	g++
 PREFIX	?=	${CONDA_PREFIX}
 LDFLAGS	=	-L ${PREFIX}/lib
 CFLAGS	= -fpic -std=c99 -O3 -I ${PREFIX}/include -L ${PREFIX}/lib
 CPPFLAGS	=	-std=c++11 -Wall -O3 -I ${PREFIX}/include -L ${PREFIX}/lib -Wl,-rpath=${PREFIX}/lib
 
+# Platform-specific longphase version and compiler defaults.
+# NOTE(review): OS is not assigned in the visible part of this Makefile; it is
+# presumably set near the top (e.g. from `uname`) - confirm.
+ifeq ($(OS),Darwin)
+    # Mac settings: longphase is built from source with $(CC_PATH).
+    LPVER := 1.5
+    CC_PATH ?= clang
+    GCC ?= $(CC_PATH)
+    # GXX used to be defined unconditionally; keep it defined on macOS too so
+    # any rule that expands $(GXX) does not silently get an empty command.
+    GXX ?= clang++
+else
+    # Linux settings: the prebuilt longphase release archive is downloaded.
+    LPVER := 1.7.3
+    GCC ?= gcc
+    GXX ?= g++
+endif
+
 
 samtools-$(SAMVER)/Makefile:
-		curl -L -o samtools-${SAMVER}.tar.bz2 https://github.com/samtools/samtools/releases/download/${SAMVER}/samtools-${SAMVER}.tar.bz2; \
-		tar -xjf samtools-${SAMVER}.tar.bz2; \
-		rm samtools-${SAMVER}.tar.bz2
+	# download and unpack the samtools release; -f makes curl fail on an HTTP error instead of saving the error page as the tarball
+	curl -fL -o samtools-${SAMVER}.tar.bz2 https://github.com/samtools/samtools/releases/download/${SAMVER}/samtools-${SAMVER}.tar.bz2
+	tar -xjf samtools-${SAMVER}.tar.bz2
+	rm samtools-${SAMVER}.tar.bz2
 
 libhts.a: samtools-$(SAMVER)/Makefile
 	# this is required only to add in -fpic so we can build python module
 	@echo "\x1b[1;33mMaking $(@F)\x1b[0m"
-	cd samtools-${SAMVER}/htslib-${SAMVER}; CFLAGS="${CFLAGS}" LDFLAGS="${LDFLAGS}" ./configure; make CFLAGS="${CFLAGS}" LDFLAGS="${LDFLAGS}"
+	# steps are chained with && so a failed cd or configure aborts the recipe instead of running make in the wrong place
+	cd samtools-${SAMVER}/htslib-${SAMVER} && \
+	if [ "$(OS)" = "Darwin" ]; then \
+		autoheader && autoconf -Wno-syntax && ./configure; \
+	else \
+		CFLAGS="${CFLAGS}" LDFLAGS="${LDFLAGS}" ./configure; \
+	fi && \
+	make CFLAGS="${CFLAGS}" LDFLAGS="${LDFLAGS}"
 	cp samtools-${SAMVER}/htslib-${SAMVER}/$@ $@
 
 
 longphase:
-	curl -L -o longphase-${LPVER}.tar.xz https://github.com/twolinin/longphase/releases/download/v${LPVER}/longphase_linux-x64.tar.xz
-	tar -xJf longphase-${LPVER}.tar.xz
-	mv longphase_linux-x64 $@
-	rm longphase-${LPVER}.tar.xz
-
+	# Darwin builds longphase from the source archive; other platforms fetch the prebuilt linux-x64 binary
+	# every step is joined with && so the first failure fails the recipe (a trailing rm can no longer mask it)
+	if [ "$(OS)" = "Darwin" ]; then \
+		curl -fL -o v${LPVER}.tar.gz https://github.com/twolinin/longphase/archive/refs/tags/v${LPVER}.tar.gz && \
+		tar -zxf v${LPVER}.tar.gz && \
+		( cd longphase-${LPVER} && export CC=${CC_PATH} && autoreconf -i && ./configure && make -j ) && \
+		rm v${LPVER}.tar.gz && \
+		cp longphase-${LPVER}/longphase $@; \
+	else \
+		curl -fL -o longphase-${LPVER}.tar.xz https://github.com/twolinin/longphase/releases/download/v${LPVER}/longphase_linux-x64.tar.xz && \
+		tar -xJf longphase-${LPVER}.tar.xz && \
+		mv longphase_linux-x64 $@ && \
+		rm longphase-${LPVER}.tar.xz; \
+	fi
+
+# `models` names an action, not a file: declare it phony so a file or directory
+# called `models` cannot make the target look up to date.
+.PHONY: models
+models:
+	# on Darwin, download the model archive and unpack it under ${PREFIX}/bin/models; a no-op elsewhere
+	if [ "$(OS)" = "Darwin" ]; then \
+		curl -fL -o clair3_models.tar.gz http://www.bio8.cs.hku.hk/clair3/clair3_models/clair3_models.tar.gz && \
+		mkdir -p ${PREFIX}/bin/models && \
+		tar -zxvf clair3_models.tar.gz -C ${PREFIX}/bin/models && \
+		rm clair3_models.tar.gz; \
+	fi
 
 libclair3.so: samtools-${SAMVER}/htslib-${SAMVER} libhts.a
 	${PYTHON} build.py
@@ -57,6 +86,9 @@ clean_htslib:
 .PHONY: clean_longphase
 clean_longphase:
 	rm longphase
+	# LONGPHASE_DIR is not assigned in the visible part of this Makefile; fall back to the directory the Darwin longphase build unpacks
+	if [ "$(OS)" = "Darwin" ]; then \
+		rm -rf $(or $(LONGPHASE_DIR),longphase-$(LPVER)); \
+	fi
 
 .PHONY: clean_libclair3
 clean_libclair3:
 
@@ -65,6 +65,12 @@ For somatic variant calling using **tumor-only** samples, please try [ClairS-TO]
 ----
 
 ## Latest Updates
+*v1.2.0 (Aug 1, 2025)* : 1. Clair3 now natively supports GPU on Linux and Apple Silicon. Please refer to the [GPU quick start guide](docs/gpu_quick_start.md) for usage. Clair3 on GPU runs ~5 times faster than on CPU. Below is a quick speed comparison.
+
+<div align="center">
+    <img src="docs/images/clair3_gpu_benchmark.png" width = "400" alt="Clair3 gpu benchmark">
+</div>
+
 *v1.1.2 (Jul 10, 2025)* : 1. Added boundary check where an insertion is immediately followed by a soft-clipping ([#394](https://github.com/HKU-BAL/Clair3/issues/394), co-contributor @[Devon Ryan](https://github.com/dpryan79)) 2. Added exit code checking for all parallel jobs. The pipeline now immediately exits when encountering any job failure ([#392](https://github.com/HKU-BAL/Clair3/issues/392), co-contributor @[Sam Nicholls](https://github.com/SamStudio8)).
 
 *v1.1.1 (May 19, 2025)* : 1. Fixed the malformed VCF header issue that occurred specifically in AWS cloud environments([#380](https://github.com/HKU-BAL/Clair3/issues/380)). 2.Added a Clair3 R10.4.1 model fine-tuned on 12 [bacterial genomes](https://elifesciences.org/reviewed-preprints/98300) with improved variant calling performance for bacterial samples. Performance benchmarks and detailed results are documented in our note ["fine-tuning_Clair3_with_12_bacteria_samples"](docs/fine-tuning_Clair3_with_12_bacteria_samples.pdf), (co-contributor @[William Shropshire](https://github.com/wshropshire)) .
@@ -204,6 +210,8 @@ Check the results using `less ${HOME}/clair3_ont_quickDemo/output/merge_output.v
 
 ## Installation
 
+**Clair3 supports GPU calling on Linux and Apple macOS (M1/M2/M3 chips) systems. Please refer to the [GPU quick start](docs/gpu_quick_start.md) to install and run Clair3 with GPU.**
+
 ### Option 1.  Docker pre-built image
 
 A pre-built docker image is available [here](https://hub.docker.com/r/hkubal/clair3). With it you can run Clair3 using a single command.
@@ -364,11 +372,6 @@ docker build -f ./Dockerfile -t hkubal/clair3:latest .
 docker run -it hkubal/clair3:latest /opt/bin/run_clair3.sh --help
 ```
 
-
-### Run Clair3 with Apple Silicon
-
-Instructions are given as an answer to issue [#149](https://github.com/HKU-BAL/Clair3/issues/149).
-
 ----
 
 ## Usage
 
@@ -11,7 +11,8 @@
     "CallVarBam",
     "CallVariants",
     "Train",
-    "CallVariantsFromCffi"
+    "CallVariantsFromCffi",
+    "CallVariantsFromCffiGPU",
 ]
 
 data_preprocess_folder = [
@@ -30,7 +31,7 @@
     'CheckEnvs',
     'SortVcf',
     'SelectQual',
-    "CreateTensorPileupFromCffi"
+    "CreateTensorPileupFromCffi",
     "CreateTensorFullAlignmentFromCffi",
     "CheckExitCode",
 ]
 
@@ -221,6 +221,36 @@ def Run(args):
     else:
         call_variants(args=args, output_config=output_config, output_utilities=output_utilities)
 
+def print_debug_message(
+        chromosome,
+        position,
+        gt21_probabilities,
+        genotype_probabilities,
+        variant_length_probabilities_1,
+        variant_length_probabilities_2,
+        extra_infomation_string=""
+):
+    """Print one tab-separated debug row to standard output.
+
+    The row holds, in order: chromosome, position, the four probability
+    sequences (each rendered as a list of strings with eight decimal
+    places), and the optional extra information string.
+    """
+    def fixed8(values):
+        # Render every value with exactly eight digits after the decimal point.
+        return ["{:0.8f}".format(value) for value in values]
+
+    columns = (
+        chromosome,
+        position,
+        fixed8(gt21_probabilities),
+        fixed8(genotype_probabilities),
+        fixed8(variant_length_probabilities_1),
+        fixed8(variant_length_probabilities_2),
+        extra_infomation_string,
+    )
+    print("{}\t{}\t{}\t{}\t{}\t{}\t{}".format(*columns))
+
+def gen_output_file():
+    """Do nothing and return None."""
+    return None
+
+def close_opened_files():
+    """Do nothing and return None."""
+    return None
+def output_header(reference_file_path,
+                  cmd_fn,
+                  sample_name):
+    """Call get_header() with the given arguments and discard the result.
+
+    The header string is neither written nor returned; this function
+    always returns None.
+    """
+    get_header(
+        reference_file_path=reference_file_path,
+        cmd_fn=cmd_fn,
+        sample_name=sample_name,
+    )
+    return None
+
 
 def output_utilties_from(
         sample_name,
@@ -231,52 +261,8 @@ def output_utilties_from(
         output_probabilities,
         cmd_fn=None,
 ):
-    def gen_output_file():
-        global output_file
-        if not output_probabilities:
-            output_file = open(output_file_path, "w")
-
-    def output(string_value):
-        global output_file
-        string_value += '\n'
-        output_file.write(string_value)
-
-    def print_debug_message(
-            chromosome,
-            position,
-            gt21_probabilities,
-            genotype_probabilities,
-            variant_length_probabilities_1,
-            variant_length_probabilities_2,
-            extra_infomation_string=""
-    ):
-        output("{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
-            chromosome,
-            position,
-            ["{:0.8f}".format(x) for x in gt21_probabilities],
-            ["{:0.8f}".format(x) for x in genotype_probabilities],
-            ["{:0.8f}".format(x) for x in variant_length_probabilities_1],
-            ["{:0.8f}".format(x) for x in variant_length_probabilities_2],
-            extra_infomation_string
-        ))
-
-    def close_opened_files():
-        output_file.close()
-
-    def output_header():
-        if is_output_for_ensemble:
-            return
-
-        header_str = get_header(reference_file_path=reference_file_path, cmd_fn=cmd_fn, sample_name=sample_name)
-        output(header_str)
-
-    return OutputUtilities(
-        print_debug_message,
-        output,
-        output_header,
-        close_opened_files,
-        gen_output_file
-    )
+    #deprecated since v1.2.0
+    return
 
 
 def homo_Ins_tuples_from(variant_length_probabilities_1, variant_length_probabilities_2, extra_probability):
@@ -1045,7 +1031,7 @@ def batch_output_for_ensemble(X, batch_chr_pos_seq, alt_info_list, batch_Y, outp
         )
 
 
-def batch_output(batch_chr_pos_seq, alt_info_list, batch_Y, output_config, output_utilities):
+def batch_output(batch_chr_pos_seq, alt_info_list, batch_Y, output_config, output_utilities, args=None):
     batch_size = len(batch_chr_pos_seq)
 
     batch_gt21_probabilities, batch_genotype_probabilities = batch_Y[:,:param.label_shape_cum[0]], batch_Y[:,param.label_shape_cum[0]:param.label_shape_cum[1]]
@@ -1056,6 +1042,7 @@ def batch_output(batch_chr_pos_seq, alt_info_list, batch_Y, output_config, outpu
         )
     batch_variant_length_probabilities_1, batch_variant_length_probabilities_2 = [0] * batch_size, [0] * batch_size
 
+    batch_output_result = ""
     if output_config.add_indel_length:
         batch_variant_length_probabilities_1, batch_variant_length_probabilities_2 = batch_Y[:,param.label_shape_cum[1]:param.label_shape_cum[2]], batch_Y[:,param.label_shape_cum[2]:param.label_shape_cum[3]]
     for (
@@ -1073,7 +1060,7 @@ def batch_output(batch_chr_pos_seq, alt_info_list, batch_Y, output_config, outpu
         batch_variant_length_probabilities_1,
         batch_variant_length_probabilities_2
     ):
-        output_with(
+        output_row = output_with(
             chr_pos_seq,
             alt_info,
             gt21_probabilities,
@@ -1084,6 +1071,14 @@ def batch_output(batch_chr_pos_seq, alt_info_list, batch_Y, output_config, outpu
             output_utilities,
         )
 
+        if output_row is not None:
+            if args is not None:
+                args.output_file.write(output_row)
+            else:
+                #store the vcf output in batch in gpu mode
+                batch_output_result += output_row
+
+    return batch_output_result
 
 def output_with(
         chr_pos_seq,
@@ -1332,7 +1327,7 @@ def decode_alt_info(alt_info_dict, ref_base=None):
                                  alternate_base)
             PLs = ','.join([str(x) for x in PLs])
 
-            output_utilities.output("%s\t%d\t.\t%s\t%s\t%.2f\t%s\t%s\tGT:GQ:DP:AD:AF:PL\t%s:%d:%d:%s:%s:%s" % (
+            return "%s\t%d\t.\t%s\t%s\t%.2f\t%s\t%s\tGT:GQ:DP:AD:AF:PL\t%s:%d:%d:%s:%s:%s\n" % (
                     chromosome,
                     position,
                     reference_base,
@@ -1346,9 +1341,9 @@ def decode_alt_info(alt_info_dict, ref_base=None):
                     allele_depth,
                     allele_frequency_s,
                     PLs
-                ))
+                )
         else:
-            output_utilities.output("%s\t%d\t.\t%s\t%s\t%.2f\t%s\t%s\tGT:GQ:DP:AD:AF\t%s:%d:%d:%s:%s" % (
+            return "%s\t%d\t.\t%s\t%s\t%.2f\t%s\t%s\tGT:GQ:DP:AD:AF\t%s:%d:%d:%s:%s\n" % (
                 chromosome,
                 position,
                 reference_base,
@@ -1361,7 +1356,7 @@ def decode_alt_info(alt_info_dict, ref_base=None):
                 read_depth,
                 allele_depth,
                 allele_frequency_s,
-            ))
+            )
 
 
 def compute_PL(genotype_string, genotype_probabilities, gt21_probabilities, reference_base, alternate_base):
@@ -1443,8 +1438,11 @@ def call_variants(args, output_config, output_utilities):
 
     m.load_weights(args.chkpnt_fn)
 
-    output_utilities.gen_output_file()
-    output_utilities.output_header()
+    args.output_file = open(args.call_fn, 'w') if args.call_fn != 'PIPE' else sys.stdout
+    header_str = get_header(reference_file_path=args.ref_fn, cmd_fn=args.cmd_fn, sample_name=args.sampleName)
+    header_str += '\n'
+    args.output_file.write(header_str)
+
     chunk_id = args.chunk_id - 1 if args.chunk_id else None  # 1-base to 0-base
     chunk_num = args.chunk_num
     full_alignment_mode = not args.pileup
@@ -1478,7 +1476,7 @@ def load_mini_batch():
                     total += len(X)
                     thread_pool.append(Thread(
                         target=batch_output_method,
-                        args=(position, alt_info_list, prediction, output_config, output_utilities)
+                        args=(position, alt_info_list, prediction, output_config, output_utilities, args)
                     ))
 
                 if not is_finish_loaded_all_mini_batches:
@@ -1561,7 +1559,7 @@ def load_mini_batch():
 
     logging.info("Total time elapsed: %.2f s" % (time() - variant_call_start_time))
 
-    output_utilities.close_opened_files()
+    args.output_file.close()
     # remove file if on variant in output
     if os.path.exists(args.call_fn):
         for row in open(args.call_fn, 'r'):