From 1ee4f17d0026211fd50a75cd5843b5d4fb37291c Mon Sep 17 00:00:00 2001 From: Agnes Leroy Date: Thu, 7 Nov 2024 11:34:38 +0100 Subject: [PATCH] Prints and hard code shift --- .../cuda/include/integer/integer_utilities.h | 2 +- .../cuda/src/integer/bitwise_ops.cuh | 2 +- .../tfhe-cuda-backend/cuda/src/integer/integer.cuh | 14 ++++++++++++++ .../src/pbs/programmable_bootstrap_cg_multibit.cuh | 2 +- tfhe/src/integer/gpu/ciphertext/mod.rs | 1 + .../src/integer/gpu/server_key/radix/bitwise_op.rs | 1 + tfhe/src/shortint/server_key/bivariate_pbs.rs | 6 ++++-- 7 files changed, 23 insertions(+), 5 deletions(-) diff --git a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h index 6dc85d422..9c2ffcb65 100644 --- a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h +++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h @@ -2915,7 +2915,7 @@ template struct int_bitop_buffer { generate_device_accumulator_bivariate( streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0), params.glwe_dimension, params.polynomial_size, - params.message_modulus, params.carry_modulus, lut_bivariate_f); + 2, 2, lut_bivariate_f); lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]); } break; diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh index c7a8a3114..581d6600c 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh @@ -20,7 +20,7 @@ __host__ void host_integer_radix_bitop_kb( auto lut = mem_ptr->lut; - printf("Here\n"); + printf("Here shift: %d\n", lut->params.message_modulus); integer_radix_apply_bivariate_lookup_table_kb( streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2, bsks, ksks, num_radix_blocks, lut, lut->params.message_modulus); diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh index 9f30afa0a..e7d0ee247 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh @@ -365,7 +365,21 @@ __host__ void integer_radix_apply_bivariate_lookup_table_kb( // In the case of extracting a single LWE this parameters are dummy uint32_t lut_count = 1; uint32_t lut_stride = 0; + Torus *lwe_in_1 = (Torus *)malloc(sizeof(Torus)*(big_lwe_dimension + 1)*num_radix_blocks); + cuda_memcpy_async_to_cpu(lwe_in_1, lwe_array_1, sizeof(Torus)*(big_lwe_dimension + 1)*num_radix_blocks, streams[0], gpu_indexes[0]); + cudaDeviceSynchronize(); + for (uint i = 0; i < num_radix_blocks; i++) { + printf("in body cuda 1: %lu\n", lwe_in_1[i * (big_lwe_dimension + 1) + big_lwe_dimension]); + } + Torus *lwe_in_2 = (Torus *)malloc(sizeof(Torus)*(big_lwe_dimension + 1)*num_radix_blocks); + cuda_memcpy_async_to_cpu(lwe_in_2, lwe_array_2, sizeof(Torus)*(big_lwe_dimension + 1)*num_radix_blocks, streams[0], gpu_indexes[0]); + cudaDeviceSynchronize(); + for (uint i = 0; i < num_radix_blocks; i++) { + printf("in body cuda 2: %lu\n", lwe_in_2[i * (big_lwe_dimension + 1) + big_lwe_dimension]); + } + shift = 2; + printf("shift in pack: %d\n", shift); // Left message is shifted auto lwe_array_pbs_in = lut->tmp_lwe_before_ks; pack_bivariate_blocks(streams, gpu_indexes, gpu_count, diff --git a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_cg_multibit.cuh b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_cg_multibit.cuh index 1b2eb56d1..646d8f4b2 100644 --- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_cg_multibit.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_cg_multibit.cuh @@ -399,7 +399,7 @@ template __host__ bool verify_cuda_programmable_bootstrap_cg_multi_bit_grid_size( int glwe_dimension, int level_count, int num_samples) { - // If Cooperative Groups is not supported, no need to check anything else + // If Cooperative Groups is not supported, no need to check anything else if (!cuda_check_support_cooperative_groups()) return false; diff --git a/tfhe/src/integer/gpu/ciphertext/mod.rs b/tfhe/src/integer/gpu/ciphertext/mod.rs index ed799d20a..83ad56e97 100644 --- a/tfhe/src/integer/gpu/ciphertext/mod.rs +++ b/tfhe/src/integer/gpu/ciphertext/mod.rs @@ -150,6 +150,7 @@ impl CudaUnsignedRadixCiphertext { ); let d_blocks = CudaLweCiphertextList::from_lwe_ciphertext_list(&h_ct, streams); + println!("cpu msg modulus cpu here: {}", radix.blocks.first().unwrap().message_modulus.0); let info = CudaRadixCiphertextInfo { blocks: radix .blocks diff --git a/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs b/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs index 43e82bcd7..8d86a33dc 100644 --- a/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs +++ b/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs @@ -179,6 +179,7 @@ impl CudaServerKey { ); let lwe_ciphertext_count = ct_left.as_ref().d_blocks.lwe_ciphertext_count(); + println!("message modulus here: {}", ct_left.as_ref().info.blocks.first().unwrap().message_modulus.0); match &self.bootstrapping_key { CudaBootstrappingKey::Classic(d_bsk) => { diff --git a/tfhe/src/shortint/server_key/bivariate_pbs.rs b/tfhe/src/shortint/server_key/bivariate_pbs.rs index f99ee93ce..41ffacb49 100644 --- a/tfhe/src/shortint/server_key/bivariate_pbs.rs +++ b/tfhe/src/shortint/server_key/bivariate_pbs.rs @@ -156,12 +156,14 @@ impl ServerKey { let modulus = (ct_right.degree.get() + 1) as u64; assert!(modulus <= acc.ct_right_modulus.0 as u64); + println!("CPU input 1: {}", &ct_left.ct.get_body().data); + println!("CPU input 2: {}", &ct_right.ct.get_body().data); + println!("Shift on CPU: {}", acc.ct_right_modulus.0 as u8); self.unchecked_scalar_mul_assign(ct_left, acc.ct_right_modulus.0 as u8); unchecked_add_assign(ct_left, ct_right); - println!("CPU input to KS PBS"); - println!("{}", &ct_left.ct.get_body().data); + println!("CPU input to KS PBS: {}", &ct_left.ct.get_body().data); // Compute the PBS self.apply_lookup_table_assign(ct_left, &acc.acc); }