Skip to content

Commit 6ad866e

Browse files
committed
⛓ Add CUDA version of time series similarity code
1 parent 979ecb5 commit 6ad866e

File tree

1 file changed

+68
-13
lines changed

1 file changed

+68
-13
lines changed

Diff for: lectures/25-divergence/branch-div.cu

+68-13
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,20 @@
99
*/
1010

1111
#include <chrono> // timing library
12+
#include <numeric> // std::accumulate()
1213
#include <iostream>
1314

15+
#include <vector>
16+
1417
namespace { // anonymous
1518

1619
using time_series = float[ 4 ];
1720
#include "../21-simt-trees/4d-points.hpp"
1821

19-
} // namespace anonymous
20-
21-
22-
namespace cpu {
23-
2422
/**
2523
* Determines whether two time_series are within a piecewise threshold of each other.
2624
*/
25+
__host__ __device__
2726
bool near_match( time_series const t1, time_series const t2, float const threshold )
2827
{
2928
for( auto i = 0u; i < 4u; ++i )
@@ -36,6 +35,24 @@ bool near_match( time_series const t1, time_series const t2, float const thresho
3635
return true;
3736
}
3837

38+
/**
 * Reports whether the time series at index i is within a piecewise
 * threshold of at least one *other* series in the n-entry dataset.
 */
__host__ __device__
bool check_match( time_series * const data, uint32_t const i, float const threshold, size_t const n )
{
    auto found = false;
    for( auto other = 0lu; !found && other < n; ++other )
    {
        found = ( other != i ) && near_match( data[ i ], data[ other ], threshold );
    }
    return found;
}
50+
51+
} // namespace anonymous
52+
53+
54+
namespace cpu {
55+
3956
/**
4057
* Calculates the number of time series that match at least one other in the dataset,
4158
* given a particular threshold for piecewise error tolerance.
@@ -48,21 +65,58 @@ template < typename T >
4865
#pragma omp parallel for reduction ( +:count )
4966
for( auto i = 0lu; i < n; ++i )
5067
{
51-
for( auto j = 0lu; j < n; ++j )
68+
if( check_match( points, i, threshold, n ) )
5269
{
53-
if( i != j && near_match( points[ i ], points[ j ], threshold ) )
54-
{
55-
++count;
56-
break;
57-
}
70+
++count;
5871
}
5972
}
6073

6174
return count;
6275
}
6376

6477
} // namespace cpu
78+
namespace gpu {

// Threads per block for has_match launches (multiple of the 32-thread warp size).
auto const block_size = 512u;

/**
 * One thread per time series: thread i writes 1 to result[ i ] iff series i
 * is within `threshold` of at least one other series in data[ 0..n ), else 0.
 *
 * Expects a 1D launch with at least n total threads; threads past the end
 * of the data exit via the bounds check.
 */
template < typename S, typename T >
__global__
void has_match( uint8_t * result, S * data, T const threshold, size_t const n )
{
    auto const i = threadIdx.x + blockIdx.x * blockDim.x;

    if( i < n )
    {
        result[ i ] = check_match( data, i, threshold, n );
    }
}

/**
 * Calculates the number of time series that match at least one other in the
 * dataset, given a particular threshold for piecewise error tolerance.
 *
 * GPU implementation: copies the first n entries of the global `points`
 * array to the device, runs one thread per series, then sums the 0/1 match
 * flags on the host. Returns the count as a T.
 */
template < typename T >
T similar_time_series( T const threshold, size_t const n )
{
    // An empty dataset has no matches; also avoids a zero-block kernel
    // launch, which is itself a CUDA error.
    if( n == 0 )
    {
        return T{};
    }

    auto const size = n * sizeof( points[ 0 ] );
    // Integer ceiling division; the previous ceil( n / (float)block_size )
    // can round incorrectly once n exceeds float's 24-bit integer range.
    auto const num_blocks = static_cast< unsigned >( ( n + block_size - 1 ) / block_size );

    std::vector< uint8_t > found_masks( n, 0 );
    uint8_t *dev_output = nullptr;
    time_series *dev_input = nullptr;

    cudaMalloc( (void **) &dev_output, n * sizeof( uint8_t ) );
    cudaMalloc( (void **) &dev_input, size );

    cudaMemcpy( dev_input, points, size, cudaMemcpyHostToDevice );

    has_match<<< num_blocks, block_size >>>( dev_output, dev_input, threshold, n );

    // Kernel launches return no status directly; surface configuration
    // errors here rather than letting them fail silently.
    auto const launch_status = cudaGetLastError();
    if( launch_status != cudaSuccess )
    {
        std::cerr << "has_match launch failed: "
                  << cudaGetErrorString( launch_status ) << std::endl;
    }

    // cudaMemcpy is blocking, so it also synchronises with the kernel;
    // no explicit cudaDeviceSynchronize() is required.
    cudaMemcpy( found_masks.data(), dev_output, n * sizeof( uint8_t ), cudaMemcpyDeviceToHost );

    cudaFree( dev_input );
    cudaFree( dev_output );

    // Accumulate in the caller's type T: the old 0.0f seed forced float
    // accumulation, which drops counts once they pass ~2^24.
    return std::accumulate( found_masks.cbegin(), found_masks.cend(), T{} );
}

} // namespace gpu
66120

67121

68122
int main( int argc, char **argv )
@@ -75,15 +129,16 @@ int main( int argc, char **argv )
75129
}
76130

77131
auto const num_trials = 20u;
78-
auto const threshold = atof( argv[ 1 ] );
132+
auto const threshold = static_cast< float >( atof( argv[ 1 ] ) );
79133
auto const n = sizeof( points ) / sizeof( points[ 0 ] ) / 25; // increase at your own risk!
80134
auto sum = 0.0;
81135

82136
auto const start_time = std::chrono::system_clock::now();
83137

84138
for( auto i = 0u; i < num_trials; ++i )
85139
{
86-
sum += cpu::similar_time_series( threshold, n );
140+
// sum += cpu::similar_time_series( threshold, n );
141+
sum += gpu::similar_time_series( threshold, n );
87142
}
88143

89144
auto const end_time = std::chrono::system_clock::now();

0 commit comments

Comments
 (0)