Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
CUB: Integrate CUB 1.7.5 into Thrust to pull in the (corrected) fix f…
Browse files Browse the repository at this point in the history
…or small

data type radix sorting performance regressions.
Bug 1997368
Bug 200355591
git-commit b56409c060fe4c718066d19099fb12d8acdb2163
git-author Bryce Adelstein Lelbach aka wash <[email protected]>
VDVS: http://ausdvs.nvidia.com/Build_Results?virtualId=1000099285&which_page=current_build

Jobs: 1997368-2006 200355591-2006
[git-p4: depot-paths = "//sw/gpgpu/thrust/": change = 23593281]
  • Loading branch information
brycelelbach committed Feb 14, 2018
1 parent 5a0a118 commit 4b61388
Show file tree
Hide file tree
Showing 88 changed files with 318 additions and 610 deletions.
7 changes: 7 additions & 0 deletions internal/rename_cub_namespace.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#! /bin/bash

# Run this in //sw/gpgpu/thrust/thrust/system/cuda/detail/cub to add a THRUST_
# prefix to CUB's namespace macro.
#
# Every regular file under the current directory is edited in place: each
# occurrence of CUB_NS_P is rewritten to THRUST_CUB_NS_P.
#
# The pattern also consumes a THRUST_ prefix that is already present, so
# running this script a second time leaves the files unchanged instead of
# producing THRUST_THRUST_CUB_NS_P.
#
# find passes the file names to sed itself (-exec ... {} +), so paths that
# contain whitespace reach sed intact and sed is not started at all when no
# files are found.

find . -type f -exec sed -i -e 's/\(THRUST_\)\{0,1\}CUB_NS_P/THRUST_CUB_NS_P/g' {} +

7 changes: 7 additions & 0 deletions internal/reverse_rename_cub_namespace.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#! /bin/bash

# Run this in //sw/gpgpu/thrust/thrust/system/cuda/detail/cub to undo the
# renaming of CUB's namespace macro.
#
# Every regular file under the current directory is edited in place: each
# occurrence of THRUST_CUB_NS_P is rewritten back to CUB_NS_P.
#
# find passes the file names to sed itself (-exec ... {} +), so paths that
# contain whitespace reach sed intact and sed is not started at all when no
# files are found.

find . -type f -exec sed -i -e 's|THRUST_CUB_NS_P|CUB_NS_P|g' {} +

18 changes: 0 additions & 18 deletions internal/update_thrust_cub.sh

This file was deleted.

2 changes: 1 addition & 1 deletion thrust/system/cuda/detail/cub/agent/agent_histogram.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down
29 changes: 23 additions & 6 deletions thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -293,7 +293,7 @@ struct AgentRadixSortDownsweep
{
ValueT value = exchange_values[threadIdx.x + (ITEM * BLOCK_THREADS)];

if (FULL_TILE ||
if (FULL_TILE ||
(static_cast<OffsetT>(threadIdx.x + (ITEM * BLOCK_THREADS)) < valid_items))
{
d_values_out[relative_bin_offsets[ITEM] + threadIdx.x + (ITEM * BLOCK_THREADS)] = value;
Expand Down Expand Up @@ -332,6 +332,10 @@ struct AgentRadixSortDownsweep
Int2Type<false> is_full_tile,
Int2Type<_RANK_ALGORITHM> rank_algorithm)
{
// Register pressure work-around: moving valid_items through shfl prevents compiler
// from reusing guards/addressing from prior guarded loads
valid_items = ShuffleIndex(valid_items, 0, CUB_PTX_WARP_THREADS, 0xffffffff);

BlockLoadKeysT(temp_storage.load_keys).Load(
d_keys_in + block_offset, keys, valid_items, oob_item);

Expand Down Expand Up @@ -365,6 +369,10 @@ struct AgentRadixSortDownsweep
Int2Type<false> is_full_tile,
Int2Type<RADIX_RANK_MATCH> rank_algorithm)
{
// Register pressure work-around: moving valid_items through shfl prevents compiler
// from reusing guards/addressing from prior guarded loads
valid_items = ShuffleIndex(valid_items, 0, CUB_PTX_WARP_THREADS, 0xffffffff);

LoadDirectWarpStriped(threadIdx.x, d_keys_in + block_offset, keys, valid_items, oob_item);
}

Expand Down Expand Up @@ -398,6 +406,10 @@ struct AgentRadixSortDownsweep
Int2Type<false> is_full_tile,
Int2Type<_RANK_ALGORITHM> rank_algorithm)
{
// Register pressure work-around: moving valid_items through shfl prevents compiler
// from reusing guards/addressing from prior guarded loads
valid_items = ShuffleIndex(valid_items, 0, CUB_PTX_WARP_THREADS, 0xffffffff);

BlockLoadValuesT(temp_storage.load_values).Load(
d_values_in + block_offset, values, valid_items);

Expand All @@ -411,7 +423,7 @@ struct AgentRadixSortDownsweep
__device__ __forceinline__ void LoadValues(
ValueT (&values)[ITEMS_PER_THREAD],
OffsetT block_offset,
volatile OffsetT valid_items,
OffsetT valid_items,
Int2Type<true> is_full_tile,
Int2Type<RADIX_RANK_MATCH> rank_algorithm)
{
Expand All @@ -425,10 +437,14 @@ struct AgentRadixSortDownsweep
__device__ __forceinline__ void LoadValues(
ValueT (&values)[ITEMS_PER_THREAD],
OffsetT block_offset,
volatile OffsetT valid_items,
OffsetT valid_items,
Int2Type<false> is_full_tile,
Int2Type<RADIX_RANK_MATCH> rank_algorithm)
{
// Register pressure work-around: moving valid_items through shfl prevents compiler
// from reusing guards/addressing from prior guarded loads
valid_items = ShuffleIndex(valid_items, 0, CUB_PTX_WARP_THREADS, 0xffffffff);

LoadDirectWarpStriped(threadIdx.x, d_values_in + block_offset, values, valid_items);
}

Expand All @@ -444,10 +460,10 @@ struct AgentRadixSortDownsweep
OffsetT valid_items,
Int2Type<false> /*is_keys_only*/)
{
CTA_SYNC();

ValueT values[ITEMS_PER_THREAD];

CTA_SYNC();

LoadValues(
values,
block_offset,
Expand Down Expand Up @@ -746,6 +762,7 @@ struct AgentRadixSortDownsweep
else
{
// Process full tiles of tile_items
#pragma unroll 1
while (block_offset + TILE_ITEMS <= block_end)
{
ProcessTile<true>(block_offset);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion thrust/system/cuda/detail/cub/agent/agent_reduce.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down
12 changes: 5 additions & 7 deletions thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -454,13 +454,13 @@ struct AgentReduceByKey
// Perform exclusive tile scan
OffsetValuePairT block_aggregate; // Inclusive block-wide scan aggregate
OffsetT num_segments_prefix; // Number of segments prior to this tile
ValueOutputT total_aggregate; // The tile prefix folded with block_aggregate
OffsetValuePairT total_aggregate; // The tile prefix folded with block_aggregate
if (tile_idx == 0)
{
// Scan first tile
BlockScanT(temp_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, block_aggregate);
num_segments_prefix = 0;
total_aggregate = block_aggregate.value;
total_aggregate = block_aggregate;

// Update tile status if there are successor tiles
if ((!IS_LAST_TILE) && (threadIdx.x == 0))
Expand All @@ -474,9 +474,7 @@ struct AgentReduceByKey

block_aggregate = prefix_op.GetBlockAggregate();
num_segments_prefix = prefix_op.GetExclusivePrefix().key;
total_aggregate = reduction_op(
prefix_op.GetExclusivePrefix().value,
block_aggregate.value);
total_aggregate = prefix_op.GetInclusivePrefix();
}

// Rezip scatter items and segment indices
Expand Down Expand Up @@ -506,7 +504,7 @@ struct AgentReduceByKey
if (num_remaining == TILE_ITEMS)
{
d_unique_out[num_segments] = keys[ITEMS_PER_THREAD - 1];
d_aggregates_out[num_segments] = total_aggregate;
d_aggregates_out[num_segments] = total_aggregate.value;
num_segments++;
}

Expand Down
6 changes: 3 additions & 3 deletions thrust/system/cuda/detail/cub/agent/agent_rle.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -618,8 +618,8 @@ struct AgentRle
OffsetT num_items, ///< Total number of global input items
OffsetT num_remaining, ///< Number of global input items remaining (including this tile)
int tile_idx, ///< Tile index
OffsetT tile_offset, ///< Tile offset
ScanTileStateT &tile_status) ///< Global list of tile status
OffsetT tile_offset, ///< Tile offset
ScanTileStateT &tile_status) ///< Global list of tile status
{
if (tile_idx == 0)
{
Expand Down
2 changes: 1 addition & 1 deletion thrust/system/cuda/detail/cub/agent/agent_scan.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion thrust/system/cuda/detail/cub/agent/agent_select_if.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down
Loading

0 comments on commit 4b61388

Please sign in to comment.