Skip to content

Commit

Permalink
SpMSpVMasked
Browse files Browse the repository at this point in the history
  • Loading branch information
kirillgarbar committed Jun 1, 2024
1 parent a4da73d commit b6e13fd
Show file tree
Hide file tree
Showing 5 changed files with 274 additions and 81 deletions.
34 changes: 12 additions & 22 deletions src/GraphBLAS-sharp.Backend/Algorithms/BFS.fs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ module internal BFS =
Operations.SpMVInPlace add mul clContext workGroupSize

let spMSpV =
Operations.SpMSpVBool add mul clContext workGroupSize
Operations.SpMSpVMaskedBool add mul clContext workGroupSize

let zeroCreate =
Vector.zeroCreate clContext workGroupSize
Expand All @@ -145,9 +145,6 @@ module internal BFS =
let maskComplementedInPlace =
Vector.map2InPlace Mask.complementedOp clContext workGroupSize

let maskComplemented =
Vector.map2Sparse Mask.complementedOp clContext workGroupSize

let fillSubVectorInPlace =
Vector.assignByMaskInPlace (Mask.assign) clContext workGroupSize

Expand Down Expand Up @@ -190,28 +187,21 @@ module internal BFS =
match frontier with
| ClVector.Sparse _ ->
//Getting new frontier
match spMSpV queue matrix frontier with
match spMSpV queue matrix frontier levels with
| None ->
frontier.Dispose()
stop <- true
| Some newFrontier ->
| Some newMaskedFrontier ->
frontier.Dispose()
//Filtering visited vertices
match maskComplemented queue DeviceOnly newFrontier levels with
| None ->
stop <- true
newFrontier.Dispose()
| Some newMaskedFrontier ->
newFrontier.Dispose()

//Push/pull
let NNZ = getNNZ queue newMaskedFrontier

if (push NNZ newMaskedFrontier.Size) then
frontier <- newMaskedFrontier
else
frontier <- toDense queue DeviceOnly newMaskedFrontier
newMaskedFrontier.Dispose()

//Push/pull
let NNZ = getNNZ queue newMaskedFrontier

if (push NNZ newMaskedFrontier.Size) then
frontier <- newMaskedFrontier
else
frontier <- toDense queue DeviceOnly newMaskedFrontier
newMaskedFrontier.Dispose()
| ClVector.Dense oldFrontier ->
//Getting new frontier
spMVInPlace queue matrix frontier frontier
Expand Down
4 changes: 2 additions & 2 deletions src/GraphBLAS-sharp.Backend/Common/ClArray.fs
Original file line number Diff line number Diff line change
Expand Up @@ -906,14 +906,14 @@ module ClArray =
<@ fun (ndRange: Range1D) (length: int) (array: ClArray<'a>) (count: ClCell<int>) ->
let gid = ndRange.GlobalID0
let mutable countLocal = 0
let gSize = ndRange.GlobalWorkSize
let step = ndRange.GlobalWorkSize

let mutable i = gid

while i < length do
let res = (%predicate) array.[i]
if res then countLocal <- countLocal + 1
i <- i + gSize
i <- i + step

atomic (+) count.Value countLocal |> ignore @>

Expand Down
46 changes: 45 additions & 1 deletion src/GraphBLAS-sharp.Backend/Operations/Operations.fs
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ module Operations =
| _ -> failwith "Not implemented yet"

/// <summary>
/// CSR Matrix - sparse vector multiplication. Optimized for bool OR and AND operations.
/// CSR Matrix - sparse vector multiplication. Optimized for bool OR and AND operations by skipping reduction stage.
/// </summary>
/// <param name="add">Type of binary function to reduce entries.</param>
/// <param name="mul">Type of binary function to combine entries.</param>
Expand All @@ -352,6 +352,50 @@ module Operations =
| ClMatrix.CSR m, ClVector.Sparse v -> Option.map ClVector.Sparse (run queue m v)
| _ -> failwith "Not implemented yet"

/// <summary>
/// CSR Matrix - sparse vector multiplication with mask. Mask is complemented.
/// </summary>
/// <param name="add">Type of binary function to reduce entries.</param>
/// <param name="mul">Type of binary function to combine entries.</param>
/// <param name="clContext">OpenCL context.</param>
/// <param name="workGroupSize">Should be a power of 2 and greater than 1.</param>
let SpMSpVMasked
    (add: Expr<'c option -> 'c option -> 'c option>)
    (mul: Expr<'a option -> 'b option -> 'c option>)
    (clContext: ClContext)
    workGroupSize
    =

    // Built once here; the returned function reuses it on every call.
    let multiplyMasked =
        SpMSpV.Masked.runMasked add mul clContext workGroupSize

    fun (queue: RawCommandQueue) (matrix: ClMatrix<'a>) (vector: ClVector<'b>) (mask: ClVector<'d>) ->
        match matrix, vector, mask with
        // The only supported combination: CSR matrix, sparse vector, dense mask.
        | ClMatrix.CSR csrMatrix, ClVector.Sparse sparseVector, ClVector.Dense denseMask ->
            multiplyMasked queue csrMatrix sparseVector denseMask
            |> Option.map ClVector.Sparse
        | _ -> failwith "Not implemented yet"

/// <summary>
/// CSR Matrix - sparse vector multiplication with mask. Mask is complemented. Optimized for bool OR and AND operations by skipping reduction stage.
/// </summary>
/// <param name="add">Type of binary function to reduce entries.</param>
/// <param name="mul">Type of binary function to combine entries.</param>
/// <param name="clContext">OpenCL context.</param>
/// <param name="workGroupSize">Should be a power of 2 and greater than 1.</param>
let SpMSpVMaskedBool
    (add: Expr<bool option -> bool option -> bool option>)
    (mul: Expr<bool option -> bool option -> bool option>)
    (clContext: ClContext)
    workGroupSize
    =

    let run =
        SpMSpV.Masked.runMaskedBoolStandard add mul clContext workGroupSize

    // `mul` fixes the element type of both the matrix and the vector to bool, so they are
    // annotated as such; spelling them as 'a / 'b made the compiler report that the code is
    // less generic than its annotations. Only the mask element type 'd is really generic.
    fun (queue: RawCommandQueue) (matrix: ClMatrix<bool>) (vector: ClVector<bool>) (mask: ClVector<'d>) ->
        match matrix, vector, mask with
        // The only supported combination: CSR matrix, sparse vector, dense mask.
        | ClMatrix.CSR m, ClVector.Sparse v, ClVector.Dense mask -> Option.map ClVector.Sparse (run queue m v mask)
        | _ -> failwith "Not implemented yet"

/// <summary>
/// CSR Matrix - sparse vector multiplication.
/// </summary>
Expand Down
215 changes: 215 additions & 0 deletions src/GraphBLAS-sharp.Backend/Operations/SpMSpV.fs
Original file line number Diff line number Diff line change
Expand Up @@ -290,3 +290,218 @@ module SpMSpV =
Indices = resultIndices
Values = create queue DeviceOnly resultIndices.Length true
Size = matrix.ColumnCount })

module Masked =

/// <summary>
/// Builds a routine that counts the products a masked CSR matrix - sparse vector multiplication
/// will produce: the stored matrix entries, over the rows selected by the vector's indices,
/// whose column holds no value (None) in the mask.
/// </summary>
/// <param name="clContext">OpenCL context.</param>
/// <param name="workGroupSize">Work group size the kernel is launched with.</param>
let private count (clContext: ClContext) workGroupSize =

    let countKernel =
        <@ fun (ndRange: Range1D) vectorLength (vectorIndices: ClArray<int>) (vectorMask: ClArray<'d option>) (matrixRowPointers: ClArray<int>) (matrixColumns: ClArray<int>) (result: ClCell<int>) ->
            let gid = ndRange.GlobalID0
            let step = ndRange.GlobalWorkSize

            // Accumulated privately by the work-item, so the shared counter is updated
            // once per work-item instead of once per visited matrix row.
            let mutable countLocal = 0

            let mutable idx = gid

            while idx < vectorLength do
                // Each vector entry selects one matrix row.
                let vectorIndex = vectorIndices.[idx]

                let rowStart = matrixRowPointers.[vectorIndex]
                let rowEnd = matrixRowPointers.[vectorIndex + 1]

                for i in rowStart .. rowEnd - 1 do
                    // Only columns without a value in the mask contribute to the result.
                    match vectorMask.[matrixColumns.[i]] with
                    | None -> countLocal <- countLocal + 1
                    | Some _ -> ()

                idx <- idx + step

            atomic (+) result.Value countLocal |> ignore @>

    let countKernel = clContext.Compile countKernel

    fun (queue: RawCommandQueue) (matrix: ClMatrix.CSR<'a>) (vector: ClVector.Sparse<'b>) (vectorMask: ClArray<'d option>) ->

        let length = vector.NNZ

        // NOTE(review): presumably ceil(length / workGroupSize) clamped to [1, 1024] - confirm against Utils.divUpClamp.
        let numberOfGroups =
            Utils.divUpClamp length workGroupSize 1 1024

        let result = clContext.CreateClCell(0)

        let ndRange =
            Range1D.CreateValid(numberOfGroups * workGroupSize, workGroupSize)

        let kernel = countKernel.GetKernel()

        kernel.KernelFunc ndRange length vector.Indices vectorMask matrix.RowPointers matrix.Columns result

        queue.RunKernel kernel

        // The counter is returned to the host as a plain int.
        result.ToHostAndFree queue

/// <summary>
/// Builds a routine that multiplies every stored entry of the matrix rows selected by the sparse
/// vector with the corresponding vector value, keeping only the entries whose column holds no
/// value (None) in the mask. Returns the column indices and the products as two arrays of
/// <c>resultSize</c> elements.
/// </summary>
/// <param name="clContext">OpenCL context.</param>
/// <param name="mul">Type of binary function to combine entries.</param>
/// <param name="workGroupSize">Work group size the kernel is launched with.</param>
let private multiplyValues
    (clContext: ClContext)
    (mul: Expr<'a option -> 'b option -> 'c option>)
    workGroupSize
    =

    let multiplyKernel =
        <@ fun (ndRange: Range1D) vectorLength (vectorIndices: ClArray<int>) (vectorValues: ClArray<'b>) (vectorMask: ClArray<'d option>) (matrixRowPointers: ClArray<int>) (matrixColumns: ClArray<int>) (matrixValues: ClArray<'a>) (resultOffset: ClCell<int>) (resultIndices: ClArray<int>) (resultValues: ClArray<'c option>) ->
            let stride = ndRange.GlobalWorkSize

            let mutable idx = ndRange.GlobalID0

            while idx < vectorLength do
                let row = vectorIndices.[idx]
                let vectorValue = vectorValues.[idx]

                let rowStart = matrixRowPointers.[row]
                let rowEnd = matrixRowPointers.[row + 1]

                // First pass: how many entries of this row pass the mask.
                let mutable unmasked = 0

                for j in rowStart .. rowEnd - 1 do
                    match vectorMask.[matrixColumns.[j]] with
                    | None -> unmasked <- unmasked + 1
                    | Some _ -> ()

                // Reserve a contiguous slice of the output for this row.
                let mutable writePosition = atomic (+) resultOffset.Value unmasked

                // Second pass: write the products into the reserved slice.
                for k in rowStart .. rowEnd - 1 do
                    let column = matrixColumns.[k]

                    // TODO: Pass mask operation
                    match vectorMask.[column] with
                    | None ->
                        resultIndices.[writePosition] <- column
                        resultValues.[writePosition] <- (%mul) (Some matrixValues.[k]) (Some vectorValue)
                        writePosition <- writePosition + 1
                    | Some _ -> ()

                idx <- idx + stride @>

    let compiledKernel = clContext.Compile multiplyKernel

    fun (queue: RawCommandQueue) (matrix: ClMatrix.CSR<'a>) (vector: ClVector.Sparse<'b>) (vectorMask: ClArray<'d option>) (resultSize: int) ->

        let groupCount =
            Utils.divUpClamp vector.NNZ workGroupSize 1 1024

        let ndRange =
            Range1D.CreateValid(groupCount * workGroupSize, workGroupSize)

        let productIndices =
            clContext.CreateClArrayWithSpecificAllocationMode<int>(DeviceOnly, resultSize)

        let productValues =
            clContext.CreateClArrayWithSpecificAllocationMode<'c option>(DeviceOnly, resultSize)

        // Shared write cursor, starting at 0, from which the kernel reserves output slices.
        let writeCursor = clContext.CreateClCell 0

        let kernel = compiledKernel.GetKernel()

        kernel.KernelFunc
            ndRange
            vector.NNZ
            vector.Indices
            vector.Values
            vectorMask
            matrix.RowPointers
            matrix.Columns
            matrix.Values
            writeCursor
            productIndices
            productValues

        queue.RunKernel kernel

        writeCursor.Free()

        productIndices, productValues

/// <summary>
/// Masked CSR matrix - sparse vector multiplication: counts the products, computes them, sorts
/// them by index and reduces entries sharing an index with <c>add</c>.
/// Yields None when the count is zero or the reduction yields None.
/// </summary>
/// <param name="add">Type of binary function to reduce entries.</param>
/// <param name="mul">Type of binary function to combine entries.</param>
/// <param name="clContext">OpenCL context.</param>
/// <param name="workGroupSize">Work group size passed to every kernel built here.</param>
let runMasked
    (add: Expr<'c option -> 'c option -> 'c option>)
    (mul: Expr<'a option -> 'b option -> 'c option>)
    (clContext: ClContext)
    workGroupSize
    =

    let countProducts = count clContext workGroupSize

    let multiply =
        multiplyValues clContext mul workGroupSize

    let sortByIndex =
        Sort.Bitonic.sortKeyValuesInplace clContext workGroupSize

    let reduceByIndex =
        Reduce.ByKey.Option.segmentSequential add clContext workGroupSize

    fun (queue: RawCommandQueue) (matrix: ClMatrix.CSR<'a>) (vector: ClVector.Sparse<'b>) (mask: ClArray<'d option>) ->

        let productCount = countProducts queue matrix vector mask

        if productCount = 0 then
            // Nothing passes the mask, so there is no result vector.
            None
        else
            let productIndices, productValues =
                multiply queue matrix vector mask productCount

            sortByIndex queue productIndices productValues

            let reduced =
                reduceByIndex queue DeviceOnly productIndices productValues

            // The intermediate arrays are no longer needed once the reduction has been issued.
            productIndices.Free()
            productValues.Free()

            reduced
            |> Option.map
                (fun (reducedValues, reducedKeys) ->
                    { Context = clContext
                      Indices = reducedKeys
                      Values = reducedValues
                      Size = matrix.ColumnCount })

/// <summary>
/// Masked CSR matrix - sparse vector multiplication that skips the reduction stage: the products
/// are counted, computed and sorted by index, duplicate indices are removed and every remaining
/// index is given the value <c>true</c>. Yields None when the count is zero.
/// </summary>
/// <param name="add">Type of binary function to reduce entries. Not used by this implementation.</param>
/// <param name="mul">Type of binary function to combine entries.</param>
/// <param name="clContext">OpenCL context.</param>
/// <param name="workGroupSize">Work group size passed to every kernel built here.</param>
let runMaskedBoolStandard
    (add: Expr<'c option -> 'c option -> 'c option>)
    (mul: Expr<'a option -> 'b option -> 'c option>)
    (clContext: ClContext)
    workGroupSize
    =

    let countProducts = count clContext workGroupSize

    let multiply =
        multiplyValues clContext mul workGroupSize

    let sortByIndex =
        Sort.Bitonic.sortKeyValuesInplace clContext workGroupSize

    let distinctIndices =
        GraphBLAS.FSharp.ClArray.removeDuplications clContext workGroupSize

    let fill =
        GraphBLAS.FSharp.ClArray.create clContext workGroupSize

    fun (queue: RawCommandQueue) (matrix: ClMatrix.CSR<'a>) (vector: ClVector.Sparse<'b>) (mask: ClArray<'d option>) ->

        let productCount = countProducts queue matrix vector mask

        if productCount = 0 then
            // Nothing passes the mask, so there is no result vector.
            None
        else
            let productIndices, productValues =
                multiply queue matrix vector mask productCount

            sortByIndex queue productIndices productValues

            let resultIndices = distinctIndices queue productIndices

            productIndices.Free()
            productValues.Free()

            // The products themselves are discarded: every surviving index is set to true.
            let resultValues =
                fill queue DeviceOnly resultIndices.Length true

            Some
                { Context = clContext
                  Indices = resultIndices
                  Values = resultValues
                  Size = matrix.ColumnCount }
Loading

0 comments on commit b6e13fd

Please sign in to comment.