Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

move bounds checks to constructor #32

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
16 changes: 16 additions & 0 deletions src/bit_vector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,22 @@ end
struct BlobBitVector <: AbstractArray{Bool, 1}
    # Blob referencing the storage of the bits.
    data::Blob{UInt64}
    # Number of bits in the vector.
    length::Int64

    # Validating inner constructor: asserts a non-negative `length` and, unless the
    # caller elided bounds checks, throws an `InvalidBlobError` when `data` does not
    # have enough available bytes.
    function BlobBitVector(data::Blob{UInt64}, length::Int64)
        @assert length >= 0
        @boundscheck begin
            # NOTE(review): the divisor is `self_size(UInt64)` (presumably 8), so this
            # counts whole bytes rather than whole UInt64 words -- confirm intended.
            required = cld(length, self_size(UInt64))
            if available_size(data) < required
                throw(InvalidBlobError(
                    BlobBitVector,
                    getfield(data, :base),
                    getfield(data, :offset),
                    getfield(data, :limit),
                    div(length + 7, 8),
                ))
            end
        end
        return new(data, length)
    end
end

Base.@propagate_inbounds function get_address(blob::BlobBitVector, i::Int)::BlobBit
Expand Down
139 changes: 118 additions & 21 deletions src/blob.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,58 @@
# Exception thrown by the bounds checks in this package when a blob-backed value
# does not fit into the memory region `offset:limit` of its allocation.
struct InvalidBlobError <: Exception
# The type that was being constructed when the check failed.
type::Type
# Base pointer of the allocation.
base::Ptr{Nothing}
# Offset of the blob relative to `base`, in bytes.
offset::Int64
# Limit of the blob relative to `base`, in bytes.
limit::Int64
# Element count reported by the failing check; `showerror` prints it as a multiplier.
length::Int64
end

"""
    Base.showerror(io::IO, e::InvalidBlobError)

Print a one-line description of `e` to `io`: the type, the number of bytes it needs, and
the number of bytes available between `e.offset` and `e.limit`.
"""
function Base.showerror(io::IO, e::InvalidBlobError)
    available = e.limit - e.offset
    print(
        io,
        "InvalidBlobError: ", e.type,
        " needs ", e.length, " * ", self_size(e.type), " bytes. ",
        "Got length(", e.offset, ":", e.limit, ") == ", available, " bytes",
    )
end

"""
A pointer to a `T` stored inside a Blob.
Blob{T}

A pointer to a memory array that stores a `T`.

The fields are stored compactly in memory without alignment, i.e. each basic field `f` takes up
`sizeof(fieldtype(T, :f))` bytes. Blobs inside `T` take just the offset, i.e. 8 bytes.
This is different from Julia's memory layout.

Structs made up only of primitive types, or of other such structs, can be stored out of the box,
for example:

```julia
struct Foo
x::Int64
y::Float64
end

blob = Blobs.malloc(Foo)
blob[] = Foo(42, 3.14)
```

In order to store variable-size data structures (`BlobVector`, `BlobBitVector`,
`BlobString` or your own implementation) or a `Blob`, you need to implement `child_size`
and `init` for your type.

Example:
```julia
struct FooString
s::BlobString
i::Int64
end

function Blobs.child_size(::Type{FooString}, string_length::Int64)
return child_size(BlobString, string_length)
end

function Blobs.init(blob::Blob{FooString}, free::Blob{Nothing}, string_length::Int64)
free = Blobs.init(blob.s, free, string_length)
blob.i[] = 0
return free
end
```
"""
struct Blob{T}
base::Ptr{Nothing}
Expand All @@ -8,54 +61,99 @@ struct Blob{T}

function Blob{T}(base::Ptr{Nothing}, offset::Int64, limit::Int64) where {T}
@assert isbitstype(T)
new(base, offset, limit)
@boundscheck _bounds_check(base, offset, limit, self_size(T), T)
new{T}(base, offset, limit)
end
end

@noinline function _bounds_check(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the rationale for making this @noinline? Julia doesn't do it for checkbounds(::AbstractArray, I...), for example.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bounds check (building the strings etc.) was the reason for the large amount of generated LLVM code. Therefore, I want to prevent it from being inlined into the specialized function.

base::Ptr{Nothing},
offset::Int64,
limit::Int64,
self_size_T::Int64,
@nospecialize(T::DataType))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this @nospecialize actually do anything?

Julia doesn't specialize arguments that look like t::Type. (It does specialize arguments like ::Type{T}) where T.)

julia> foo(base::Int64, t::DataType) = base+sizeof(t)
foo (generic function with 1 method)

julia> Base.specializations(@which foo(3,Int))
Base.MethodSpecializations(MethodInstance for foo(::Int64, ::DataType))

julia> Base.specializations(@which foo(3,UInt8))
Base.MethodSpecializations(MethodInstance for foo(::Int64, ::DataType))

julia> bar(base::Int64, ::Type{T}) where T = base+sizeof(T)
bar (generic function with 1 method)

julia> Base.specializations(@which bar(3,Int))
Base.MethodSpecializations(MethodInstance for bar(::Int64, ::Type{Int64}))

julia> Base.specializations(@which bar(3,UInt8))
Base.MethodSpecializations(svec(MethodInstance for bar(::Int64, ::Type{Int64}), MethodInstance for bar(::Int64, ::Type{UInt8}), nothing, nothing, nothing, nothing, nothing))

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, good catch. I added it because I was surprised that there is still more code generated by the constructor (I believe). But it didn't change anything for the reason that you point out.

if offset < 0 || offset + self_size_T > limit
throw(InvalidBlobError(Blob{T}, base, offset, limit, 1))
end
if limit > 0 && base == Ptr{Nothing}(0)
throw(AssertionError("Null pointer reference Blob{$(T)}"))
end
end

"""
Blob(ref::Base.RefValue{T}) where T

Create a `Blob{T}` from a Julia-allocated object.
**Danger**: This only works if the memory layout of the Julia struct is the same as that of the Blob.
"""
function Blob(ref::Base.RefValue{T}) where T
Blob{T}(pointer_from_objref(ref), 0, sizeof(T))
@assert self_size(T) == sizeof(T) "$(T) cannot of aligned fields or Blobs"
@inbounds Blob{T}(pointer_from_objref(ref), 0, self_size(T))
end

"""
Blob{T}(base::Ptr{T}, offset::Int64 = 0, limit::Int64 = sizeof(T)) where T

Create a `Blob{T}` from a pointer.
"""
Base.@propagate_inbounds \
function Blob(base::Ptr{T}, offset::Int64 = 0, limit::Int64 = sizeof(T)) where {T}
Blob{T}(Ptr{Nothing}(base), offset, limit)
end

function Blob{T}(blob::Blob) where T
"""
Blob{T}(blob::Blob)

Make a copy and potentially change the type of a `Blob`.
"""
Base.@propagate_inbounds function Blob{T}(blob::Blob) where T
Blob{T}(getfield(blob, :base), getfield(blob, :offset), getfield(blob, :limit))
robertbuessow marked this conversation as resolved.
Show resolved Hide resolved
end

"""
available_size(blob::Blob{T}) where T

The size of memory this `Blob` and it's children own. `blob.limit - blob.offset`.
"""
available_size(blob::Blob{T}) where T = getfield(blob, :limit) - getfield(blob, :offset)

"""
    assert_same_allocation(blob1::Blob, blob2::Blob)

Assert that `blob1` and `blob2` have the same `base` pointer.
"""
function assert_same_allocation(blob1::Blob, blob2::Blob)
    base1 = getfield(blob1, :base)
    base2 = getfield(blob2, :base)
    @assert base1 == base2 "These blobs do not share the same allocation: $blob1 - $blob2"
    return nothing
end

""""
pointer(blob::Blob{T}) where T

Get a pointer to the data in the `blob`. Note that you cannot `unsafe_load`
from this pointer, since the data is not aligned.
"""
function Base.pointer(blob::Blob{T}) where T
convert(Ptr{T}, getfield(blob, :base) + getfield(blob, :offset))
end

function Base.:+(blob::Blob{T}, offset::Integer) where T
""""
Base:+(::Blob, ::Integer)

Increase the offset of a `Blob` by `offset`.
"""
Base.@propagate_inbounds function Base.:+(blob::Blob{T}, offset::Integer) where T
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's traditional in the C world to allow people to construct a pointer that goes just off the end of an array. (But not to access that pointer)

This code prohibits the construction of the pointer. Is that where we want to go?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can create a Blob{Nothing}(<someptr>, 0, 0); that actually happens. I don't think a Blob is an array, so I don't see a need for anything else.

Blob{T}(getfield(blob, :base), getfield(blob, :offset) + offset, getfield(blob, :limit))
end

"""
Base:-(::Blob, ::Blob)

Get the offset difference of two blobs in the same allocation.
"""
function Base.:-(blob1::Blob, blob2::Blob)
assert_same_allocation(blob1, blob2)
getfield(blob1, :offset) - getfield(blob2, :offset)
end

"""
    boundscheck(blob::Blob{T}) where T

Check that a `T` fits between the offset and the limit of `blob` and that the base pointer
is not null. Throws a `BoundsError` for the former and fails an assertion for the latter.
The whole check sits in a `@boundscheck` block, so callers can elide it with `@inbounds`.
"""
@inline function boundscheck(blob::Blob{T}) where T
    @boundscheck begin
        start = getfield(blob, :offset)
        if start < 0 || start + self_size(T) > getfield(blob, :limit)
            throw(BoundsError(blob))
        end
        @assert (getfield(blob, :base) != Ptr{Nothing}(0)) "Null pointer dereference in $(typeof(blob))"
    end
end

Base.@propagate_inbounds function Base.getindex(blob::Blob{T}) where T
boundscheck(blob)
function Base.getindex(blob::Blob{T}) where T
unsafe_load(blob)
end

# TODO(jamii) do we need to align data?
"""
self_size(::Type{T}, args...) where {T}

Expand Down Expand Up @@ -88,22 +186,21 @@ end

@generated function Base.getindex(blob::Blob{T}, ::Type{Val{field}}) where {T, field}
i = findfirst(isequal(field), fieldnames(T))
@assert i != nothing "$T has no field $field"
@assert i !== nothing "$T has no field $field"
quote
$(Expr(:meta, :inline))
Blob{$(fieldtype(T, i))}(blob + $(blob_offset(T, i)))
@inbounds Blob{$(fieldtype(T, i))}(blob) + $(blob_offset(T, i))
end
end

@inline function Base.getindex(blob::Blob{T}, i::Int) where {T}
@boundscheck if i < 1 || i > fieldcount(T)
throw(BoundsError(blob, i))
end
return Blob{fieldtype(T, i)}(blob + Blobs.blob_offset(T, i))
return @inbounds Blob{fieldtype(T, i)}(blob) + Blobs.blob_offset(T, i)
end

"""
    setindex!(blob::Blob{T}, value::T) where T

Bounds-check `blob` and then write `value` through `unsafe_store!(blob, value)`.
"""
Base.@propagate_inbounds function Base.setindex!(blob::Blob{T}, value::T) where {T}
    boundscheck(blob)
    return unsafe_store!(blob, value)
end

Expand Down
19 changes: 11 additions & 8 deletions src/layout.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Initialize `blob`.
Assumes that `blob` is at least `self_size(T) + child_size(T, args...)` bytes long.
"""
function init(blob::Blob{T}, args...) where T
init(blob, Blob{Nothing}(blob + self_size(T)), args...)
init(blob, Blob{Nothing}(blob) + self_size(T), args...)
end

"""
Expand Down Expand Up @@ -78,7 +78,9 @@ Allocate an uninitialized `Blob{T}`.
"""
function malloc(::Type{T}, args...)::Blob{T} where T
size = self_size(T) + child_size(T, args...)
Blob{T}(Libc.malloc(size), 0, size)
base = Libc.malloc(size)
base == Ptr{Nothing}(0) && throw(OutOfMemoryError())
return @inbounds Blob{T}(base, 0, size)
end

"""
Expand All @@ -88,7 +90,9 @@ Allocate a zero-initialized `Blob{T}`.
"""
function calloc(::Type{T}, args...)::Blob{T} where T
size = self_size(T) + child_size(T, args...)
Blob{T}(Libc.calloc(1, size), 0, size)
base = Libc.calloc(1, size)
base == Ptr{Nothing}(0) && throw(OutOfMemoryError())
return @inbounds Blob{T}(base, 0, size)
end

"""
Expand All @@ -97,11 +101,10 @@ end
Allocate and initialize a new `Blob{T}`.
"""
function malloc_and_init(::Type{T}, args...)::Blob{T} where T
size = self_size(T) + child_size(T, args...)
blob = Blob{T}(Libc.malloc(size), 0, size)
used = init(blob, args...)
@assert used - blob == size
blob
blob = malloc(T, args...)
used = @inbounds init(blob, args...)
@assert used - blob == available_size(blob)
return blob
end

"""
Expand Down
14 changes: 13 additions & 1 deletion src/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@
struct BlobString <: AbstractString
    # Blob referencing the bytes of the string.
    data::Blob{UInt8}
    len::Int64 # in bytes

    # Validating inner constructor: asserts a non-negative `len` and, unless the caller
    # elided bounds checks, throws an `InvalidBlobError` when `len` bytes do not fit
    # into the memory available to `data`.
    function BlobString(data::Blob{UInt8}, len::Int64)
        @assert len >= 0
        @boundscheck begin
            needed = len * self_size(UInt8)
            if available_size(data) < needed
                throw(InvalidBlobError(
                    BlobString,
                    getfield(data, :base),
                    getfield(data, :offset),
                    getfield(data, :limit),
                    len,
                ))
            end
        end
        return new(data, len)
    end
end

# Pointer to the string's data; forwards to the two-argument `pointer` method with index 1.
function Base.pointer(blob::BlobString)
    return pointer(blob, 1)
end
Expand Down Expand Up @@ -187,4 +200,3 @@ end
## overload methods for efficiency ##

# `i` is valid when it is within bounds and `thisind` maps it to itself.
function Base.isvalid(s::BlobString, i::Int)
    checkbounds(Bool, s, i) || return false
    return thisind(s, i) == i
end

13 changes: 13 additions & 0 deletions src/vector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@
struct BlobVector{T} <: AbstractArray{T, 1}
    # Blob referencing the storage of the elements.
    data::Blob{T}
    # Number of elements in the vector.
    length::Int64

    # Validating inner constructor: asserts a non-negative `length` and, unless the
    # caller elided bounds checks, throws an `InvalidBlobError` when `length` elements
    # of `self_size(T)` bytes each do not fit into the memory available to `data`.
    function BlobVector{T}(data::Blob{T}, length::Int64) where T
        @assert length >= 0
        @boundscheck begin
            # `widemul` computes the product in a wider integer type. A plain Int64
            # multiplication wraps around silently for very large `length`, which
            # would let an oversized vector pass this check.
            if widemul(length, self_size(T)) > available_size(data)
                throw(InvalidBlobError(
                    BlobVector{T},
                    getfield(data, :base),
                    getfield(data, :offset),
                    getfield(data, :limit),
                    length,
                ))
            end
        end
        return new{T}(data, length)
    end
end

function Base.pointer(bv::BlobVector{T}, i::Integer=1) where {T}
Expand Down
Loading