Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 35 additions & 10 deletions lib/system/alloc.nim
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ type

BigChunk = object of BaseChunk # not necessarily > PageSize!
next, prev: PBigChunk # chunks of the same (or bigger) size
alignOffset: uint16 # offset from data start to actual Cell (for aligned allocs)
data {.align: MemAlign.}: UncheckedArray[byte] # start of usable memory

HeapLinks = object
Expand Down Expand Up @@ -477,7 +478,8 @@ iterator allObjects(m: var MemRegion): pointer {.inline.} =
a = a +% size
else:
let c = cast[PBigChunk](c)
yield addr(c.data)
# Yield the aligned address that was actually returned to user
yield addr(c.data) +! c.alignOffset
m.locked = false

proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {.
Expand Down Expand Up @@ -777,7 +779,9 @@ proc deallocBigChunk(a: var MemRegion, c: PBigChunk) =
sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)"
when not defined(gcDestructors):
a.deleted = getBottom(a)
del(a, a.root, cast[int](addr(c.data)))
# Use the same address that was added during allocation (accounting for alignment)
let alignedDataAddr = cast[int](addr(c.data)) +% c.alignOffset.int
del(a, a.root, alignedDataAddr)
if c.size >= HugeChunkSize: freeHugeChunk(a, c)
else: freeBigChunk(a, c)

Expand Down Expand Up @@ -845,7 +849,13 @@ when defined(heaptrack):
proc heaptrack_malloc(a: pointer, size: int) {.cdecl, importc, dynlib: heaptrackLib.}
proc heaptrack_free(a: pointer) {.cdecl, importc, dynlib: heaptrackLib.}

proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
proc applyAlignment(basePtr: pointer, alignment: int, offset: int, c: PBigChunk): pointer {.inline.} =
  ## Shifts `basePtr` forward so that `basePtr + offset` (the user payload,
  ## which lives `offset` bytes past the returned Cell header) lands on an
  ## `alignment` boundary, then returns the shifted Cell address.
  ## The shift is recorded in `c.alignOffset` so deallocation and heap
  ## introspection can recover the address that was actually handed out.
  ## Assumes `alignment` is a power of two (presumably required by `align`)
  ## and that the caller reserved `alignment - 1` spare bytes in the chunk
  ## so the shifted result still fits — TODO confirm both at the call site.
  let alignedUserData = align(cast[int](basePtr) +% offset, alignment)
  let finalResult = alignedUserData -% offset
  # NOTE(review): the shift is truncated to uint16 — fine for alignments
  # below 64 KiB, but silently wrong beyond that; confirm an upper bound
  # on `alignment` or guard before the cast.
  c.alignOffset = cast[uint16](finalResult -% cast[int](basePtr))
  result = cast[pointer](finalResult)

proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = MemAlign, offset: int = 0): pointer =
when defined(nimTypeNames):
inc(a.allocCounter)
sysAssert(allocInv(a), "rawAlloc: begin")
Expand All @@ -855,7 +865,9 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
sysAssert(size >= requestedSize, "insufficient allocated size!")
#c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)

if size <= SmallChunkSize-smallChunkOverhead():
# For custom alignments > MemAlign, force big chunk allocation
# Small chunks cannot handle arbitrary alignments due to fixed cell boundaries
if size <= SmallChunkSize-smallChunkOverhead() and alignment <= MemAlign:
template fetchSharedCells(tc: PSmallChunk) =
# Consumes cells from (potentially) foreign threads from `a.sharedFreeLists[s]`
when defined(gcDestructors):
Expand Down Expand Up @@ -950,13 +962,22 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
if deferredFrees != nil:
freeDeferredObjects(a, deferredFrees)

size = requestedSize + bigChunkOverhead() # roundup(requestedSize+bigChunkOverhead(), PageSize)
# For big chunks with custom alignment, allocate extra space for alignment adjustment
size = requestedSize + bigChunkOverhead()
if alignment > MemAlign:
size += alignment - 1
# allocate a large block
var c = if size >= HugeChunkSize: getHugeChunk(a, size)
else: getBigChunk(a, size)
sysAssert c.prev == nil, "rawAlloc 10"
sysAssert c.next == nil, "rawAlloc 11"
result = addr(c.data)
# Apply alignment if needed: align (result + offset) to alignment boundary
if alignment > MemAlign:
result = applyAlignment(result, alignment, offset, c)
else:
c.alignOffset = 0

sysAssert((cast[int](c) and (MemAlign-1)) == 0, "rawAlloc 13")
sysAssert((cast[int](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
when not defined(gcDestructors):
Expand All @@ -970,8 +991,8 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
when defined(heaptrack):
heaptrack_malloc(result, requestedSize)

proc rawAlloc0(a: var MemRegion, requestedSize: int): pointer =
result = rawAlloc(a, requestedSize)
proc rawAlloc0(a: var MemRegion, requestedSize: int, alignment: int = MemAlign, offset: int = 0): pointer =
  ## Zero-filling variant of `rawAlloc`: allocates `requestedSize` bytes
  ## with the given `alignment`/`offset` and clears the payload before
  ## handing it back.
  let p = rawAlloc(a, requestedSize, alignment, offset)
  zeroMem(p, requestedSize)
  result = p

proc rawDealloc(a: var MemRegion, p: pointer) =
Expand Down Expand Up @@ -1067,7 +1088,9 @@ when not defined(gcDestructors):
(cast[ptr FreeCell](p).zeroField >% 1)
else:
var c = cast[PBigChunk](c)
result = p == addr(c.data) and cast[ptr FreeCell](p).zeroField >% 1
# Use stored alignOffset to find the actual Cell location
let cellPtr = addr(c.data) +! c.alignOffset
result = p == cellPtr and cast[ptr FreeCell](p).zeroField >% 1

proc prepareForInteriorPointerChecking(a: var MemRegion) {.inline.} =
a.minLargeObj = lowGauge(a.root)
Expand All @@ -1091,7 +1114,8 @@ when not defined(gcDestructors):
sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
else:
var c = cast[PBigChunk](c)
var d = addr(c.data)
# Use stored alignment offset to find the actual Cell location
var d = addr(c.data) +! c.alignOffset
if p >= d and cast[ptr FreeCell](d).zeroField >% 1:
result = d
sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
Expand All @@ -1104,7 +1128,8 @@ when not defined(gcDestructors):
if avlNode != nil:
var k = cast[pointer](avlNode.key)
var c = cast[PBigChunk](pageAddr(k))
sysAssert(addr(c.data) == k, " k is not the same as addr(c.data)!")
# k should be the aligned address (addr(c.data) + alignOffset)
sysAssert(addr(c.data) +! c.alignOffset == k, " k is not the aligned address!")
if cast[ptr FreeCell](k).zeroField >% 1:
result = k
sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
Expand Down
14 changes: 10 additions & 4 deletions lib/system/gc.nim
Original file line number Diff line number Diff line change
Expand Up @@ -458,9 +458,12 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
sysAssert(allocInv(gch.region), "rawNewObj begin")
gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
collectCT(gch)
var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
# Use alignment from typ.base if available, otherwise use MemAlign
let alignment = if typ.kind == tyRef and typ.base != nil: max(typ.base.align, MemAlign) else: MemAlign
var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell), alignment, sizeof(Cell)))
#gcAssert typ.kind in {tyString, tySequence} or size >= typ.base.size, "size too small"
gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
# Check that the user data (after the Cell header) is properly aligned
gcAssert((cast[int](cellToUsr(res)) and (alignment-1)) == 0, "newObj: 2")
# now it is buffered in the ZCT
res.typ = typ
setFrameInfo(res)
Expand Down Expand Up @@ -508,9 +511,12 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl, noinline, raise
collectCT(gch)
sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")

var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
# Use alignment from typ.base if available, otherwise use MemAlign
let alignment = if typ.base != nil: max(typ.base.align, MemAlign) else: MemAlign
var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell), alignment, sizeof(Cell)))
sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
sysAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
# Check that the user data (after the Cell header) is properly aligned
sysAssert((cast[int](cellToUsr(res)) and (alignment-1)) == 0, "newObj: 2")
# now it is buffered in the ZCT
res.typ = typ
setFrameInfo(res)
Expand Down
102 changes: 102 additions & 0 deletions tests/align/talign.nim
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
discard """
ccodeCheck: "\\i @'NIM_ALIGN(128) NI mylocal1' .*"
matrix: "--mm:refc -d:useGcAssert -d:useSysAssert; --mm:orc"
targets: "c cpp"
output: "align ok"
"""
Expand Down Expand Up @@ -67,3 +68,104 @@ block: # bug #22419

f()()


# A 128-byte over-aligned field nested inside an ordinary heap object.
type Xxx = object
  v {.align: 128.}: byte

type Yyy = object
  v: byte
  v2: Xxx

# Allocate a few instances and verify that the over-aligned inner field
# always lands on a 128-byte boundary.
for _ in 1..3:
  let obj = new Yyy
  doAssert cast[uint](addr obj.v2.v) mod 128 == 0

# Reads and writes through both the plain and the aligned field must
# still behave normally.
let probe = new Yyy
probe.v2.v = 42
doAssert probe.v2.v == 42
probe.v = 7
doAssert probe.v == 7


# Repeatedly allocate refs to 16- and 32-byte aligned objects; every
# allocation must respect the declared alignment of the type.
type
  MyType16 = object
    a {.align(16).}: int

var refs16: array[10, ref MyType16]
for round in countup(0, 500):
  for slot in 0 ..< refs16.len:
    new refs16[slot]
    refs16[slot].a = round
    doAssert cast[int](refs16[slot]) mod alignof(MyType16) == 0

type
  MyType32 = object
    a {.align(32).}: int

var refs32: array[10, ref MyType32]
for round in countup(0, 500):
  for slot in 0 ..< refs32.len:
    new refs32[slot]
    refs32[slot].a = round
    doAssert cast[int](refs32[slot]) mod alignof(MyType32) == 0

# Same stress pattern for larger custom alignments (64 and 128 bytes)
# allocated through `new`.
type
  MyType64 = object
    a {.align(64).}: int

var refs64: array[10, ref MyType64]
for round in countup(0, 500):
  for slot in 0 ..< refs64.len:
    new refs64[slot]
    refs64[slot].a = round
    doAssert cast[int](refs64[slot]) mod alignof(MyType64) == 0

type
  MyType128 = object
    a {.align(128).}: int

var refs128: array[10, ref MyType128]
for round in countup(0, 500):
  for slot in 0 ..< refs128.len:
    new refs128[slot]
    refs128[slot].a = round
    doAssert cast[int](refs128[slot]) mod alignof(MyType128) == 0

# Over-aligned objects embedded one and two levels deep inside other
# heap-allocated objects: the embedded field must still honor its
# declared alignment.
type
  Inner128 = object
    v {.align(128).}: byte

  OuterWithInner = object
    prefix: int
    inner: Inner128

var outers: array[8, ref OuterWithInner]
for pass in countup(0, 200):
  for idx in 0 ..< outers.len:
    new outers[idx]
    # Touch the inner field so the object is genuinely materialized.
    outers[idx].inner.v = cast[byte](pass and 0xFF)
    doAssert cast[uint](addr outers[idx].inner) mod uint(alignof(Inner128)) == 0

# Two levels of nesting: Top -> Mid -> DeepInner.
type
  DeepInner = object
    b {.align(128).}: int

  Mid = object
    di: DeepInner

  Top = object
    m: Mid

var tops: array[4, ref Top]
for pass in countup(0, 100):
  for idx in 0 ..< tops.len:
    new tops[idx]
    tops[idx].m.di.b = pass
    doAssert cast[uint](addr tops[idx].m.di) mod uint(alignof(DeepInner)) == 0

Loading