@@ -71,17 +71,7 @@ type Head struct {
7171
7272 tombstones memTombstones
7373
74- // Mutex for accessing writeLastId and writesOpen.
75- writeMtx sync.Mutex
76- // Each write is given an internal id.
77- lastWriteID uint64
78- // Which writes are currently in progress.
79- writesOpen map [uint64 ]struct {}
80- // Mutex for accessing readLastId.
81- // If taking both writeMtx and readMtx, take writeMtx first.
82- readMtx sync.Mutex
83- // All current in use isolationStates. This is a doubly-linked list.
84- readsOpen * IsolationState
74+ iso * isolation
8575}
8676
8777type headMetrics struct {
@@ -166,15 +156,15 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
166156 Name : "tsdb_isolation_low_watermark" ,
167157 Help : "The lowest write id that is still referenced." ,
168158 }, func () float64 {
169- return float64 (h .readLowWatermark ())
159+ return float64 (h .iso . lowWatermark ())
170160 })
171161 m .highWatermark = prometheus .NewGaugeFunc (prometheus.GaugeOpts {
172162 Name : "tsdb_isolation_high_watermark" ,
173163 Help : "The highest write id that has been given out." ,
174164 }, func () float64 {
175- h .writeMtx .Lock ()
176- defer h .writeMtx .Unlock ()
177- return float64 (h .lastWriteID )
165+ h .iso . writeMtx .Lock ()
166+ defer h .iso . writeMtx .Unlock ()
167+ return float64 (h .iso . lastWriteID )
178168 })
179169
180170 if r != nil {
@@ -210,9 +200,6 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal WAL, chunkRange int64) (
210200 if chunkRange < 1 {
211201 return nil , errors .Errorf ("invalid chunk range %d" , chunkRange )
212202 }
213- headIso := & IsolationState {}
214- headIso .next = headIso
215- headIso .prev = headIso
216203
217204 h := & Head {
218205 wal : wal ,
@@ -226,8 +213,7 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal WAL, chunkRange int64) (
226213 postings : index .NewUnorderedMemPostings (),
227214 tombstones : memTombstones {},
228215
229- writesOpen : map [uint64 ]struct {}{},
230- readsOpen : headIso ,
216+ iso : newIsolation (),
231217 }
232218 h .metrics = newHeadMetrics (h , r )
233219
@@ -428,7 +414,7 @@ func (h *rangeHead) Index() (IndexReader, error) {
428414}
429415
430416func (h * rangeHead ) Chunks () (ChunkReader , error ) {
431- return h .head .chunksRange (h .mint , h .maxt , h .head .IsolationState ()), nil
417+ return h .head .chunksRange (h .mint , h .maxt , h .head .iso . State ()), nil
432418}
433419
434420func (h * rangeHead ) Tombstones () (TombstoneReader , error ) {
@@ -480,13 +466,8 @@ func (a *initAppender) Rollback() error {
480466func (h * Head ) Appender () Appender {
481467 h .metrics .activeAppenders .Inc ()
482468
483- h .writeMtx .Lock ()
484- h .lastWriteID ++
485- writeID := h .lastWriteID
486- h .writesOpen [writeID ] = struct {}{}
487- h .writeMtx .Unlock ()
488-
489- cleanupWriteIDsBelow := h .readLowWatermark ()
469+ writeID := h .iso .newWriteID ()
470+ cleanupWriteIDsBelow := h .iso .lowWatermark ()
490471
491472 // The head cache might not have a starting point yet. The init appender
492473 // picks up the first appended timestamp as the base.
@@ -614,9 +595,7 @@ func (a *headAppender) Commit() error {
614595 }
615596 }
616597
617- a .head .writeMtx .Lock ()
618- delete (a .head .writesOpen , a .writeID )
619- a .head .writeMtx .Unlock ()
598+ a .head .iso .closeWrite (a .writeID )
620599
621600 return nil
622601}
@@ -725,14 +704,14 @@ func (h *Head) indexRange(mint, maxt int64) *headIndexReader {
725704
726705// Chunks returns a ChunkReader against the block.
727706func (h * Head ) Chunks () (ChunkReader , error ) {
728- return h .chunksRange (math .MinInt64 , math .MaxInt64 , h .IsolationState ()), nil
707+ return h .chunksRange (math .MinInt64 , math .MaxInt64 , h .iso . State ()), nil
729708}
730709
731- func (h * Head ) chunksRange (mint , maxt int64 , isolation * IsolationState ) * headChunkReader {
710+ func (h * Head ) chunksRange (mint , maxt int64 , isoState * IsolationState ) * headChunkReader {
732711 if hmin := h .MinTime (); hmin > mint {
733712 mint = hmin
734713 }
735- return & headChunkReader {head : h , mint : mint , maxt : maxt , isolation : isolation }
714+ return & headChunkReader {head : h , mint : mint , maxt : maxt , isoState : isoState }
736715}
737716
738717// MinTime returns the lowest time bound on visible data in the head.
@@ -754,11 +733,11 @@ type headChunkReader struct {
754733 head * Head
755734 mint , maxt int64
756735
757- isolation * IsolationState
736+ isoState * IsolationState
758737}
759738
760739func (h * headChunkReader ) Close () error {
761- h .isolation .Close ()
740+ h .isoState .Close ()
762741 return nil
763742}
764743
@@ -809,7 +788,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) {
809788 s : s ,
810789 cid : int (cid ),
811790
812- isolation : h .isolation ,
791+ isoState : h .isoState ,
813792 }, nil
814793}
815794
@@ -818,12 +797,12 @@ type safeChunk struct {
818797 s * memSeries
819798 cid int
820799
821- isolation * IsolationState
800+ isoState * IsolationState
822801}
823802
824803func (c * safeChunk ) Iterator () chunkenc.Iterator {
825804 c .s .Lock ()
826- it := c .s .iterator (c .cid , c .isolation )
805+ it := c .s .iterator (c .cid , c .isoState )
827806 c .s .Unlock ()
828807 return it
829808}
@@ -983,20 +962,6 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie
983962 return s , true
984963}
985964
986- // readLowWatermark returns the writeId below which
987- // we no longer need to track which writes were from
988- // which writeId.
989- func (h * Head ) readLowWatermark () uint64 {
990- h .writeMtx .Lock () // Take writeMtx first.
991- defer h .writeMtx .Unlock ()
992- h .readMtx .Lock ()
993- defer h .readMtx .Unlock ()
994- if h .readsOpen .prev == h .readsOpen {
995- return h .lastWriteID
996- }
997- return h .readsOpen .prev .lowWaterMark
998- }
999-
1000965// seriesHashmap is a simple hashmap for memSeries by their label set. It is built
1001966// on top of a regular hashmap and holds a slice of series to resolve hash collisions.
1002967// Its methods require the hash to be submitted with it to avoid re-computations throughout
@@ -1185,10 +1150,7 @@ type memSeries struct {
11851150
11861151 app chunkenc.Appender // Current appender for the chunk.
11871152
1188- // Write ids of most recent samples. This is a ring buffer.
1189- writeIDs []uint64
1190- writeIDFirst int // Position of first id in the ring.
1191- writeIDCount int // How many ids in the ring.
1153+ txs * txRing
11921154}
11931155
11941156func (s * memSeries ) minTime () int64 {
@@ -1225,7 +1187,7 @@ func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries {
12251187 ref : id ,
12261188 chunkRange : chunkRange ,
12271189 nextAt : math .MinInt64 ,
1228- writeIDs : make ([] uint64 , 4 ),
1190+ txs : newTxRing ( 4 ),
12291191 }
12301192 return s
12311193}
@@ -1316,40 +1278,15 @@ func (s *memSeries) append(t int64, v float64, writeID uint64) (success, chunkCr
13161278 s .sampleBuf [2 ] = s .sampleBuf [3 ]
13171279 s .sampleBuf [3 ] = sample {t : t , v : v }
13181280
1319- if s .writeIDCount == len (s .writeIDs ) {
1320- // Ring buffer is full, expand by doubling.
1321- newRing := make ([]uint64 , s .writeIDCount * 2 )
1322- idx := copy (newRing [:], s .writeIDs [s .writeIDFirst % len (s .writeIDs ):])
1323- copy (newRing [idx :], s .writeIDs [:s .writeIDFirst % len (s .writeIDs )])
1324- s .writeIDs = newRing
1325- s .writeIDFirst = 0
1326- }
1327- s .writeIDs [(s .writeIDFirst + s .writeIDCount )% len (s .writeIDs )] = writeID
1328- s .writeIDCount ++
1281+ s .txs .add (writeID )
13291282
13301283 return true , chunkCreated
13311284}
13321285
13331286// cleanupWriteIDsBelow cleans up older writeIds. Has to be called after acquiring
13341287// lock.
13351288func (s * memSeries ) cleanupWriteIDsBelow (bound uint64 ) {
1336- pos := s .writeIDFirst
1337-
1338- for s .writeIDCount > 0 {
1339- if s .writeIDs [pos ] < bound {
1340- s .writeIDFirst ++
1341- s .writeIDCount --
1342- } else {
1343- break
1344- }
1345- pos ++
1346- if pos == len (s .writeIDs ) {
1347- pos = 0
1348- }
1349- }
1350- if s .writeIDFirst >= len (s .writeIDs ) {
1351- s .writeIDFirst -= len (s .writeIDs )
1352- }
1289+ s .txs .cleanupWriteIDsBelow (bound )
13531290}
13541291
13551292func (s * memSeries ) cleanupExtraWriteIds () {
@@ -1358,28 +1295,7 @@ func (s *memSeries) cleanupExtraWriteIds() {
13581295 totalSamples += c .chunk .NumSamples ()
13591296 }
13601297
1361- if s .writeIDCount <= totalSamples {
1362- return
1363- }
1364-
1365- s .writeIDFirst += (s .writeIDCount - totalSamples )
1366- s .writeIDCount = totalSamples
1367-
1368- newBufSize := len (s .writeIDs )
1369- for totalSamples < newBufSize / 2 {
1370- newBufSize = newBufSize / 2
1371- }
1372-
1373- if newBufSize == len (s .writeIDs ) {
1374- return
1375- }
1376-
1377- newRing := make ([]uint64 , newBufSize )
1378- idx := copy (newRing [:], s .writeIDs [s .writeIDFirst % len (s .writeIDs ):])
1379- copy (newRing [idx :], s .writeIDs [:s .writeIDFirst % len (s .writeIDs )])
1380-
1381- s .writeIDs = newRing
1382- s .writeIDFirst = 0
1298+ s .txs .cutoffN (totalSamples )
13831299}
13841300
13851301// computeChunkEndTime estimates the end timestamp based the beginning of a chunk,
@@ -1393,7 +1309,7 @@ func computeChunkEndTime(start, cur, max int64) int64 {
13931309 return start + (max - start )/ a
13941310}
13951311
1396- func (s * memSeries ) iterator (id int , isolation * IsolationState ) chunkenc.Iterator {
1312+ func (s * memSeries ) iterator (id int , isoState * IsolationState ) chunkenc.Iterator {
13971313 c := s .chunk (id )
13981314
13991315 // TODO(fabxc): Work around! A querier may have retrieved a pointer to a series' chunk,
@@ -1408,7 +1324,7 @@ func (s *memSeries) iterator(id int, isolation *IsolationState) chunkenc.Iterato
14081324 numSamples := c .chunk .NumSamples ()
14091325 stopAfter := numSamples
14101326
1411- if isolation != nil {
1327+ if isoState != nil {
14121328 totalSamples := 0
14131329 previousSamples := 0 // Samples before this chunk.
14141330 for j , d := range s .chunks {
@@ -1417,23 +1333,22 @@ func (s *memSeries) iterator(id int, isolation *IsolationState) chunkenc.Iterato
14171333 previousSamples += d .chunk .NumSamples ()
14181334 }
14191335 }
1420- writeIdsToConsider := (previousSamples + c .chunk .NumSamples ()) - (totalSamples - s .writeIDCount )
1336+
1337+ writeIdsToConsider := (previousSamples + c .chunk .NumSamples ()) - (totalSamples - s .txs .txIDCount )
14211338 // Iterate over the ring, find the first one that the isolation state says not
14221339 // to return.
1423- pos := s .writeIDFirst
1340+ it := s .txs . iterator ()
14241341 for index := 0 ; index < writeIdsToConsider ; index ++ {
1425- writeID := s . writeIDs [ pos ]
1426- if _ , ok := isolation .incompleteWrites [writeID ]; ok || writeID > isolation .maxWriteID {
1427- stopAfter = index - (writeIdsToConsider - c . chunk . NumSamples () )
1342+ writeID := it . At ()
1343+ if _ , ok := isoState .incompleteWrites [writeID ]; ok || writeID > isoState .maxWriteID {
1344+ stopAfter = c . chunk . NumSamples () - (writeIdsToConsider - index )
14281345 if stopAfter < 0 {
14291346 stopAfter = 0 // Stopped in a previous chunk.
14301347 }
14311348 break
14321349 }
1433- pos ++
1434- if pos == len (s .writeIDs ) {
1435- pos = 0
1436- }
1350+
1351+ it .Next ()
14371352 }
14381353 }
14391354
0 commit comments