Skip to content

Commit

Permalink
Checker: account for possibility that one chunkId maps to more than o…
Browse files Browse the repository at this point in the history
…ne blob
  • Loading branch information
grote committed Nov 1, 2024
1 parent 4f5199c commit 6d9c18b
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 32 deletions.
60 changes: 36 additions & 24 deletions app/src/main/java/com/stevesoltys/seedvault/repo/Checker.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package com.stevesoltys.seedvault.repo

import androidx.annotation.WorkerThread
import com.google.protobuf.ByteString
import com.stevesoltys.seedvault.backend.BackendManager
import com.stevesoltys.seedvault.crypto.Crypto
import com.stevesoltys.seedvault.proto.Snapshot
Expand Down Expand Up @@ -71,10 +72,12 @@ internal class Checker(
this.handleSize = handles.size // remember number of snapshot handles we had

// get total disk space used by snapshots
val sizeMap = mutableMapOf<String, Int>()
val sizeMap = mutableMapOf<ByteString, Int>() // uses blob.id as key
snapshots.forEach { snapshot ->
// add sizes to a map first, so we don't double count
snapshot.blobsMap.forEach { (chunkId, blob) -> sizeMap[chunkId] = blob.length }
snapshot.blobsMap.forEach { (_, blob) ->
sizeMap[blob.id] = blob.length
}
}
return sizeMap.values.sumOf { it.toLong() }
}
Expand All @@ -96,13 +99,13 @@ internal class Checker(
"Got $handleSize handles, but ${snapshots.size} snapshots."
}
val blobSample = getBlobSample(snapshots, percent)
val sampleSize = blobSample.values.sumOf { it.length.toLong() }
val sampleSize = blobSample.sumOf { it.blob.length.toLong() }
log.info { "Blob sample has ${blobSample.size} blobs worth $sampleSize bytes." }

// check blobs concurrently
val semaphore = Semaphore(concurrencyLimit)
val size = AtomicLong()
val badChunks = ConcurrentSkipListSet<String>()
val badChunks = ConcurrentSkipListSet<ChunkIdBlobPair>()
val lastNotification = AtomicLong()
val startTime = System.currentTimeMillis()
coroutineScope {
Expand All @@ -116,7 +119,7 @@ internal class Checker(
} catch (e: Exception) {
log.error(e) { "Error loading chunk $chunkId: " }
// TODO we could try differentiating transient backend issues
badChunks.add(chunkId)
badChunks.add(ChunkIdBlobPair(chunkId, blob))
}
}
// keep track of how much we checked and for how long
Expand Down Expand Up @@ -154,25 +157,30 @@ internal class Checker(
checkerResult = null
}

private fun getBlobSample(snapshots: List<Snapshot>, percent: Int): Map<String, Blob> {
// split up blobs for app data and for APKs
val appBlobs = mutableMapOf<String, Blob>()
val apkBlobs = mutableMapOf<String, Blob>()
private fun getBlobSample(
snapshots: List<Snapshot>,
percent: Int,
): List<ChunkIdBlobPair> {
// split up blobs for app data and for APKs (use blob.id as key to prevent double counting)
val appBlobs = mutableMapOf<ByteString, ChunkIdBlobPair>()
val apkBlobs = mutableMapOf<ByteString, ChunkIdBlobPair>()
snapshots.forEach { snapshot ->
val appChunkIds = snapshot.appsMap.flatMap { it.value.chunkIdsList.hexFromProto() }
val apkChunkIds = snapshot.appsMap.flatMap {
it.value.apk.splitsList.flatMap { split -> split.chunkIdsList.hexFromProto() }
}
appChunkIds.forEach { chunkId ->
appBlobs[chunkId] = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId")
val blob = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId")
appBlobs[blob.id] = ChunkIdBlobPair(chunkId, blob)
}
apkChunkIds.forEach { chunkId ->
apkBlobs[chunkId] = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId")
val blob = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId")
apkBlobs[blob.id] = ChunkIdBlobPair(chunkId, blob)
}
}
// calculate sizes
val appSize = appBlobs.values.sumOf { it.length.toLong() }
val apkSize = apkBlobs.values.sumOf { it.length.toLong() }
val appSize = appBlobs.values.sumOf { it.blob.length.toLong() }
val apkSize = apkBlobs.values.sumOf { it.blob.length.toLong() }
// let's assume it is unlikely that app data and APKs have blobs in common
val totalSize = appSize + apkSize
log.info { "Got ${appBlobs.size + apkBlobs.size} blobs worth $totalSize bytes to check." }
Expand All @@ -182,23 +190,21 @@ internal class Checker(
val appTargetSize = min((targetSize * 0.75).roundToLong(), appSize) // 75% of targetSize
log.info { "Sampling $targetSize bytes of which $appTargetSize bytes for apps." }

val blobSample = mutableMapOf<String, Blob>()
val blobSample = mutableListOf<ChunkIdBlobPair>()
var currentSize = 0L
// check apps first until we reach their target size
val appIterator = appBlobs.keys.shuffled().iterator() // random app blob iterator
val appIterator = appBlobs.values.shuffled().iterator() // random app blob iterator
while (currentSize < appTargetSize && appIterator.hasNext()) {
val randomChunkId = appIterator.next()
val blob = appBlobs[randomChunkId] ?: error("No blob")
blobSample[randomChunkId] = blob
currentSize += blob.length
val pair = appIterator.next()
blobSample.add(pair)
currentSize += pair.blob.length
}
// now check APKs until we reach total targetSize
val apkIterator = apkBlobs.keys.shuffled().iterator() // random APK blob iterator
val apkIterator = apkBlobs.values.shuffled().iterator() // random APK blob iterator
while (currentSize < targetSize && apkIterator.hasNext()) {
val randomChunkId = apkIterator.next()
val blob = apkBlobs[randomChunkId] ?: error("No blob")
blobSample[randomChunkId] = blob
currentSize += blob.length
val pair = apkIterator.next()
blobSample.add(pair)
currentSize += pair.blob.length
}
return blobSample
}
Expand All @@ -216,3 +222,9 @@ internal class Checker(
if (readChunkId != chunkId) throw GeneralSecurityException("ChunkId doesn't match")
}
}

/**
 * A chunk ID together with one specific [Blob] that a snapshot maps it to.
 *
 * The same [chunkId] can map to more than one blob (different snapshots may reference
 * different blobs for it), so a pair is only identified by the combination of both.
 *
 * Ordering is by [chunkId] first and by the bytes of the blob's ID second.
 * Sorted collections such as `ConcurrentSkipListSet` decide element identity via [compareTo]
 * rather than [equals]: if only [chunkId] were compared, a second pair with the same chunk ID
 * but a different blob would be treated as a duplicate and silently dropped.
 */
data class ChunkIdBlobPair(val chunkId: String, val blob: Blob) : Comparable<ChunkIdBlobPair> {
    override fun compareTo(other: ChunkIdBlobPair): Int {
        val byChunkId = chunkId.compareTo(other.chunkId)
        if (byChunkId != 0) return byChunkId
        // Same chunkId: fall back to the blob ID so that distinct blobs stay distinct.
        // ByteBuffer compares its content lexicographically, which gives a total order.
        return blob.id.asReadOnlyByteBuffer().compareTo(other.blob.id.asReadOnlyByteBuffer())
    }
}
20 changes: 17 additions & 3 deletions app/src/main/java/com/stevesoltys/seedvault/repo/CheckerResult.kt
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,31 @@ sealed class CheckerResult {
/**
* The list of chunkIDs that had errors.
*/
val errorChunkIds: Set<String>,
val errorChunkIdBlobPairs: Set<ChunkIdBlobPair>,
) : CheckerResult() {
val goodSnapshots: List<Snapshot>
val badSnapshots: List<Snapshot>

init {
val good = mutableListOf<Snapshot>()
val bad = mutableListOf<Snapshot>()
val errorChunkIds = errorChunkIdBlobPairs.map { it.chunkId }.toSet()
snapshots.forEach { snapshot ->
val isGood = snapshot.blobsMap.keys.intersect(errorChunkIds).isEmpty()
if (isGood) good.add(snapshot) else bad.add(snapshot)
val badChunkIds = snapshot.blobsMap.keys.intersect(errorChunkIds)
if (badChunkIds.isEmpty()) {
// snapshot doesn't contain chunks with erroneous blobs
good.add(snapshot)
} else {
// snapshot may contain chunks with erroneous blobs, check deeper
val isBad = badChunkIds.any { chunkId ->
val blob = snapshot.blobsMap[chunkId] ?: error("No blob for chunkId")
// is this chunkId/blob pair in errorChunkIdBlobPairs?
errorChunkIdBlobPairs.any { pair ->
pair.chunkId == chunkId && pair.blob == blob
}
}
if (isBad) bad.add(snapshot) else good.add(snapshot)
}
}
goodSnapshots = good
badSnapshots = bad
Expand Down
93 changes: 88 additions & 5 deletions app/src/test/java/com/stevesoltys/seedvault/repo/CheckerTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,36 @@ internal class CheckerTest : TransportTest() {
assertEquals(expectedSize, checker.getBackupSize())
}

/**
 * Two snapshots map the same chunkId to two different blobs.
 * The reported backup size must include both blobs, not just one entry per chunkId.
 */
@Test
fun `getBackupSize returns size without under-counting blobs with same chunkId`() =
runBlocking {
// APK with a single split that references only chunkId1
val apk = apk.copy {
splits.clear()
splits.add(baseSplit.copy {
this.chunkIds.clear()
chunkIds.add(ByteString.fromHex(chunkId1))
})
}
// base snapshot using that APK, with its blob map emptied so each copy below fills it in
val snapshot = snapshot.copy {
apps[packageName] = app.copy { this.apk = apk }
blobs.clear()
}
// both snapshots use chunkId1 as key, but point it at different blobs
val snapshotMap = mapOf(
snapshotHandle1 to snapshot.copy {
token = 1
blobs[chunkId1] = blob1
},
snapshotHandle2 to snapshot.copy {
token = 2
blobs[chunkId1] = blob2
},
)
// both blobs are expected to be counted, even though they share a chunkId
val expectedSize = blob1.length.toLong() + blob2.length.toLong()
expectLoadingSnapshots(snapshotMap)

assertEquals(expectedSize, checker.getBackupSize())
}

@Test
fun `check works even with no backup data`() = runBlocking {
expectLoadingSnapshots(emptyMap())
Expand Down Expand Up @@ -138,7 +168,8 @@ internal class CheckerTest : TransportTest() {
assertEquals(snapshotMap.values.toSet(), result.badSnapshots.toSet())
assertEquals(emptyList<Snapshot>(), result.goodSnapshots)
assertEquals(snapshotMap.size, result.existingSnapshots)
assertEquals(setOf(chunkId1, chunkId2), result.errorChunkIds)
val errorPairs = setOf(ChunkIdBlobPair(chunkId1, blob1), ChunkIdBlobPair(chunkId2, blob2))
assertEquals(errorPairs, result.errorChunkIdBlobPairs)
}

@Test
Expand Down Expand Up @@ -189,7 +220,8 @@ internal class CheckerTest : TransportTest() {
assertEquals(listOf(snapshotMap[snapshotHandle1]), result.goodSnapshots)
assertEquals(listOf(snapshotMap[snapshotHandle2]), result.badSnapshots)
assertEquals(snapshotMap.size, result.existingSnapshots)
assertEquals(setOf(chunkId2), result.errorChunkIds)
val errorPairs = setOf(ChunkIdBlobPair(chunkId2, blob2))
assertEquals(errorPairs, result.errorChunkIdBlobPairs)
}

@Test
Expand Down Expand Up @@ -247,8 +279,8 @@ internal class CheckerTest : TransportTest() {
fun `check prefers app data over APKs`() = runBlocking {
val appDataBlob = blob {
id = ByteString.copyFrom(Random.nextBytes(32))
length = Random.nextInt(0, Int.MAX_VALUE)
uncompressedLength = Random.nextInt(0, Int.MAX_VALUE)
length = Random.nextInt(1, Int.MAX_VALUE)
uncompressedLength = Random.nextInt(1, Int.MAX_VALUE)
}
val appDataBlobHandle1 = AppBackupFileType.Blob(repoId, appDataBlob.id.hexFromProto())
val appDataChunkId = Random.nextBytes(32).toHexString()
Expand All @@ -266,6 +298,7 @@ internal class CheckerTest : TransportTest() {
// only loading app data, not other blobs
coEvery { loader.loadFile(appDataBlobHandle1, null) } throws SecurityException()

// debug output: braces are required so the template evaluates the `length` property
// instead of printing the whole blob followed by the literal text ".length"
println("appDataBlob.length = ${appDataBlob.length}")
every { nm.onCheckFinishedWithError(appDataBlob.length.toLong(), any()) } just Runs

assertNull(checker.checkerResult)
Expand All @@ -275,14 +308,64 @@ internal class CheckerTest : TransportTest() {
assertEquals(snapshotMap.values.toSet(), result.snapshots.toSet())
assertEquals(snapshotMap.values.toSet(), result.badSnapshots.toSet())
assertEquals(snapshotMap.size, result.existingSnapshots)
assertEquals(setOf(appDataChunkId), result.errorChunkIds)
val errorPairs = setOf(ChunkIdBlobPair(appDataChunkId, appDataBlob))
assertEquals(errorPairs, result.errorChunkIdBlobPairs)

coVerify(exactly = 0) {
loader.loadFile(blobHandle1, null)
loader.loadFile(blobHandle2, null)
}
}

/**
 * Two snapshots map the same chunkId to different blobs: blob1 is served broken data
 * while blob2 is served data that hashes to the chunkId.
 * A full check must still report blob1 as an error and classify only the snapshot
 * referencing it as bad.
 */
@Test
fun `check doesn't skip broken blobs that have a fix with same chunkID`() = runBlocking {
// get "real" data for blob2
val messageDigest = MessageDigest.getInstance("SHA-256")
val data1 = getRandomByteArray() // broken blob
val data2 = getRandomByteArray() // data2 matches chunkId
// the chunkId is derived from data2 only, so data1 is not expected to hash to it
val chunkId = messageDigest.digest(data2).toHexString()
// APK with a single split that references only this chunkId
val apk = apk.copy {
splits.clear()
splits.add(baseSplit.copy {
this.chunkIds.clear()
chunkIds.add(ByteString.fromHex(chunkId))
})
}
// base snapshot using that APK, with its blob map emptied so each copy below fills it in
val snapshot = snapshot.copy {
apps[packageName] = app.copy { this.apk = apk }
blobs.clear()
}
val snapshotMap = mapOf(
snapshotHandle1 to snapshot.copy {
token = 1
blobs[chunkId] = blob1 // snapshot1 has broken blob for chunkId
},
snapshotHandle2 to snapshot.copy {
token = 2
blobs[chunkId] = blob2 // snapshot2 has fixed blob for chunkId
},
)

expectLoadingSnapshots(snapshotMap)
every { backendManager.requiresNetwork } returns Random.nextBoolean()

// blobHandle1/blobHandle2 are fixtures from outside this view; presumably they address
// blob1/blob2 respectively (verify against the test class setup)
coEvery { loader.loadFile(blobHandle1, null) } returns ByteArrayInputStream(data1)
coEvery { loader.loadFile(blobHandle2, null) } returns ByteArrayInputStream(data2)

every { nm.onCheckFinishedWithError(any(), any()) } just Runs

assertNull(checker.checkerResult)
// check 100% so that both blobs end up in the sample
checker.check(100)
assertInstanceOf(CheckerResult.Error::class.java, checker.checkerResult)
val result = checker.checkerResult as CheckerResult.Error
assertEquals(snapshotMap.values.toSet(), result.snapshots.toSet())
// only the snapshot pointing at the broken blob is bad; the one with the fixed blob is good
assertEquals(setOf(snapshotMap[snapshotHandle2]), result.goodSnapshots.toSet())
assertEquals(setOf(snapshotMap[snapshotHandle1]), result.badSnapshots.toSet())
assertEquals(snapshotMap.size, result.existingSnapshots)
// the error is reported for the (chunkId, blob1) pair only
val errorPairs = setOf(ChunkIdBlobPair(chunkId, blob1))
assertEquals(errorPairs, result.errorChunkIdBlobPairs)
}

private suspend fun expectLoadingSnapshots(
snapshots: Map<AppBackupFileType.Snapshot, Snapshot>,
) {
Expand Down

0 comments on commit 6d9c18b

Please sign in to comment.