@@ -4,6 +4,7 @@ import better.files.File
 import cats.effect.implicits.catsEffectSyntaxBracket
 import cats.effect.{Blocker, ContextShift, Resource, Sync}
 import cats.syntax.all._
+import com.avast.clients.storage.compression.ZstdDecompressOutputStream
 import com.avast.clients.storage.gcs.GcsStorageBackend.composeBlobPath
 import com.avast.clients.storage.{ConfigurationException, GetResult, HeadResult, StorageBackend, StorageException}
 import com.avast.scala.hashes.Sha256
@@ -17,7 +18,7 @@ import pureconfig.generic.ProductHint
 import pureconfig.generic.auto._
 import pureconfig.{CamelCase, ConfigFieldMapping}
 
-import java.io.{ByteArrayInputStream, FileInputStream}
+import java.io.{ByteArrayInputStream, FileInputStream, OutputStream}
 import java.nio.charset.StandardCharsets
 import java.nio.file.StandardOpenOption
 import java.security.{DigestOutputStream, MessageDigest}
@@ -34,7 +35,12 @@ class GcsStorageBackend[F[_]: Sync: ContextShift](storageClient: Storage, bucket
       blob <- getBlob(sha256)
       result = blob match {
         case Some(blob) =>
-          HeadResult.Exists(blob.getSize)
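+          // compressed blobs carry their pre-compression size in metadata,
+          // so report that instead of the stored (compressed) blob size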
+          blob.getMetadata.get(GcsStorageBackend.OriginalSizeHeader) match {
+            case null =>
+              HeadResult.Exists(blob.getSize)
+            case originalSize =>
+              HeadResult.Exists(originalSize.toLong)
+          }
         case None =>
           HeadResult.NotFound
       }
@@ -85,15 +91,7 @@ class GcsStorageBackend[F[_]: Sync: ContextShift](storageClient: Storage, bucket
     blocker
       .delay(destination.newOutputStream(FileStreamOpenOptions))
       .bracket { fileStream =>
-        Sync[F]
-          .delay(new DigestOutputStream(fileStream, MessageDigest.getInstance("SHA-256")))
-          .bracket { stream =>
-            blocker.delay(blob.downloadTo(stream)).flatMap { _ =>
-              Sync[F].delay {
-                (blob.getSize, Sha256(stream.getMessageDigest.digest))
-              }
-            }
-          }(stream => blocker.delay(stream.close()))
+        downloadBlobToFile(blob, fileStream)
       }(fileStream => blocker.delay(fileStream.close()))
       .map[Either[StorageException, GetResult]] {
         case (size, hash) =>
@@ -109,6 +107,53 @@ class GcsStorageBackend[F[_]: Sync: ContextShift](storageClient: Storage, bucket
     }
   }
 
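+  // writes the blob to fileStream, decompressing when its metadata marks it as
+  // compressed; returns the count of bytes written and their SHA-256 digest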
+  private def downloadBlobToFile(blob: Blob, fileStream: OutputStream): F[(Long, Sha256)] = {
+    def getCompressionType: Option[String] = {
+      Option(blob.getMetadata.get(GcsStorageBackend.CompressionTypeHeader)).map(_.toLowerCase)
+    }
+
+    Sync[F]
+      .delay {
+        val countingStream = new GcsStorageBackend.CountingOutputStream(fileStream)
+        val hashingStream = new DigestOutputStream(countingStream, MessageDigest.getInstance("SHA-256"))
+        (countingStream, hashingStream)
+      }
+      .bracket {
+        case (countingStream, hashingStream) => {
+          getCompressionType match {
+            case None =>
+              downloadBlobToStream(blob, hashingStream)
+            case Some("zstd") =>
+              decodeZstdBlobToStream(blob, hashingStream)
+            case Some(unknown) =>
+              throw new IllegalArgumentException(s"Unknown compression type $unknown")
+          }
+        }.flatMap { _ =>
+          Sync[F].delay {
+            (countingStream.length, Sha256(hashingStream.getMessageDigest.digest))
+          }
+        }
+      } {
+        case (countingStream, hashingStream) =>
+          // close the digest stream first so it flushes into the counting stream
+          Sync[F].delay {
+            hashingStream.close()
+            countingStream.close()
+          }
+      }
+  }
+
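+  // streams the blob through a zstd decompressor on its way into the hashing stream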
+  private def decodeZstdBlobToStream(blob: Blob, targetStream: DigestOutputStream): F[Unit] = {
+    Sync[F]
+      .delay(new ZstdDecompressOutputStream(targetStream))
+      .bracket { decompressionStream =>
+        downloadBlobToStream(blob, decompressionStream)
+      }(decompressionStream => Sync[F].delay(decompressionStream.close()))
+  }
+
+  private def downloadBlobToStream(blob: Blob, targetStream: OutputStream): F[Unit] = {
+    blocker.delay(blob.downloadTo(targetStream))
+  }
+
   override def close(): Unit = {
     ()
   }
@@ -117,6 +162,9 @@ class GcsStorageBackend[F[_]: Sync: ContextShift](storageClient: Storage, bucket
 object GcsStorageBackend {
   private val DefaultConfig = ConfigFactory.defaultReference().getConfig("gcsBackendDefaults")
 
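+  // user-metadata keys read from stored blobs; assumed to be set by whatever
+  // uploads compressed objects to the bucket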
+  private[gcs] val CompressionTypeHeader = "comp-type"
+  private[gcs] val OriginalSizeHeader = "original-size"
+
   def fromConfig[F[_]: Sync: ContextShift](config: Config,
                                            blocker: Blocker): Either[ConfigurationException, Resource[F, GcsStorageBackend[F]]] = {
 
@@ -148,6 +196,35 @@ object GcsStorageBackend {
     String.join("/", sha256Hex.substring(0, 2), sha256Hex.substring(2, 4), sha256Hex.substring(4, 6), sha256Hex)
   }
 
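+  // OutputStream wrapper that counts the bytes written through it; needed because
+  // blob.getSize is the stored (possibly compressed) size, not the bytes written out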
+  private[gcs] class CountingOutputStream(target: OutputStream) extends OutputStream {
+    private var count: Long = 0
+
+    def length: Long = count
+
+    override def write(b: Int): Unit = {
+      target.write(b)
+      count += 1
+    }
+
+    override def write(b: Array[Byte]): Unit = {
+      target.write(b)
+      count += b.length
+    }
+
+    override def write(b: Array[Byte], off: Int, len: Int): Unit = {
+      target.write(b, off, len)
+      count += len
+    }
+
+    override def flush(): Unit = {
+      target.flush()
+    }
+
+    override def close(): Unit = {
+      target.close()
+    }
+  }
+
   def prepareStorageClient[F[_]: Sync: ContextShift](conf: GcsBackendConfiguration,
                                                      blocker: Blocker): Either[ConfigurationException, Storage] = {
     Either