Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Miscellaneous blob file deduplication improvements/fixes #6471

Merged
merged 3 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions node/test/test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { EventId2EventName, C } from '../dist/constants.js'
import { join } from 'path'
import { statSync } from 'fs'
import { Context } from '../dist/context.js'
import {fileURLToPath} from 'url';
import { fileURLToPath } from 'url';

const __dirname = fileURLToPath(new URL('.', import.meta.url));

Expand Down Expand Up @@ -444,7 +444,7 @@ describe('Offline Tests with unconfigured account', function () {
context.setChatProfileImage(chatId, imagePath)
const blobPath = context.getChat(chatId).getProfileImage()
expect(blobPath.startsWith(blobs)).to.be.true
expect(blobPath.includes('image')).to.be.true
expect(blobPath.includes('image')).to.be.false
expect(blobPath.endsWith('.jpeg')).to.be.true

context.setChatProfileImage(chatId, null)
Expand Down
53 changes: 18 additions & 35 deletions src/blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@

use core::cmp::max;
use std::ffi::OsStr;
use std::fmt;
use std::io::{Cursor, Seek};
use std::iter::FusedIterator;
use std::mem;
use std::path::{Path, PathBuf};

use anyhow::{format_err, Context as _, Result};
use anyhow::{ensure, format_err, Context as _, Result};
use base64::Engine as _;
use futures::StreamExt;
use image::codecs::jpeg::JpegEncoder;
Expand Down Expand Up @@ -139,7 +138,7 @@ impl<'a> BlobObject<'a> {
let src_in_blobdir: &Path;
let blobdir = context.get_blobdir();

if src.starts_with(blobdir) || src.starts_with("$BLOBDIR/") {
if src.starts_with(blobdir) {
src_in_blobdir = src;
} else {
info!(
Expand Down Expand Up @@ -440,7 +439,7 @@ impl<'a> BlobObject<'a> {
// 32 / 4 * 3 = 24k if you account for base64 encoding. To be safe, we reduced this to 20k.
self.recode_to_size(
context,
"".to_string(), // The name of an avatar doesn't matter
None, // The name of an avatar doesn't matter
maybe_sticker,
img_wh,
20_000,
Expand All @@ -460,7 +459,7 @@ impl<'a> BlobObject<'a> {
pub async fn recode_to_image_size(
&mut self,
context: &Context,
name: String,
name: Option<String>,
maybe_sticker: &mut bool,
) -> Result<String> {
let (img_wh, max_bytes) =
Expand Down Expand Up @@ -495,11 +494,10 @@ impl<'a> BlobObject<'a> {
/// then the updated user-visible filename will be returned;
/// this may be necessary because the format may be changed to JPG,
/// i.e. "image.png" -> "image.jpg".
/// Pass an empty string if you don't care.
fn recode_to_size(
&mut self,
context: &Context,
mut name: String,
name: Option<String>,
maybe_sticker: &mut bool,
mut img_wh: u32,
max_bytes: usize,
Expand All @@ -509,6 +507,7 @@ impl<'a> BlobObject<'a> {
let mut add_white_bg = img_wh <= constants::BALANCED_AVATAR_SIZE;
let mut no_exif = false;
let no_exif_ref = &mut no_exif;
let mut name = name.unwrap_or_else(|| self.name.clone());
let original_name = name.clone();
let res: Result<String> = tokio::task::block_in_place(move || {
let mut file = std::fs::File::open(self.to_abs_path())?;
Expand Down Expand Up @@ -678,6 +677,10 @@ impl<'a> BlobObject<'a> {
}

fn file_hash(src: &Path) -> Result<blake3::Hash> {
ensure!(
!src.starts_with("$BLOBDIR/"),
"Use `get_abs_path()` to get the absolute path of the blobfile"
);
let mut hasher = blake3::Hasher::new();
let mut src_file = std::fs::File::open(src)
.with_context(|| format!("Failed to open file {}", src.display()))?;
Expand All @@ -689,7 +692,7 @@ fn file_hash(src: &Path) -> Result<blake3::Hash> {
}

/// Returns image file size and Exif.
pub fn image_metadata(file: &std::fs::File) -> Result<(u64, Option<exif::Exif>)> {
fn image_metadata(file: &std::fs::File) -> Result<(u64, Option<exif::Exif>)> {
let len = file.metadata()?.len();
let mut bufreader = std::io::BufReader::new(file);
let exif = exif::Reader::new().read_from_container(&mut bufreader).ok();
Expand All @@ -710,12 +713,6 @@ fn exif_orientation(exif: &exif::Exif, context: &Context) -> i32 {
0
}

impl fmt::Display for BlobObject<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "$BLOBDIR/{}", self.name)
}
}

/// All files in the blobdir.
///
/// This exists so we can have a [`BlobDirIter`] which needs something to own the data of
Expand Down Expand Up @@ -1106,15 +1103,8 @@ mod tests {
let img_wh = 128;
let maybe_sticker = &mut false;
let strict_limits = true;
blob.recode_to_size(
&t,
"avatar.png".to_string(),
maybe_sticker,
img_wh,
20_000,
strict_limits,
)
.unwrap();
blob.recode_to_size(&t, None, maybe_sticker, img_wh, 20_000, strict_limits)
.unwrap();
tokio::task::block_in_place(move || {
let img = ImageReader::open(blob.to_abs_path())
.unwrap()
Expand Down Expand Up @@ -1145,7 +1135,7 @@ mod tests {
let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap();
let avatar_path = Path::new(&avatar_blob);
assert!(
avatar_blob.ends_with("d98cd30ed8f2129bf3968420208849d"),
avatar_blob.ends_with("d98cd30ed8f2129bf3968420208849d.jpg"),
"The avatar filename should be its hash, put instead it's {avatar_blob}"
);
let scaled_avatar_size = file_size(avatar_path).await;
Expand All @@ -1161,15 +1151,8 @@ mod tests {
let mut blob = BlobObject::new_from_path(&t, avatar_path).await.unwrap();
let maybe_sticker = &mut false;
let strict_limits = true;
blob.recode_to_size(
&t,
"avatar.jpg".to_string(),
maybe_sticker,
1000,
3000,
strict_limits,
)
.unwrap();
blob.recode_to_size(&t, None, maybe_sticker, 1000, 3000, strict_limits)
.unwrap();
let new_file_size = file_size(&blob.to_abs_path()).await;
assert!(new_file_size <= 3000);
assert!(new_file_size > 2000);
Expand Down Expand Up @@ -1204,7 +1187,7 @@ mod tests {
.unwrap();
let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap().unwrap();
assert!(
avatar_cfg.ends_with("9e7f409ac5c92b942cc4f31cee2770a"),
avatar_cfg.ends_with("9e7f409ac5c92b942cc4f31cee2770a.png"),
"Avatar file name {avatar_cfg} should end with its hash"
);

Expand All @@ -1221,7 +1204,7 @@ mod tests {
let avatar_src = t.dir.path().join("avatar.png");
let avatar_bytes = include_bytes!("../test-data/image/avatar64x64.png");
fs::write(&avatar_src, avatar_bytes).await.unwrap();
let avatar_blob = t.get_blobdir().join("avatar.png");
let avatar_blob = t.get_blobdir().join("e9b6c7a78aa2e4f415644f55a553e73.png");
assert!(!avatar_blob.exists());
t.set_config(Config::Selfavatar, Some(avatar_src.to_str().unwrap()))
.await
Expand Down
12 changes: 6 additions & 6 deletions src/chat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2782,11 +2782,7 @@ async fn prepare_msg_blob(context: &Context, msg: &mut Message) -> Result<()> {
|| maybe_sticker && !msg.param.exists(Param::ForceSticker))
{
let new_name = blob
.recode_to_image_size(
context,
msg.get_filename().unwrap_or_else(|| "file".to_string()),
&mut maybe_sticker,
)
.recode_to_image_size(context, msg.get_filename(), &mut maybe_sticker)
.await?;
msg.param.set(Param::Filename, new_name);

Expand Down Expand Up @@ -4170,7 +4166,11 @@ pub async fn set_chat_profile_image(
msg.param.remove(Param::Arg);
msg.text = stock_str::msg_grp_img_deleted(context, ContactId::SELF).await;
} else {
let mut image_blob = BlobObject::new_from_path(context, Path::new(new_image)).await?;
let mut image_blob = BlobObject::create_and_deduplicate(
context,
Path::new(new_image),
Path::new(new_image),
)?;
image_blob.recode_to_avatar_size(context).await?;
chat.param.set(Param::ProfileImage, image_blob.as_name());
msg.param.set(Param::Arg, image_blob.as_name());
Expand Down
3 changes: 2 additions & 1 deletion src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,8 @@ impl Context {
.await?;
match value {
Some(path) => {
let mut blob = BlobObject::new_from_path(self, path.as_ref()).await?;
let path = get_abs_path(self, Path::new(path));
let mut blob = BlobObject::create_and_deduplicate(self, &path, &path)?;
blob.recode_to_avatar_size(self).await?;
self.sql
.set_raw_config(key.as_ref(), Some(blob.as_name()))
Expand Down
4 changes: 3 additions & 1 deletion src/webxdc/integration.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::path::Path;

use crate::chat::{send_msg, ChatId};
use crate::config::Config;
use crate::contact::ContactId;
Expand All @@ -13,7 +15,7 @@ impl Context {
pub async fn set_webxdc_integration(&self, file: &str) -> Result<()> {
let chat_id = ChatId::create_for_contact(self, ContactId::SELF).await?;
let mut msg = Message::new(Viewtype::Webxdc);
msg.set_file(file, None);
msg.set_file_and_deduplicate(self, Path::new(&file), None, None)?;
msg.hidden = true;
msg.param.set_int(Param::WebxdcIntegration, 1);
msg.param.set_int(Param::GuaranteeE2ee, 1); // needed to pass `internet_access` requirements
Expand Down
Loading