Skip to content

Commit 1c8fa0d

Browse files
committed
feat: Remove original file stem from filenames in the blobstorage (#4309)
This way filenames in the blobstorage are just random hex numbers. This also allows us to get rid of the `sanitize-filename` dependency.
1 parent 1d69b7f commit 1c8fa0d

File tree

10 files changed

+36
-104
lines changed

10 files changed

+36
-104
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ regex = "1.8"
7575
reqwest = { version = "0.11.18", features = ["json"] }
7676
rusqlite = { version = "0.29", features = ["sqlcipher"] }
7777
rust-hsluv = "0.1"
78-
sanitize-filename = "0.4"
7978
serde_json = "1.0"
8079
serde = { version = "1.0", features = ["derive"] }
8180
sha-1 = "0.10"

node/test/test.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,6 @@ describe('Offline Tests with unconfigured account', function () {
446446
context.setChatProfileImage(chatId, imagePath)
447447
const blobPath = context.getChat(chatId).getProfileImage()
448448
expect(blobPath.startsWith(blobs)).to.be.true
449-
expect(blobPath.includes('image')).to.be.true
450449
expect(blobPath.endsWith('.jpeg')).to.be.true
451450

452451
context.setChatProfileImage(chatId, null)

python/tests/test_1_online.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,13 +182,11 @@ def send_and_receive_message():
182182
msg = send_and_receive_message()
183183
assert msg.text == "withfile"
184184
assert open(msg.file_path).read() == "some data"
185-
msg.file_path.index(basename)
186185
assert msg.file_path.endswith(ext)
187186

188187
msg2 = send_and_receive_message()
189188
assert msg2.text == "withfile"
190189
assert open(msg2.file_path).read() == "some data"
191-
msg2.file_path.index(basename)
192190
assert msg2.file_path.endswith(ext)
193191
assert msg.file_path != msg2.file_path
194192

@@ -215,7 +213,6 @@ def test_send_file_html_attachment(tmp_path, acfactory, lp):
215213
msg = ac2.get_message_by_id(ev.data2)
216214

217215
assert open(msg.file_path).read() == content
218-
msg.file_path.index(basename)
219216
assert msg.file_path.endswith(ext)
220217

221218

python/tests/test_2_increation.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ def test_no_increation_copies_to_blobdir(self, tmp_path, acfactory, lp):
5050
src = tmp_path / "file.txt"
5151
src.write_text("hello there\n")
5252
msg = chat.send_file(str(src))
53-
assert msg.file_path.startswith(os.path.join(ac1.get_blobdir(), "file"))
5453
assert msg.file_path.endswith(".txt")
5554

5655
def test_forward_increation(self, acfactory, data, lp):

src/blob.rs

Lines changed: 34 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ impl<'a> BlobObject<'a> {
4747
data: &[u8],
4848
) -> Result<BlobObject<'a>> {
4949
let blobdir = context.get_blobdir();
50-
let (stem, ext) = BlobObject::sanitise_name(suggested_name);
51-
let (name, mut file) = BlobObject::create_new_file(context, blobdir, &stem, &ext).await?;
50+
let ext = BlobObject::get_extension(suggested_name);
51+
let (name, mut file) = BlobObject::create_new_file(context, blobdir, &ext).await?;
5252
file.write_all(data).await.context("file write failure")?;
5353

5454
// workaround a bug in async-std
@@ -68,12 +68,11 @@ impl<'a> BlobObject<'a> {
6868
async fn create_new_file(
6969
context: &Context,
7070
dir: &Path,
71-
stem: &str,
7271
ext: &str,
7372
) -> Result<(String, fs::File)> {
7473
let mut attempt = 0;
7574
loop {
76-
let name = format!("{}-{:016x}{}", stem, rand::random::<u64>(), ext);
75+
let name = format!("{:016x}{}", rand::random::<u64>(), ext);
7776
attempt += 1;
7877
let path = dir.join(&name);
7978
match fs::OpenOptions::new()
@@ -104,9 +103,9 @@ impl<'a> BlobObject<'a> {
104103
let mut src_file = fs::File::open(src)
105104
.await
106105
.with_context(|| format!("failed to open file {}", src.display()))?;
107-
let (stem, ext) = BlobObject::sanitise_name(&src.to_string_lossy());
106+
let ext = BlobObject::get_extension(&src.to_string_lossy());
108107
let (name, mut dst_file) =
109-
BlobObject::create_new_file(context, context.get_blobdir(), &stem, &ext).await?;
108+
BlobObject::create_new_file(context, context.get_blobdir(), &ext).await?;
110109
let name_for_err = name.clone();
111110
if let Err(err) = io::copy(&mut src_file, &mut dst_file).await {
112111
// Attempt to remove the failed file, swallow errors resulting from that.
@@ -130,10 +129,8 @@ impl<'a> BlobObject<'a> {
130129
///
131130
/// If the source file is not a path to into the blob directory
132131
/// the file will be copied into the blob directory first. If the
133-
/// source file is already in the blobdir it will not be copied
134-
/// and only be created if it is a valid blobname, that is no
135-
/// subdirectory is used and [BlobObject::sanitise_name] does not
136-
/// modify the filename.
132+
/// source file is already in the blobdir (but not in a subdirectory)
133+
/// it will not be copied and only be created if it is a valid blobname.
137134
///
138135
/// Paths into the blob directory may be either defined by an absolute path
139136
/// or by the relative prefix `$BLOBDIR`.
@@ -150,8 +147,7 @@ impl<'a> BlobObject<'a> {
150147
/// Returns a [BlobObject] for an existing blob from a path.
151148
///
152149
/// The path must designate a file directly in the blobdir and
153-
/// must use a valid blob name. That is after sanitisation the
154-
/// name must still be the same, that means it must be valid UTF-8
150+
/// must use a valid blob name. That means it must be valid UTF-8
155151
/// and not have any special characters in it.
156152
pub fn from_path(context: &'a Context, path: &Path) -> Result<BlobObject<'a>> {
157153
let rel_path = path
@@ -225,21 +221,8 @@ impl<'a> BlobObject<'a> {
225221
}
226222
}
227223

228-
/// Create a safe name based on a messy input string.
229-
///
230-
/// The safe name will be a valid filename on Unix and Windows and
231-
/// not contain any path separators. The input can contain path
232-
/// segments separated by either Unix or Windows path separators,
233-
/// the rightmost non-empty segment will be used as name,
234-
/// sanitised for special characters.
235-
///
236-
/// The resulting name is returned as a tuple, the first part
237-
/// being the stem or basename and the second being an extension,
238-
/// including the dot. E.g. "foo.txt" is returned as `("foo",
239-
/// ".txt")` while "bar" is returned as `("bar", "")`.
240-
///
241-
/// The extension part will always be lowercased.
242-
fn sanitise_name(name: &str) -> (String, String) {
224+
/// Get a file extension if any, including the dot, in lower case, otherwise an empty string.
225+
fn get_extension(name: &str) -> String {
243226
let mut name = name.to_string();
244227
for part in name.rsplit('/') {
245228
if !part.is_empty() {
@@ -253,20 +236,12 @@ impl<'a> BlobObject<'a> {
253236
break;
254237
}
255238
}
256-
let opts = sanitize_filename::Options {
257-
truncate: true,
258-
windows: true,
259-
replacement: "",
260-
};
261239

262-
let clean = sanitize_filename::sanitize_with_options(name, opts);
263240
// Let's take the tricky filename
264241
// "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example.
265242
// Split it into "file" and "with_lots_of_characters_behind_point_and_double_ending.tar.gz":
266-
let mut iter = clean.splitn(2, '.');
267-
268-
let stem: String = iter.next().unwrap_or_default().chars().take(64).collect();
269-
// stem == "file"
243+
let mut iter = name.splitn(2, '.');
244+
iter.next();
270245

271246
let ext_chars = iter.next().unwrap_or_default().chars();
272247
let ext: String = ext_chars
@@ -279,11 +254,10 @@ impl<'a> BlobObject<'a> {
279254
// ext == "d_point_and_double_ending.tar.gz"
280255

281256
if ext.is_empty() {
282-
(stem, "".to_string())
257+
ext
283258
} else {
284-
(stem, format!(".{ext}").to_lowercase())
285-
// Return ("file", ".d_point_and_double_ending.tar.gz")
286-
// which is not perfect but acceptable.
259+
format!(".{ext}").to_lowercase()
260+
// Return ".d_point_and_double_ending.tar.gz", which is not perfect but acceptable.
287261
}
288262
}
289263

@@ -638,7 +612,7 @@ mod tests {
638612
async fn test_create() {
639613
let t = TestContext::new().await;
640614
let blob = BlobObject::create(&t, "foo", b"hello").await.unwrap();
641-
let re = Regex::new("^foo-[[:xdigit:]]{16}$").unwrap();
615+
let re = Regex::new("^[[:xdigit:]]{16}$").unwrap();
642616
assert!(re.is_match(blob.as_file_name()));
643617
let fname = t.get_blobdir().join(blob.as_file_name());
644618
let data = fs::read(fname).await.unwrap();
@@ -657,23 +631,23 @@ mod tests {
657631
async fn test_lowercase_ext() {
658632
let t = TestContext::new().await;
659633
let blob = BlobObject::create(&t, "foo.TXT", b"hello").await.unwrap();
660-
let re = Regex::new("^\\$BLOBDIR/foo-[[:xdigit:]]{16}.txt$").unwrap();
634+
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.txt$").unwrap();
661635
assert!(re.is_match(blob.as_name()));
662636
}
663637

664638
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
665639
async fn test_as_file_name() {
666640
let t = TestContext::new().await;
667641
let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap();
668-
let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap();
642+
let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap();
669643
assert!(re.is_match(blob.as_file_name()));
670644
}
671645

672646
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
673647
async fn test_as_rel_path() {
674648
let t = TestContext::new().await;
675649
let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap();
676-
let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap();
650+
let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap();
677651
assert!(re.is_match(blob.as_rel_path().to_str().unwrap()));
678652
}
679653

@@ -689,7 +663,7 @@ mod tests {
689663
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
690664
async fn test_create_dup() {
691665
let t = TestContext::new().await;
692-
let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap();
666+
let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap();
693667

694668
let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap();
695669
assert!(re.is_match(blob.as_rel_path().to_str().unwrap()));
@@ -710,7 +684,7 @@ mod tests {
710684
let blob = BlobObject::create(&t, "foo.tar.gz", b"hello")
711685
.await
712686
.unwrap();
713-
let re = Regex::new("^foo-[[:xdigit:]]{16}.tar.gz$").unwrap();
687+
let re = Regex::new("^[[:xdigit:]]{16}.tar.gz$").unwrap();
714688
assert!(re.is_match(blob.as_file_name()));
715689
let foo_path = t.get_blobdir().join(blob.as_file_name());
716690
assert!(foo_path.exists());
@@ -725,7 +699,6 @@ mod tests {
725699
} else {
726700
let name = fname.to_str().unwrap();
727701
println!("{name}");
728-
assert!(name.starts_with("foo"));
729702
assert!(name.ends_with(".tar.gz"));
730703
}
731704
}
@@ -746,7 +719,7 @@ mod tests {
746719
let src = t.dir.path().join("src");
747720
fs::write(&src, b"boo").await.unwrap();
748721
let blob = BlobObject::create_and_copy(&t, src.as_ref()).await.unwrap();
749-
let re = Regex::new("^\\$BLOBDIR/src-[[:xdigit:]]{16}$").unwrap();
722+
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}$").unwrap();
750723
assert!(re.is_match(blob.as_name()));
751724
let data = fs::read(blob.to_abs_path()).await.unwrap();
752725
assert_eq!(data, b"boo");
@@ -768,7 +741,7 @@ mod tests {
768741
let blob = BlobObject::new_from_path(&t, src_ext.as_ref())
769742
.await
770743
.unwrap();
771-
let re = Regex::new("^\\$BLOBDIR/external-[[:xdigit:]]{16}$").unwrap();
744+
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}$").unwrap();
772745
assert!(re.is_match(blob.as_name()));
773746
let data = fs::read(blob.to_abs_path()).await.unwrap();
774747
assert_eq!(data, b"boo");
@@ -781,20 +754,6 @@ mod tests {
781754
assert_eq!(data, b"boo");
782755
}
783756

784-
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
785-
async fn test_create_from_name_long() {
786-
let t = TestContext::new().await;
787-
let src_ext = t.dir.path().join("autocrypt-setup-message-4137848473.html");
788-
fs::write(&src_ext, b"boo").await.unwrap();
789-
let blob = BlobObject::new_from_path(&t, src_ext.as_ref())
790-
.await
791-
.unwrap();
792-
let re =
793-
Regex::new("^\\$BLOBDIR/autocrypt-setup-message-4137848473-[[:xdigit:]]{16}.html$")
794-
.unwrap();
795-
assert!(re.is_match(blob.as_name()));
796-
}
797-
798757
#[test]
799758
fn test_is_blob_name() {
800759
assert!(BlobObject::is_acceptible_blob_name("foo"));
@@ -806,42 +765,26 @@ mod tests {
806765
}
807766

808767
#[test]
809-
fn test_sanitise_name() {
810-
let (stem, ext) =
811-
BlobObject::sanitise_name("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt");
768+
fn test_get_extension() {
769+
let ext = BlobObject::get_extension("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt");
812770
assert_eq!(ext, ".txt");
813-
assert!(!stem.is_empty());
814771

815-
// the extensions are kept together as between stem and extension a number may be added -
816-
// and `foo.tar.gz` should become `foo-1234.tar.gz` and not `foo.tar-1234.gz`
817-
let (stem, ext) = BlobObject::sanitise_name("wot.tar.gz");
818-
assert_eq!(stem, "wot");
772+
let ext = BlobObject::get_extension("wot.tar.gz");
819773
assert_eq!(ext, ".tar.gz");
820774

821-
let (stem, ext) = BlobObject::sanitise_name(".foo.bar");
822-
assert_eq!(stem, "");
775+
let ext = BlobObject::get_extension(".foo.bar");
776+
// TODO: Should be ".bar", filenames starting with "." are for hidden files and this "."
777+
// participates in a file stem.
823778
assert_eq!(ext, ".foo.bar");
824779

825-
let (stem, ext) = BlobObject::sanitise_name("foo?.bar");
826-
assert!(stem.contains("foo"));
827-
assert!(!stem.contains('?'));
780+
let ext = BlobObject::get_extension("foo?.bar");
828781
assert_eq!(ext, ".bar");
829782

830-
let (stem, ext) = BlobObject::sanitise_name("no-extension");
831-
assert_eq!(stem, "no-extension");
783+
let ext = BlobObject::get_extension("no-extension");
832784
assert_eq!(ext, "");
833785

834-
let (stem, ext) = BlobObject::sanitise_name("path/ignored\\this: is* forbidden?.c");
786+
let ext = BlobObject::get_extension("path/ignored\\this: is* forbidden?.c");
835787
assert_eq!(ext, ".c");
836-
assert!(!stem.contains("path"));
837-
assert!(!stem.contains("ignored"));
838-
assert!(stem.contains("this"));
839-
assert!(stem.contains("forbidden"));
840-
assert!(!stem.contains('/'));
841-
assert!(!stem.contains('\\'));
842-
assert!(!stem.contains(':'));
843-
assert!(!stem.contains('*'));
844-
assert!(!stem.contains('?'));
845788
}
846789

847790
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -856,7 +799,7 @@ mod tests {
856799
let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap();
857800
let blobdir = t.get_blobdir().to_str().unwrap();
858801
assert!(avatar_blob.starts_with(blobdir));
859-
let re = Regex::new("avatar-[[:xdigit:]]{16}.jpg$").unwrap();
802+
let re = Regex::new("[[:xdigit:]]{16}.jpg$").unwrap();
860803
assert!(re.is_match(&avatar_blob));
861804
let avatar_blob = Path::new(&avatar_blob);
862805
assert!(avatar_blob.exists());
@@ -925,7 +868,7 @@ mod tests {
925868
let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap();
926869
let blobdir = t.get_blobdir().to_str().unwrap();
927870
assert!(avatar_blob.starts_with(blobdir));
928-
let re = Regex::new("avatar-[[:xdigit:]]{16}.png$").unwrap();
871+
let re = Regex::new("[[:xdigit:]]{16}.png$").unwrap();
929872
assert!(re.is_match(&avatar_blob));
930873
assert!(Path::new(&avatar_blob).exists());
931874
assert_eq!(

src/chat.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6248,7 +6248,7 @@ mod tests {
62486248
msg.param.get(Param::Filename).unwrap(),
62496249
"harmless_file.txt.exe"
62506250
);
6251-
let re = Regex::new("^\\$BLOBDIR/harmless_file-[[:xdigit:]]{16}.txt.exe$").unwrap();
6251+
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.txt.exe$").unwrap();
62526252
assert!(re.is_match(msg.param.get(Param::File).unwrap()));
62536253
Ok(())
62546254
}

src/mimeparser.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3712,7 +3712,7 @@ Message.
37123712
mime_message.parts[0].msg,
37133713
"this is a classic email – I attached the .EML file".to_string()
37143714
);
3715-
let re = Regex::new("^\\$BLOBDIR/-[[:xdigit:]]{16}.eml$").unwrap();
3715+
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.eml$").unwrap();
37163716
assert!(re.is_match(mime_message.parts[0].param.get(Param::File).unwrap()));
37173717

37183718
assert_eq!(mime_message.parts[0].org_filename, Some(".eml".to_string()));

src/param.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -530,8 +530,6 @@ mod tests {
530530
assert!(p.get_blob(Param::File, &t, false).await.is_err());
531531

532532
fs::write(fname, b"boo").await.unwrap();
533-
let blob = p.get_blob(Param::File, &t, true).await.unwrap().unwrap();
534-
assert!(blob.as_file_name().starts_with("foo"));
535533

536534
// Blob in blobdir, expect blob.
537535
let bar_path = t.get_blobdir().join("bar");

src/receive_imf/tests.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,7 +1457,6 @@ async fn test_pdf_filename_simple() {
14571457
assert_eq!(msg.viewtype, Viewtype::File);
14581458
assert_eq!(msg.text, "mail body");
14591459
let file_path = msg.param.get(Param::File).unwrap();
1460-
assert!(file_path.starts_with("$BLOBDIR/simple"));
14611460
assert!(file_path.ends_with(".pdf"));
14621461
}
14631462

@@ -1473,7 +1472,6 @@ async fn test_pdf_filename_continuation() {
14731472
assert_eq!(msg.viewtype, Viewtype::File);
14741473
assert_eq!(msg.text, "mail body");
14751474
let file_path = msg.param.get(Param::File).unwrap();
1476-
assert!(file_path.starts_with("$BLOBDIR/test pdf äöüß"));
14771475
assert!(file_path.ends_with(".pdf"));
14781476
}
14791477

0 commit comments

Comments
 (0)