Skip to content

Commit

Permalink
Guess file content type when not specified (#655)
Browse files (browse the repository at this point in the history)

Co-authored-by: Martin Durant <[email protected]>
  • Loading branch information
kujenga and martindurant authored Jan 6, 2025
1 parent e3ebfb9 commit f978106
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
14 changes: 11 additions & 3 deletions gcsfs/core.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""
Google Cloud Storage pythonic interface
"""

import asyncio
import io
import json
import logging
import mimetypes
import os
import posixpath
import re
Expand Down Expand Up @@ -1391,7 +1393,7 @@ async def _put_file(
rpath,
metadata=None,
consistency=None,
content_type="application/octet-stream",
content_type=None,
chunksize=50 * 2**20,
callback=None,
fixed_key_metadata=None,
Expand All @@ -1401,6 +1403,10 @@ async def _put_file(
# enforce blocksize should be a multiple of 2**18
if os.path.isdir(lpath):
return
if content_type is None:
content_type, _ = mimetypes.guess_type(lpath)
if content_type is None:
content_type = "application/octet-stream"
callback = callback or NoOpCallback()
consistency = consistency or self.consistency
checker = get_consistency_checker(consistency)
Expand Down Expand Up @@ -1755,7 +1761,8 @@ def __init__(
the number we wrote; 'md5' does a full checksum. Any value other
than 'size' or 'md5' or 'crc32c' is assumed to mean no checking.
content_type: str
default is `application/octet-stream`. See the list of available
default when unspecified is provided by mimetypes.guess_type or
otherwise `application/octet-stream`. See the list of available
content types at https://www.iana.org/assignments/media-types/media-types.txt
metadata: dict
Custom metadata, in key/value pairs, added at file creation
Expand Down Expand Up @@ -1798,7 +1805,8 @@ def __init__(
else:
det = {}
self.content_type = content_type or det.get(
"contentType", "application/octet-stream"
"contentType",
mimetypes.guess_type(self.path)[0] or "application/octet-stream",
)
self.metadata = metadata or det.get("metadata", {})
self.fixed_key_metadata = _convert_fixed_key_metadata(det, from_google=True)
Expand Down
31 changes: 30 additions & 1 deletion gcsfs/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,36 @@ def test_array(gcs):
assert out == b"A" * 1000


def test_content_type_set(gcs):
    """An explicit ``content_type`` given to ``open`` is reported by ``info``."""
    target = TEST_BUCKET + "/content_type"
    with gcs.open(target, "wb", content_type="text/html") as out:
        out.write(b"<html>")
    # Read the stored object details back and compare the recorded type.
    details = gcs.info(target)
    assert details["contentType"] == "text/html"


def test_content_type_guess(gcs):
    """With no ``content_type`` given, a ``.txt`` path reports ``text/plain``."""
    target = TEST_BUCKET + "/content_type.txt"
    with gcs.open(target, "wb") as out:
        out.write(b"zz")
    # The expected value matches the ".txt" suffix of the target path.
    details = gcs.info(target)
    assert details["contentType"] == "text/plain"


def test_content_type_default(gcs):
    """An ``.abcdef`` path with no ``content_type`` reports the octet-stream type."""
    target = TEST_BUCKET + "/content_type.abcdef"
    with gcs.open(target, "wb") as out:
        out.write(b"zz")
    # Nothing is passed for the content type, so the fallback value is expected.
    details = gcs.info(target)
    assert details["contentType"] == "application/octet-stream"


def test_content_type_put_guess(gcs):
    """Uploading a local file via ``put`` without a content type yields ``text/plain``.

    A temporary file is created with ``tmpfile(extension="txt")``, filled with
    a short text payload and uploaded with ``gcs.put``. No content type is
    passed to ``put``; the assertion checks the ``contentType`` that ``info``
    reports for the uploaded object.
    """
    dst = TEST_BUCKET + "/content_type_put_guess"
    with tmpfile(extension="txt") as fn:
        with open(fn, "w") as f:
            f.write("zz")
        # Only source and destination are passed: per the fsspec docs the
        # third positional parameter of ``put`` is ``recursive``, not a
        # payload, so no extra positional argument belongs here.
        gcs.put(fn, f"gs://{dst}")
    assert gcs.info(dst)["contentType"] == "text/plain"


def test_attrs(gcs):
if not gcs.on_google:
# https://github.com/fsspec/gcsfs/pull/479
Expand Down Expand Up @@ -1194,7 +1224,6 @@ def test_dir_marker(gcs):


def test_mkdir_with_path(gcs):

with pytest.raises(FileNotFoundError):
gcs.mkdir(f"{TEST_BUCKET + 'new'}/path", create_parents=False)
assert not gcs.exists(f"{TEST_BUCKET + 'new'}")
Expand Down

0 comments on commit f978106

Please sign in to comment.