Skip to content

Commit 947e9c9

Browse files
committed
Strip CRLF from headers before parsing
1 parent 904d08b commit 947e9c9

File tree

3 files changed

+96
-73
lines changed

3 files changed

+96
-73
lines changed

changelog/68328.fixed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed cp.cache_file when using Tornado > 6.4

salt/fileclient.py

Lines changed: 73 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,76 @@ def get_dir(self, path, dest="", saltenv="base", gzip=None, cachedir=None):
463463
ret.sort()
464464
return ret
465465

466+
def _on_header(self, hdr, write_body, use_etag, dest_etag):
467+
hdr = hdr.strip()
468+
if write_body[1] is not False and (
469+
write_body[2] is None or (use_etag and write_body[3] is None)
470+
):
471+
if not hdr and "Content-Type" not in write_body[1]:
472+
# If write_body[0] is True, then we are not following a
473+
# redirect (initial response was a 200 OK). So there is
474+
# no need to reset write_body[0].
475+
if write_body[0] is not True:
476+
# We are following a redirect, so we need to reset
477+
# write_body[0] so that we properly follow it.
478+
write_body[0] = None
479+
# We don't need the HTTPHeaders object anymore
480+
if not use_etag or write_body[3]:
481+
write_body[1] = False
482+
return
483+
# Try to find out what content type encoding is used if
484+
# this is a text file
485+
write_body[1].parse_line(hdr) # pylint: disable=no-member
486+
# Case insensitive Etag header checking below. Don't break case
487+
# insensitivity unless you really want to mess with people's heads
488+
# in the tests. Note: http.server and apache2 use "Etag" and nginx
489+
# uses "ETag" as the header key. Yay standards!
490+
if use_etag and "etag" in map(str.lower, write_body[1]):
491+
etag = write_body[3] = [
492+
val for key, val in write_body[1].items() if key.lower() == "etag"
493+
][0]
494+
with salt.utils.files.fopen(dest_etag, "w") as etagfp:
495+
etag = etagfp.write(etag)
496+
elif "Content-Type" in write_body[1]:
497+
content_type = write_body[1].get(
498+
"Content-Type"
499+
) # pylint: disable=no-member
500+
if not content_type.startswith("text"):
501+
write_body[2] = False
502+
if not use_etag or write_body[3]:
503+
write_body[1] = False
504+
else:
505+
encoding = "utf-8"
506+
fields = content_type.split(";")
507+
for field in fields:
508+
if "encoding" in field:
509+
encoding = field.split("encoding=")[-1]
510+
write_body[2] = encoding
511+
# We have found our encoding. Stop processing headers.
512+
if not use_etag or write_body[3]:
513+
write_body[1] = False
514+
515+
# If write_body[0] is False, this means that this
516+
# header is a 30x redirect, so we need to reset
517+
# write_body[0] to None so that we parse the HTTP
518+
# status code from the redirect target. Additionally,
519+
# we need to reset write_body[2] so that we inspect the
520+
# headers for the Content-Type of the URL we're
521+
# following.
522+
if write_body[0] is write_body[1] is False:
523+
write_body[0] = write_body[2] = None
524+
525+
# Check the status line of the HTTP request
526+
if write_body[0] is None:
527+
try:
528+
hdr_response = parse_response_start_line(hdr)
529+
except HTTPInputError:
530+
log.debug("Unable to parse header: %r", hdr.strip())
531+
# Not the first line, do nothing
532+
return
533+
write_body[0] = hdr_response.code not in [301, 302, 303, 307]
534+
write_body[1] = HTTPHeaders()
535+
466536
def get_url(
467537
self,
468538
url,
@@ -681,78 +751,6 @@ def swift_opt(key, default):
681751
# both content encoding and etag are found.
682752
write_body = [None, False, None, None]
683753

684-
def on_header(hdr):
685-
686-
if write_body[1] is not False and (
687-
write_body[2] is None or (use_etag and write_body[3] is None)
688-
):
689-
if not hdr.strip() and "Content-Type" not in write_body[1]:
690-
# If write_body[0] is True, then we are not following a
691-
# redirect (initial response was a 200 OK). So there is
692-
# no need to reset write_body[0].
693-
if write_body[0] is not True:
694-
# We are following a redirect, so we need to reset
695-
# write_body[0] so that we properly follow it.
696-
write_body[0] = None
697-
# We don't need the HTTPHeaders object anymore
698-
if not use_etag or write_body[3]:
699-
write_body[1] = False
700-
return
701-
# Try to find out what content type encoding is used if
702-
# this is a text file
703-
write_body[1].parse_line(hdr) # pylint: disable=no-member
704-
# Case insensitive Etag header checking below. Don't break case
705-
# insensitivity unless you really want to mess with people's heads
706-
# in the tests. Note: http.server and apache2 use "Etag" and nginx
707-
# uses "ETag" as the header key. Yay standards!
708-
if use_etag and "etag" in map(str.lower, write_body[1]):
709-
etag = write_body[3] = [
710-
val
711-
for key, val in write_body[1].items()
712-
if key.lower() == "etag"
713-
][0]
714-
with salt.utils.files.fopen(dest_etag, "w") as etagfp:
715-
etag = etagfp.write(etag)
716-
elif "Content-Type" in write_body[1]:
717-
content_type = write_body[1].get(
718-
"Content-Type"
719-
) # pylint: disable=no-member
720-
if not content_type.startswith("text"):
721-
write_body[2] = False
722-
if not use_etag or write_body[3]:
723-
write_body[1] = False
724-
else:
725-
encoding = "utf-8"
726-
fields = content_type.split(";")
727-
for field in fields:
728-
if "encoding" in field:
729-
encoding = field.split("encoding=")[-1]
730-
write_body[2] = encoding
731-
# We have found our encoding. Stop processing headers.
732-
if not use_etag or write_body[3]:
733-
write_body[1] = False
734-
735-
# If write_body[0] is False, this means that this
736-
# header is a 30x redirect, so we need to reset
737-
# write_body[0] to None so that we parse the HTTP
738-
# status code from the redirect target. Additionally,
739-
# we need to reset write_body[2] so that we inspect the
740-
# headers for the Content-Type of the URL we're
741-
# following.
742-
if write_body[0] is write_body[1] is False:
743-
write_body[0] = write_body[2] = None
744-
745-
# Check the status line of the HTTP request
746-
if write_body[0] is None:
747-
try:
748-
hdr = parse_response_start_line(hdr.strip())
749-
except HTTPInputError as exc:
750-
log.trace("Unable to parse header: %r", hdr.strip())
751-
# Not the first line, do nothing
752-
return
753-
write_body[0] = hdr.code not in [301, 302, 303, 307]
754-
write_body[1] = HTTPHeaders()
755-
756754
if no_cache:
757755
result = []
758756

@@ -786,7 +784,9 @@ def on_chunk(chunk):
786784
fixed_url,
787785
stream=True,
788786
streaming_callback=on_chunk,
789-
header_callback=on_header,
787+
header_callback=lambda header: self._on_header(
788+
header, write_body, use_etag, dest_etag
789+
),
790790
username=url_data.username,
791791
password=url_data.password,
792792
opts=self.opts,

tests/pytests/unit/fileclient/test_fileclient.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,28 @@ def test_setstate(file_client, mocked_opts):
237237
assert file_client.opts == mocked_opts
238238

239239

240+
@pytest.mark.parametrize(
241+
"header,result",
242+
[
243+
("HTTP/1.1 200 OK\r\n", True),
244+
("HTTP/1.1 200 OK\n\n", True),
245+
("HTTP/1.1 200 OK", True),
246+
("HTTP/1.1 200", None),
247+
("HTT/1.1 200 OK\r\n", None),
248+
("HTT/1.1 200 OK", None),
249+
],
250+
)
251+
def test_on_header(file_client, header, result):
252+
"""
253+
Test that on_header function works with HTTP headers
254+
with or without the trailing whitespace.
255+
"""
256+
write_body = [None, False, None, None]
257+
258+
file_client._on_header(header, write_body, False, False)
259+
assert write_body[0] == result
260+
261+
240262
def test_get_url_with_hash(client_opts):
241263
"""
242264
Test get_url function with a URL containing a hash character.

0 commit comments

Comments
 (0)