Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b706d91

Browse files
committed May 17, 2024
CSV.open: detect BOM by default
Fix GH-301. Reported by Junichi Ito. Thanks!!!
1 parent 4e19f3d commit b706d91

File tree

2 files changed

+18
-1
lines changed

2 files changed

+18
-1
lines changed
 

‎lib/csv.rb

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1581,7 +1581,14 @@ def generate_lines(rows, **options)
15811581
def open(filename, mode="r", **options)
15821582
# wrap a File opened with the remaining +args+ with no newline
15831583
# decorator
1584-
file_opts = options.dup
1584+
file_opts = {}
1585+
have_encoding_options = (options.key?(:encoding) or
1586+
options.key?(:external_encoding) or
1587+
mode.include?(":"))
1588+
if not have_encoding_options and Encoding.default_external == Encoding::UTF_8
1589+
file_opts[:encoding] = "bom|utf-8"
1590+
end
1591+
file_opts.merge!(options)
15851592
unless file_opts.key?(:newline)
15861593
file_opts[:universal_newline] ||= false
15871594
end

‎test/csv/interface/test_read.rb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,16 @@ def test_open_with_newline
215215
end
216216
end
217217

218+
# Checks that CSV.open, called without any encoding option, still yields
# the expected rows when the file begins with a UTF-8 byte order mark.
#
# The test reads the current fixture content from @input, rewrites the file
# at @input.path with U+FEFF prepended, and then parses it with a tab column
# separator, comparing the parsed rows against @rows.
#
# NOTE(review): @input and @rows are set up outside this method (presumably
# in the test case's setup) -- confirm that @rows matches the fixture data.
def test_open_with_bom
219+
csv_data = @input.read
220+
bom = "\ufeff" # U+FEFF ZERO WIDTH NO-BREAK SPACE
221+
File.binwrite(@input.path, "#{bom}#{csv_data}")
222+
@input.rewind
223+
CSV.open(@input.path, col_sep: "\t") do |csv|
224+
assert_equal(@rows, csv.to_a)
225+
end
226+
end
227+
218228
def test_parse
219229
assert_equal(@rows,
220230
CSV.parse(@data, col_sep: "\t", row_sep: "\r\n"))

0 commit comments

Comments
 (0)
Please sign in to comment.