diff --git a/lib/creek/book.rb b/lib/creek/book.rb index fae6090..e1a45be 100644 --- a/lib/creek/book.rb +++ b/lib/creek/book.rb @@ -8,7 +8,9 @@ class Creek::Book attr_reader :files, :sheets, :shared_strings, - :with_headers + :with_headers, + :workbook_rels_by_type, + :workbook_rels_by_id DATE_1900 = Date.new(1899, 12, 30).freeze DATE_1904 = Date.new(1904, 1, 1).freeze @@ -21,12 +23,14 @@ def initialize path, options = {} end path = download_file(path) if options[:remote] @files = Zip::File.open(path) + parse_workbook_path + parse_workbook_rels @shared_strings = SharedStrings.new(self) @with_headers = options.fetch(:with_headers, false) end def sheets - doc = @files.file.open "xl/workbook.xml" + doc = @files.file.open @workbook_path xml = Nokogiri::XML::Document.parse doc namespaces = xml.namespaces @@ -37,10 +41,8 @@ def sheets end end - rels_doc = @files.file.open "xl/_rels/workbook.xml.rels" - rels = Nokogiri::XML::Document.parse(rels_doc).css("Relationship") @sheets = xml.css(cssPrefix+'sheet').map do |sheet| - sheetfile = rels.find { |el| sheet.attr("r:id") == el.attr("Id") }.attr("Target") + sheetfile = @workbook_rels_by_id[sheet.attr("r:id")] sheet = Sheet.new( self, sheet.attr("name"), @@ -71,7 +73,7 @@ def base_date # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx result = DATE_1900 # default - doc = @files.file.open "xl/workbook.xml" + doc = @files.file.open @workbook_path xml = Nokogiri::XML::Document.parse doc xml.css('workbookPr[date1904]').each do |workbookPr| if workbookPr['date1904'] =~ /true|1/i @@ -98,5 +100,27 @@ def download_file(url) downloaded.path end end + + def parse_workbook_path + rels_file = @files.file.open '_rels/.rels' + rels_xml = Nokogiri::XML::Document.parse(rels_file).css('Relationship') + rel = rels_xml.find { |el| el.attr('Type') == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" } + @workbook_path = rel.attr('Target') + rescue Errno::ENOENT + @workbook_path = 'xl/workbook.xml' + end + + def parse_workbook_rels + @workbook_rels_by_id = {} + @workbook_rels_by_type = {} + workbook_dirname, slash, workbook_basename = @workbook_path.rpartition('/') + workbook_rels_file = @files.file.open "#{workbook_dirname}#{slash}_rels/#{workbook_basename}.rels" + Nokogiri::XML::Document.parse(workbook_rels_file).css('Relationship').each do |rel| + target = rel.attr('Target') + target = "#{workbook_dirname}#{slash}#{target}" unless target.start_with?('/') + @workbook_rels_by_id[rel.attr('Id')] = target + @workbook_rels_by_type[rel.attr('Type')] = target + end + end end end diff --git a/lib/creek/shared_strings.rb b/lib/creek/shared_strings.rb index dd5c922..561d426 100644 --- a/lib/creek/shared_strings.rb +++ b/lib/creek/shared_strings.rb @@ -13,8 +13,8 @@ def initialize book end def parse_shared_shared_strings - path = "xl/sharedStrings.xml" - if @book.files.file.exist?(path) + path = @book.workbook_rels_by_type["http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"] + if path doc = @book.files.file.open path xml = Nokogiri::XML::Document.parse doc parse_shared_string_from_document(xml) diff --git a/lib/creek/styles.rb b/lib/creek/styles.rb index d4681e9..ffbba36 100644 --- a/lib/creek/styles.rb +++ b/lib/creek/styles.rb @@ -6,12 +6,12 @@ def initialize(book) end def path - "xl/styles.xml" + @book.workbook_rels_by_type["http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"] end def styles_xml @styles_xml ||= begin - if @book.files.file.exist?(path) + if path doc = @book.files.file.open path Nokogiri::XML::Document.parse doc end diff --git a/spec/fixtures/test_structure.xlsx b/spec/fixtures/test_structure.xlsx new file mode 100644 index 0000000..9b21b95 Binary files /dev/null and b/spec/fixtures/test_structure.xlsx differ diff --git a/spec/test_spec.rb b/spec/test_spec.rb index dabb932..1dc1619 100644 --- a/spec/test_spec.rb +++ b/spec/test_spec.rb @@ -166,3 +166,21 @@ expect(rows.map{|r| r['cells']}).to eq(@expected_rows) end end + +describe 'Creek parsing a file with unusual structure.' do + before(:all) do + @creek = Creek::Book.new 'spec/fixtures/test_structure.xlsx' + @expected_rows = [ + {"A1"=>"date", "B1"=>"dimension", "C1"=>"metric_1", "D1"=>"metric_2", "E1"=>"metric_3"}, + {"A2"=>"2022-02-27", "B2"=>"A", "C2"=>"1", "D2"=>"5.30", "E2"=>"11:11:11"} + ] + end + + after(:all) do + @creek.close + end + + it 'Parse rows successfully.' do + expect(@creek.sheets[0].rows.to_a).to eq(@expected_rows) + end +end