Skip to content

Commit

Permalink
Add linter (#119)
Browse files Browse the repository at this point in the history
* added rubocop

* applied lint

* added lint check in ci
  • Loading branch information
DmitriyFirsov authored Mar 18, 2024
1 parent 67b5cd1 commit f0c683f
Show file tree
Hide file tree
Showing 21 changed files with 288 additions and 252 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,27 @@ permissions:
contents: read

jobs:
lint:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Ruby
uses: ruby/setup-ruby@v1
with:
ruby-version: "3.1"
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
- name: Run lint
run: bundle exec rubocop

test:

runs-on: ubuntu-latest
strategy:
matrix:
ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2']
needs:
- lint

steps:
- uses: actions/checkout@v3
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ tmp

# Mac finder artifacts
.DS_Store

.idea
27 changes: 27 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Gemspec/RequiredRubyVersion:
Enabled: false

Layout/LineLength:
Enabled: false
Metrics:
Enabled: false
Naming/ConstantName:
Enabled: false

Style/FrozenStringLiteralComment:
Enabled: false
Style/Documentation:
Enabled: false
Style/AndOr:
Enabled: false
Style/StringConcatenation:
Enabled: false
Style/ClassAndModuleChildren:
Enabled: false
Style/OptionalBooleanParameter:
Enabled: false
Style/TernaryParentheses:
EnforcedStyle: require_parentheses_when_complex

Naming/PredicateName:
Enabled: false
4 changes: 2 additions & 2 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
require "bundler/gem_tasks"
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

RSpec::Core::RakeTask.new('spec')

# If you want to make this the default task
task :default => :spec
task default: :spec
28 changes: 14 additions & 14 deletions creek.gemspec
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'creek/version'

Gem::Specification.new do |spec|
spec.name = "creek"
spec.name = 'creek'
spec.version = Creek::VERSION
spec.authors = ["pythonicrubyist"]
spec.email = ["[email protected]"]
spec.description = %q{A Ruby gem that streams and parses large Excel(xlsx and xlsm) files fast and efficiently.}
spec.summary = %q{A Ruby gem for parsing large Excel(xlsx and xlsm) files.}
spec.homepage = "https://github.com/pythonicrubyist/creek"
spec.license = "MIT"
spec.authors = ['pythonicrubyist']
spec.email = ['[email protected]']
spec.description = 'A Ruby gem that streams and parses large Excel(xlsx and xlsm) files fast and efficiently.'
spec.summary = 'A Ruby gem for parsing large Excel(xlsx and xlsm) files.'
spec.homepage = 'https://github.com/pythonicrubyist/creek'
spec.license = 'MIT'

spec.files = `git ls-files`.split($/)
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
spec.require_paths = ["lib"]
spec.require_paths = ['lib']

spec.required_ruby_version = '>= 2.0.0'

spec.add_development_dependency "bundler"
spec.add_development_dependency "rake"
spec.add_development_dependency 'rspec', '~> 3.6.0'
spec.add_development_dependency 'bundler'
spec.add_development_dependency 'pry-byebug'
spec.add_development_dependency 'rake'
spec.add_development_dependency 'rspec', '~> 3.6.0'
spec.add_development_dependency 'rubocop'

spec.add_dependency 'nokogiri', '>= 1.10.0'
spec.add_dependency 'rubyzip', '>= 1.0.0'
Expand Down
58 changes: 28 additions & 30 deletions lib/creek/book.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ class Creek::Book
DATE_1900 = Date.new(1899, 12, 30).freeze
DATE_1904 = Date.new(1904, 1, 1).freeze

def initialize path, options = {}
def initialize(path, options = {})
check_file_extension = options.fetch(:check_file_extension, true)
if check_file_extension
extension = File.extname(options[:original_filename] || path).downcase
raise 'Not a valid file format.' unless (['.xlsx', '.xlsm'].include? extension)
raise 'Not a valid file format.' unless ['.xlsx', '.xlsm'].include? extension
end
path = download_file(path) if options[:remote]
@files = Zip::File.open(path)
Expand All @@ -28,28 +28,26 @@ def initialize path, options = {}

def sheets
@sheets ||= begin
doc = @files.file.open "xl/workbook.xml"
doc = @files.file.open 'xl/workbook.xml'
xml = Nokogiri::XML::Document.parse doc
namespaces = xml.namespaces

cssPrefix = ''
css_prefix = ''
namespaces.each do |namespace|
if namespace[1] == 'http://schemas.openxmlformats.org/spreadsheetml/2006/main' && namespace[0] != 'xmlns' then
cssPrefix = namespace[0].split(':')[1]+'|'
end
css_prefix = namespace[0].split(':')[1] + '|' if namespace[1] == 'http://schemas.openxmlformats.org/spreadsheetml/2006/main' && namespace[0] != 'xmlns'
end

rels_doc = @files.file.open "xl/_rels/workbook.xml.rels"
rels = Nokogiri::XML::Document.parse(rels_doc).css("Relationship")
xml.css(cssPrefix+'sheet').map do |sheet|
sheetfile = rels.find { |el| sheet.attr("r:id") == el.attr("Id") }.attr("Target")
rels_doc = @files.file.open 'xl/_rels/workbook.xml.rels'
rels = Nokogiri::XML::Document.parse(rels_doc).css('Relationship')
xml.css(css_prefix + 'sheet').map do |sheet|
sheetfile = rels.find { |el| sheet.attr('r:id') == el.attr('Id') }.attr('Target')
sheet = Sheet.new(
self,
sheet.attr("name"),
sheet.attr("sheetid"),
sheet.attr("state"),
sheet.attr("visible"),
sheet.attr("r:id"),
sheet.attr('name'),
sheet.attr('sheetid'),
sheet.attr('state'),
sheet.attr('visible'),
sheet.attr('r:id'),
sheetfile
)
sheet.with_headers = with_headers
Expand All @@ -68,23 +66,23 @@ def close

def base_date
@base_date ||=
begin
# Default to 1900 (minus one day due to excel quirk) but use 1904 if
# it's set in the Workbook's workbookPr
# http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
result = DATE_1900 # default
begin
# Default to 1900 (minus one day due to excel quirk) but use 1904 if
# it's set in the Workbook's workbookPr
# http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
result = DATE_1900 # default

doc = @files.file.open "xl/workbook.xml"
xml = Nokogiri::XML::Document.parse doc
xml.css('workbookPr[date1904]').each do |workbookPr|
if workbookPr['date1904'] =~ /true|1/i
result = DATE_1904
break
doc = @files.file.open 'xl/workbook.xml'
xml = Nokogiri::XML::Document.parse doc
xml.css('workbookPr[date1904]').each do |workbook_pr|
if workbook_pr['date1904'] =~ /true|1/i
result = DATE_1904
break
end
end
end

result
end
result
end
end

private
Expand Down
36 changes: 17 additions & 19 deletions lib/creek/drawing.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ def initialize(book, drawing_filepath)
@drawings_rels = []
@images_pathnames = Hash.new { |hash, key| hash[key] = [] }

if file_exist?(@drawing_filepath)
load_drawings_and_rels
load_images_pathnames_by_cells if has_images?
end
return unless file_exist?(@drawing_filepath)

load_drawings_and_rels
load_images_pathnames_by_cells if has_images?
end

##
Expand All @@ -36,13 +36,11 @@ def images_at(cell_name)
return if pathnames_at_coordinate.empty?

pathnames_at_coordinate.map do |image_pathname|
if image_pathname.exist?
image_pathname
else
unless image_pathname.exist?
excel_image_path = "xl/media#{image_pathname.to_path.split(tmpdir).last}"
IO.copy_stream(@book.files.file.open(excel_image_path), image_pathname.to_path)
image_pathname
end
end
image_pathname
end
end

Expand All @@ -52,8 +50,8 @@ def images_at(cell_name)
# Transforms cell name to [row, col], e.g. A1 => [0, 0], B3 => [2, 1]
# Rows and cols start with 0.
def calc_coordinate(cell_name)
col = COLUMNS.index(cell_name.slice /[A-Z]+/)
row = (cell_name.slice /\d+/).to_i - 1 # rows in drawings start with 0
col = COLUMNS.index(cell_name.slice(/[A-Z]+/))
row = cell_name.slice(/\d+/).to_i - 1 # rows in drawings start with 0
[row, col]
end

Expand All @@ -68,7 +66,7 @@ def tmpdir
# Drawing XML contains relationship IDs and coordinates (row, col).
# Drawing relationships xml contains images' locations.
def load_drawings_and_rels
@drawings = parse_xml(@drawing_filepath).css('xdr|twoCellAnchor', 'xdr|oneCellAnchor' )
@drawings = parse_xml(@drawing_filepath).css('xdr|twoCellAnchor', 'xdr|oneCellAnchor')
drawing_rels_filepath = expand_to_rels_path(@drawing_filepath)
@drawings_rels = parse_xml(drawing_rels_filepath).css('Relationships')
end
Expand All @@ -78,11 +76,11 @@ def load_drawings_and_rels
# As multiple images can be located in a single cell, hash values are arrays of Pathname objects.
# One image can be spread across multiple cells (defined with from-row/to-row/from-col/to-col attributes) - the same Pathname object is associated with each row-col combination in the range.
def load_images_pathnames_by_cells
image_selector = 'xdr:pic/xdr:blipFill/a:blip'.freeze
row_from_selector = 'xdr:from/xdr:row'.freeze
row_to_selector = 'xdr:to/xdr:row'.freeze
col_from_selector = 'xdr:from/xdr:col'.freeze
col_to_selector = 'xdr:to/xdr:col'.freeze
image_selector = 'xdr:pic/xdr:blipFill/a:blip'
row_from_selector = 'xdr:from/xdr:row'
row_to_selector = 'xdr:to/xdr:row'
col_from_selector = 'xdr:from/xdr:col'
col_to_selector = 'xdr:to/xdr:col'

@drawings.xpath('//xdr:twoCellAnchor', '//xdr:oneCellAnchor').each do |drawing|
# embed = drawing.xpath(image_selector).first.attributes['embed']
Expand All @@ -91,13 +89,13 @@ def load_images_pathnames_by_cells
next if embed.nil?

rid = embed.value
path = Pathname.new("#{tmpdir}/#{extract_drawing_path(rid).slice(/[^\/]*$/)}")
path = Pathname.new("#{tmpdir}/#{extract_drawing_path(rid).slice(%r{[^/]*$})}")

row_from = drawing.xpath(row_from_selector).text.to_i
col_from = drawing.xpath(col_from_selector).text.to_i

if drawing.name == 'oneCellAnchor'
@images_pathnames[[row_from , col_from ]].push(path)
@images_pathnames[[row_from, col_from]].push(path)
else
row_to = drawing.xpath(row_to_selector).text.to_i
col_to = drawing.xpath(col_to_selector).text.to_i
Expand Down
31 changes: 14 additions & 17 deletions lib/creek/shared_strings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,32 @@
require 'nokogiri'

module Creek

class Creek::SharedStrings

SPREADSHEETML_URI = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'

attr_reader :book, :dictionary

def initialize book
def initialize(book)
@book = book
parse_shared_shared_strings
end

def parse_shared_shared_strings
path = "xl/sharedStrings.xml"
if @book.files.file.exist?(path)
doc = @book.files.file.open path
xml = Nokogiri::XML::Document.parse doc
parse_shared_string_from_document(xml)
end
path = 'xl/sharedStrings.xml'
return unless @book.files.file.exist?(path)

doc = @book.files.file.open path
xml = Nokogiri::XML::Document.parse doc
parse_shared_string_from_document(xml)
end

def parse_shared_string_from_document(xml)
@dictionary = self.class.parse_shared_string_from_document(xml)
end

def self.parse_shared_string_from_document(xml)
dictionary = Hash.new
namespace = xml.namespaces.detect{|_key, uri| uri == SPREADSHEETML_URI }
dictionary = {}
namespace = xml.namespaces.detect { |_key, uri| uri == SPREADSHEETML_URI }
prefix = if namespace && namespace[0].start_with?('xmlns:')
namespace[0].delete_prefix('xmlns:') + '|'
else
Expand All @@ -42,15 +40,14 @@ def self.parse_shared_string_from_document(xml)

xml.css(node_selector).each_with_index do |si, idx|
text_nodes = si.css(text_selector)
if text_nodes.count == 1 # plain text node
dictionary[idx] = Creek::Styles::Converter.unescape_string(text_nodes.first.content)
else # rich text nodes with text fragments
dictionary[idx] = text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('')
end
dictionary[idx] = if text_nodes.count == 1 # plain text node
Creek::Styles::Converter.unescape_string(text_nodes.first.content)
else # rich text nodes with text fragments
text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('')
end
end

dictionary
end

end
end
Loading

0 comments on commit f0c683f

Please sign in to comment.