Skip to content

Commit b69f7d6

Browse files
committed
Isolated offset code behind the capture_offsets option and enabled import tracing
1 parent a4ac09f commit b69f7d6

8 files changed

+145
-115
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ parser.to_s
5555
body { margin: 0 1em; }
5656
```
5757

58+
# capturing byte offsets within a file
59+
parser.load_uri!('../style.css', {:base_uri => 'http://example.com/styles/inc/', :capture_offsets => true})
60+
content_rule = parser.find_rule_sets(['#content']).first
61+
content_rule.filename
62+
#=> 'http://example.com/styles/styles.css'
63+
content_rule.offset
64+
#=> (10703..10752)
65+
5866
# Testing
5967

6068
```Bash

Rakefile

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,29 @@ desc 'Run the unit tests.'
77
Rake::TestTask.new(:default) do |test|
88
test.verbose = true
99
end
10+
11+
desc 'Run a performance evaluation.'
task :benchmark do
  require 'benchmark'
  require 'css_parser'

  base_dir = File.dirname(__FILE__) + '/test/fixtures'

  # Each entry is [fixture file, iteration count, count as printed in the report].
  # import1.css is parsed to benchmark file loading,
  # complex.css is parsed to benchmark rule parsing.
  runs = [
    ['import1.css', 10_000, '10 000'],
    ['complex.css', 1_000, '1 000']
  ]

  runs.each do |fixture, iterations, label|
    time = Benchmark.measure do
      iterations.times do
        # a new parser is created for every iteration
        parser = CssParser::Parser.new
        parser.load_file!(fixture, base_dir)
      end
    end
    puts "Parsing '#{fixture}' #{label} times took #{time.real.round(4)} seconds"
  end
end

lib/css_parser/parser.rb

Lines changed: 95 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ class << self; attr_reader :folded_declaration_cache; end
3636
def initialize(options = {})
3737
@options = {:absolute_paths => false,
3838
:import => true,
39-
:io_exceptions => true}.merge(options)
39+
:io_exceptions => true,
40+
:capture_offsets => false}.merge(options)
4041

4142
# array of RuleSets
4243
@rules = []
@@ -117,7 +118,7 @@ def add_block!(block, options = {})
117118
options[:media_types] = [options[:media_types]].flatten.collect { |mt| CssParser.sanitize_media_query(mt)}
118119
options[:only_media_types] = [options[:only_media_types]].flatten.collect { |mt| CssParser.sanitize_media_query(mt)}
119120

120-
block = cleanup_block(block)
121+
block = cleanup_block(block, options)
121122

122123
if options[:base_uri] and @options[:absolute_paths]
123124
block = CssParser.convert_uris(block, options[:base_uri])
@@ -139,26 +140,41 @@ def add_block!(block, options = {})
139140

140141
import_path = import_rule[0].to_s.gsub(/['"]*/, '').strip
141142

143+
import_options = { :media_types => media_types }
144+
import_options[:capture_offsets] = true if options[:capture_offsets]
145+
142146
if options[:base_uri]
143147
import_uri = Addressable::URI.parse(options[:base_uri].to_s) + Addressable::URI.parse(import_path)
144-
load_uri!(import_uri, options[:base_uri], media_types)
148+
import_options[:base_uri] = options[:base_uri]
149+
load_uri!(import_uri, import_options)
145150
elsif options[:base_dir]
146-
load_file!(import_path, options[:base_dir], media_types)
151+
import_options[:base_dir] = options[:base_dir]
152+
load_file!(import_path, import_options)
147153
end
148154
end
149155
end
150156

151157
# Remove @import declarations
152-
block.gsub!(RE_AT_IMPORT_RULE) { |m| ' ' * m.length }
158+
block = remove_all(block, RE_AT_IMPORT_RULE, options)
153159

154160
parse_block_into_rule_sets!(block, options)
155161
end
156162

157163
# Add a CSS rule by setting the +selectors+, +declarations+ and +media_types+.
158164
#
159165
# +media_types+ can be a symbol or an array of symbols.
160-
def add_rule!(selectors, declarations, media_types = :all, offset = nil)
161-
rule_set = RuleSet.new(selectors, declarations, nil, offset)
166+
def add_rule!(selectors, declarations, media_types = :all)
  # Builds a plain RuleSet (no file name or offset attached) from
  # +selectors+ and +declarations+ and hands it to add_rule_set! together
  # with +media_types+, which can be a symbol or an array of symbols.
  rule_set = RuleSet.new(selectors, declarations)
  add_rule_set!(rule_set, media_types)
end
170+
171+
# Add a CSS rule by setting the +selectors+, +declarations+, +uri+, +offset+ and +media_types+.
#
# +uri+ can be a string or uri pointing to the file or url location.
# +offset+ should be a Range object representing the start and end byte locations where the rule was found in the file.
# +media_types+ can be a symbol or an array of symbols.
def add_file_rule!(selectors, declarations, uri, offset, media_types = :all)
  # FileRuleSet takes the location arguments first, followed by the selectors and declarations
  rule_set = FileRuleSet.new(uri, offset, selectors, declarations)
  add_rule_set!(rule_set, media_types)
end
164180

@@ -289,16 +305,15 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
289305
current_media_query = ''
290306
current_declarations = ''
291307

292-
# once we are in a rule, we will use this to store where we started
308+
# once we are in a rule, we will use this to store where we started if we are capturing offsets
293309
rule_start = nil
294310
offset = nil
295311

296312
block.scan(/(([\\]{2,})|([\\]?[{}\s"])|(.[^\s"{}\\]*))/) do |matches|
297-
# encode here because it can affect the length of the string
298-
token = matches[0].encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
313+
token = matches[0]
299314

300-
# save the regex offset so tat we know where in the file we are
301-
offset = Regexp.last_match.offset(0)
315+
# save the regex offset so that we know where in the file we are
316+
offset = Regexp.last_match.offset(0) if options[:capture_offsets]
302317

303318
if token =~ /\A"/ # found un-escaped double quote
304319
in_string = !in_string
@@ -324,13 +339,18 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
324339
in_declarations -= 1
325340

326341
unless current_declarations.strip.empty?
327-
add_rule!(current_selectors, current_declarations, current_media_queries, (rule_start..offset.last))
342+
if options[:capture_offsets]
343+
add_file_rule!(current_selectors, current_declarations, options[:filename], (rule_start..offset.last), current_media_queries)
344+
else
345+
add_rule!(current_selectors, current_declarations, current_media_queries)
346+
end
328347
end
329348

330-
# restart our search for selectors and declarations
331-
rule_start = nil
332349
current_selectors = ''
333350
current_declarations = ''
351+
352+
# restart our search for selectors and declarations
353+
rule_start = nil if options[:capture_offsets]
334354
end
335355
elsif token =~ /@media/i
336356
# found '@media', reset current media_types
@@ -366,23 +386,26 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
366386
end
367387
else
368388
if token =~ /\{/ and not in_string
369-
current_selectors.gsub!(/^[\s]*/, '')
370-
current_selectors.gsub!(/[\s]*$/, '')
389+
current_selectors.strip!
371390
in_declarations += 1
372391
else
373-
# if we are in a selector, add the token to te current selectors
392+
# if we are in a selector, add the token to the current selectors
374393
current_selectors += token
375394

376395
# mark this as the beginning of the selector unless we have already marked it
377-
rule_start = offset.first if rule_start.nil? && token =~ /^[^\s]+$/
396+
rule_start = offset.first if options[:capture_offsets] && rule_start.nil? && token =~ /^[^\s]+$/
378397
end
379398
end
380399
end
381400
end
382401

383402
# check for unclosed braces
384403
if in_declarations > 0
385-
add_rule!(current_selectors, current_declarations, current_media_queries, (rule_start..offset.last))
404+
if options[:capture_offsets]
405+
add_file_rule!(current_selectors, current_declarations, options[:filename], (rule_start..offset.last), current_media_queries)
406+
else
407+
add_rule!(current_selectors, current_declarations, current_media_queries)
408+
end
386409
end
387410
end
388411

@@ -395,7 +418,6 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
395418
# Deprecated: originally accepted three params: `uri`, `base_uri` and `media_types`
396419
def load_uri!(uri, options = {}, deprecated = nil)
397420
uri = Addressable::URI.parse(uri) unless uri.respond_to? :scheme
398-
#base_uri = nil, media_types = :all, options = {}
399421

400422
opts = {:base_uri => nil, :media_types => :all}
401423

@@ -413,22 +435,46 @@ def load_uri!(uri, options = {}, deprecated = nil)
413435

414436
opts[:base_uri] = uri if opts[:base_uri].nil?
415437

438+
# pass on the uri if we are capturing file offsets
439+
opts[:filename] = uri.to_s if opts[:capture_offsets]
440+
416441
src, = read_remote_file(uri) # skip charset
417442
if src
418443
add_block!(src, opts)
419444
end
420445
end
421446

422447
# Load a local CSS file.
423-
def load_file!(file_name, base_dir = nil, media_types = :all)
424-
file_name = File.expand_path(file_name, base_dir)
448+
def load_file!(file_name, options = {}, deprecated = nil)
  # Reads a local CSS file and passes its contents to add_block!.
  #
  # +options+ is normally a Hash (e.g. :base_dir, :media_types, :capture_offsets).
  # Deprecated: originally accepted three params: `file_name`, `base_dir` and
  # `media_types`. That form still works: a String in +options+ is used as
  # the base directory and +deprecated+ as the media types.
  #
  # Returns nil without loading anything when the file is not readable or
  # when circular_reference_check rejects it.
  opts = {:base_dir => nil, :media_types => :all}

  if options.is_a? Hash
    opts.merge!(options)
  else
    opts[:base_dir] = options if options.is_a? String
    opts[:media_types] = deprecated if deprecated
  end

  file_name = File.expand_path(file_name, opts[:base_dir])
  return unless File.readable?(file_name)
  return unless circular_reference_check(file_name)

  if opts[:capture_offsets]
    # reading in binary mode takes a little longer than IO.read but retains
    # line-breaks consistently across platforms, which is important when
    # capturing offsets; the block form closes the handle even if read raises
    src = File.open(file_name, 'rb') { |fh| fh.read }

    # pass on the file name if we are capturing file offsets
    opts[:filename] = file_name
  else
    src = IO.read(file_name)
  end

  # hand the directory of the loaded file on as the base directory;
  # add_block! uses it when it follows @import rules
  opts[:base_dir] = File.dirname(file_name)

  add_block!(src, opts)
end
433479

434480
# Load a local CSS string.
@@ -454,16 +500,33 @@ def circular_reference_check(path)
454500
end
455501
end
456502

503+
# Remove a pattern from a given string
#
# When +options[:capture_offsets]+ is set, every match is overwritten with
# the same number of spaces instead of being deleted, so the returned string
# has the same length as +css+.
#
# Returns a string.
def remove_all(css, regex, options)
  # if we are capturing file offsets, replace the characters with spaces to retain the original positions
  return css.gsub(regex) { |m| ' ' * m.length } if options[:capture_offsets]

  # otherwise just strip it out
  css.gsub(regex, '')
end
513+
457514
# Strip comments and clean up blank lines from a block of CSS.
458515
#
459516
# Returns a string.
460-
def cleanup_block(block) # :nodoc:
461-
# Strip CSS comments but make sure the string stays the same length so that we can retain byte offsets
462-
block.gsub!(STRIP_CSS_COMMENTS_RX) { |m| ' ' * m.length }
517+
def cleanup_block(block, options = {}) # :nodoc:
518+
# Strip CSS comments
519+
utf8_block = block.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: ' '))
520+
utf8_block = remove_all(utf8_block, STRIP_CSS_COMMENTS_RX, options)
463521

464522
# Strip HTML comments - they shouldn't really be in here but
465523
# some people are just crazy...
466-
block.gsub(STRIP_HTML_COMMENTS_RX) { |m| ' ' * m.length }
524+
utf8_block = remove_all(utf8_block, STRIP_HTML_COMMENTS_RX, options)
525+
526+
# Strip lines containing just whitespace
527+
utf8_block.gsub!(/^\s+$/, "") unless options[:capture_offsets]
528+
529+
utf8_block
467530
end
468531

469532
# Download a file into a string.
@@ -491,8 +554,9 @@ def read_remote_file(uri) # :nodoc:
491554

492555
src = '', charset = nil
493556

494-
uri = Addressable::URI.parse(uri.to_s)
495557
begin
558+
uri = Addressable::URI.parse(uri.to_s)
559+
496560
if uri.scheme == 'file'
497561
# local file
498562
path = uri.path

lib/css_parser/rule_set.rb

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,14 @@ class RuleSet
1010
# Array of selector strings.
1111
attr_reader :selectors
1212

13-
# File offset range
14-
attr_reader :offset
15-
1613
# Integer with the specificity to use for this RuleSet.
1714
attr_accessor :specificity
1815

19-
def initialize(selectors, block, specificity = nil, offset = nil)
16+
def initialize(selectors, block, specificity = nil)
2017
@selectors = []
2118
@specificity = specificity
2219
@declarations = {}
2320
@order = 0
24-
@offset = offset
2521
parse_selectors!(selectors) if selectors
2622
parse_declarations!(block)
2723
end
@@ -517,4 +513,19 @@ def parse_selectors!(selectors) # :nodoc:
517513
@selectors = selectors.split(',').map { |s| s.gsub(/\s+/, ' ').strip }
518514
end
519515
end
516+
517+
# A RuleSet that additionally carries the location it was given at
# construction time: a file name (local or remote) and an offset range.
class FileRuleSet < RuleSet
  # File offset range
  attr_reader :offset

  # the local or remote location
  attr_accessor :filename

  # +filename+ and +offset+ are stored on the instance; +selectors+, +block+
  # and +specificity+ are passed on unchanged to RuleSet#initialize.
  def initialize(filename, offset, selectors, block, specificity = nil)
    super(selectors, block, specificity)
    @offset = offset
    @filename = filename
  end
end
520531
end

test/test_css_parser_basic.rb

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,4 @@ def test_converting_to_hash
6767
hash = @cp.to_h
6868
assert_equal 'blue', hash['all']['div']['color']
6969
end
70-
71-
def test_accessing_file_offsets
72-
@cp.add_block!(@css)
73-
i = 0
74-
offsets = [(6..36), (43..62), (69..111), (118..142)]
75-
@cp.each_rule_set do |rule_set, media_types|
76-
assert_equal offsets[i], rule_set.offset
77-
i += 1
78-
end
79-
end
8070
end

test/test_css_parser_loading.rb

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,6 @@ def teardown
4343
def test_loading_301_redirect
4444
@cp.load_uri!("#{@uri_base}/redirect301")
4545
assert_equal 'margin: 0px;', @cp.find_by_selector('p').join(' ')
46-
47-
# check rule offsets
48-
i = 0
49-
# accommodate for different encodings between windows and unix
50-
offsets = Gem.win_platform? ? [(0..46), (50..68)] : [(0..43), (45..63)]
51-
@cp.each_rule_set do |rule_set, media_types|
52-
assert_equal offsets[i], rule_set.offset
53-
i += 1
54-
end
5546
end
5647

5748
def test_loading_302_redirect
@@ -84,12 +75,6 @@ def test_loading_a_remote_file_over_ssl
8475
else
8576
@cp.load_uri!("https://dialect.ca/inc/screen.css")
8677
assert_match( /margin\: 0\;/, @cp.find_by_selector('body').join(' ') )
87-
88-
# there are a lot of rules in this file, but check some rule offsets
89-
rules = @cp.find_rule_sets(['#container', '#name_case_converter textarea'])
90-
assert_equal 2, rules.count
91-
assert_equal (2172..2227), rules.first.offset
92-
assert_equal (10703..10752), rules.last.offset
9378
end
9479
end
9580

0 commit comments

Comments
 (0)