Skip to content

Commit

Permalink
update remap method to remove undefined mapping properties (#53)
Browse files Browse the repository at this point in the history
If a field is not explicitly defined in an index's mapping, we should not
persist it via the documents we copy over as part of remap. This provides
a way around the Lucene limitation that fields cannot be deleted from an
index mapping without recreating the index from scratch.
  • Loading branch information
DanielNill authored Mar 12, 2018
1 parent e75d5a9 commit aa3c8c3
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 6 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
v0.10.0
- update remap to remove fields that are not explicitly defined in the
mapping.
v0.9.1
- fix search enumerator, missing first result set
v0.8.3
Expand Down
9 changes: 8 additions & 1 deletion lib/elasticity/strategies/alias_index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,17 @@ def remap(index_def)
docs = @client.mget(body: { docs: id_docs }, refresh: true)["docs"]
break if docs.empty?

# Modify document hashes to match the mapping definition so that legacy fields aren't added
defined_mapping_fields = index_def[:mappings][docs.first["_type"]]["properties"].keys

# Move only documents that still exist on the old index into the new index.
ops = []
docs.each do |doc|
ops << { index: { _index: new_index, _type: doc["_type"], _id: doc["_id"], data: doc["_source"] } } if doc["found"]
if doc["found"]
legacy_fields = doc["_source"].keys - defined_mapping_fields
legacy_fields.each { |field| doc["_source"].delete(field) }
ops << { index: { _index: new_index, _type: doc["_type"], _id: doc["_id"], data: doc["_source"] } }
end
end

@client.bulk(body: ops)
Expand Down
2 changes: 1 addition & 1 deletion lib/elasticity/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module Elasticity
VERSION = "0.9.1"
VERSION = "0.10.0"
end
52 changes: 48 additions & 4 deletions spec/functional/persistence_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def self.name
c.strategy = Elasticity::Strategies::SingleIndex

c.mapping = {
properties: {
"properties" => {
name: { type: "string", index: "not_analyzed" },
birthdate: { type: "date" },
},
Expand Down Expand Up @@ -85,7 +85,7 @@ class Cat < Animal
c.index_base_name = "cats_and_dogs"
c.strategy = Elasticity::Strategies::SingleIndex
c.document_type = "cat"
c.mapping = { properties: {
c.mapping = { "properties" => {
name: { type: "string", index: "not_analyzed" },
age: { type: "integer" }
} }
Expand All @@ -103,7 +103,7 @@ class Dog < Animal
c.index_base_name = "cats_and_dogs"
c.strategy = Elasticity::Strategies::SingleIndex
c.document_type = "dog"
c.mapping = { properties: {
c.mapping = { "properties" => {
name: { type: "string", index: "not_analyzed" },
age: { type: "integer" },
hungry: { type: "boolean" }
Expand Down Expand Up @@ -165,7 +165,7 @@ def self.name
c.strategy = Elasticity::Strategies::AliasIndex

c.mapping = {
properties: {
"properties" => {
id: { type: "integer" },
name: { type: "string", index: "not_analyzed" },
birthdate: { type: "date" },
Expand Down Expand Up @@ -243,6 +243,50 @@ def to_document
expect(results.total).to eq(2010)
end

# Verifies that remapping drops document fields the mapping no longer defines:
# documents are first indexed with a birthdate, then the document class is
# redefined without birthdate in its mapping, remap! is run, and the search
# results are checked to no longer expose birthdate.
it "does not copy over fields not defined in the mapping" do
# Index two documents that carry a birthdate, using the outer `subject` class.
john = subject.new(_id: 1, id: 1, name: "John", birthdate: "1985-10-31", sort: ['john'])
mari = subject.new(_id: 2, id: 2, name: "Mari", birthdate: "1986-09-24", sort: ['mari'])

john.update
mari.update

# Flush so the documents written above are visible to the search below.
subject.flush_index
results = subject.search({})
# Sanity check: before the remap, birthdate is present on the stored document.
expect(results.first.birthdate).to be

# Redefine the document class with a mapping that has no birthdate field.
# From this assignment on, `subject` is a local variable that shadows the
# outer `subject` for the rest of this example.
# NOTE(review): presumably targets the same index/type as the outer subject
# ("users"/"user") so that remap! operates on the documents indexed above — confirm.
subject = Class.new(Elasticity::Document) do
# Anonymous classes have no name; provide one explicitly.
def self.name
"SomeClass"
end

configure do |c|
c.index_base_name = "users"
c.document_type = "user"
c.strategy = Elasticity::Strategies::AliasIndex

# Only id and name are mapped; birthdate is intentionally omitted.
c.mapping = {
"properties" => {
id: { type: "integer" },
name: { type: "string", index: "not_analyzed" },
},
}
end

attr_accessor :id, :name

# Serializes only the fields that are still part of the mapping.
def to_document
{ id: id, name: name }
end
end

# Remap using the reduced mapping, then flush before searching again.
subject.remap!
subject.flush_index

# After the remap the returned document must no longer expose birthdate.
results = subject.search({})
expect(results.first.respond_to?(:birthdate)).to be false
end

it "recover from remap interrupts" do
number_of_docs = 2000
docs = number_of_docs.times.map do |i|
Expand Down

0 comments on commit aa3c8c3

Please sign in to comment.