-
Notifications
You must be signed in to change notification settings - Fork 839
[Outlining] Remove overlapping sequences #7146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ab65ba2
4636a3f
aff6cd7
76d31b0
6a5aa0f
b05f948
15485fa
c68c921
956dbd0
6408a25
fd7c6c8
061211e
f828e45
a6bc337
26d4ab7
9ece5b6
34cd8d1
54e1903
b83bdb3
116ee63
d22fc3a
79c0800
167dbf6
381b2ce
515d550
a044f24
7d30725
4f011f9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |||||||
| */ | ||||||||
|
|
||||||||
| #include "stringify-walker.h" | ||||||||
| #include "support/intervals.h" | ||||||||
|
|
||||||||
| namespace wasm { | ||||||||
|
|
||||||||
|
|
@@ -147,6 +148,47 @@ std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::dedupe( | |||||||
| return result; | ||||||||
| } | ||||||||
|
|
||||||||
| std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterOverlaps( | ||||||||
| const std::vector<SuffixTree::RepeatedSubstring>& substrings) { | ||||||||
| // A substring represents a contiguous set of instructions that appear more | ||||||||
| // than once in a Wasm binary. For each appearance of the substring, an | ||||||||
| // Interval is created that lacks a connection back to its originating | ||||||||
| // substring. To fix, upon Interval creation, a second vector is populated | ||||||||
| // with the index of the corresponding substring. | ||||||||
| std::vector<Interval> intervals; | ||||||||
| std::vector<int> substringIdxs; | ||||||||
|
|
||||||||
| // Construct intervals | ||||||||
| for (Index i = 0; i < substrings.size(); i++) { | ||||||||
| auto& substring = substrings[i]; | ||||||||
| for (auto startIdx : substring.StartIndices) { | ||||||||
| intervals.emplace_back( | ||||||||
| startIdx, startIdx + substring.Length, substring.Length); | ||||||||
| substringIdxs.push_back(i); | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| // Get the overlapping intervals | ||||||||
| std::vector<SuffixTree::RepeatedSubstring> result; | ||||||||
| std::vector<std::vector<Index>> startIndices(substrings.size()); | ||||||||
| std::vector<int> indices = IntervalProcessor::filterOverlaps(intervals); | ||||||||
| for (auto i : indices) { | ||||||||
| // i is the idx of the Interval in the intervals vector | ||||||||
| // i in substringIdxs returns the idx of the substring that needs to be | ||||||||
| // included in result | ||||||||
| auto substringIdx = substringIdxs[i]; | ||||||||
| startIndices[substringIdx].push_back(intervals[i].start); | ||||||||
| } | ||||||||
| for (Index i = 0; i < startIndices.size(); i++) { | ||||||||
| if (startIndices[i].size() > 1) { | ||||||||
| result.emplace_back(SuffixTree::RepeatedSubstring( | ||||||||
| {substrings[i].Length, std::move(startIndices[i])})); | ||||||||
|
Comment on lines
+184
to
+185
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When I remove SuffixTree::RepeatedSubstring, I get the following error:
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no constructor provided on SuffixTree, so we'll keep as-is |
||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| return result; | ||||||||
| } | ||||||||
|
|
||||||||
| std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filter( | ||||||||
| const std::vector<SuffixTree::RepeatedSubstring>& substrings, | ||||||||
| const std::vector<Expression*>& exprs, | ||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| /* | ||
| * Copyright 2024 WebAssembly Community Group participants | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #include <assert.h> | ||
|
|
||
| #include "intervals.h" | ||
| #include "support/index.h" | ||
| #include <algorithm> | ||
|
|
||
| using namespace wasm; | ||
|
|
||
| std::vector<int> | ||
| IntervalProcessor::filterOverlaps(std::vector<Interval>& intervals) { | ||
| if (intervals.size() == 0) { | ||
| return std::vector<int>(); | ||
| } | ||
|
|
||
| std::vector<std::pair<Interval, int>> intIntervals; | ||
| for (Index i = 0; i < intervals.size(); i++) { | ||
| auto& interval = intervals[i]; | ||
| intIntervals.push_back({interval, i}); | ||
| } | ||
|
|
||
| std::sort(intIntervals.begin(), intIntervals.end()); | ||
|
|
||
| std::vector<std::pair<Interval, int>> kept; | ||
| kept.push_back(intIntervals[0]); | ||
| for (auto& candidate : intIntervals) { | ||
| auto& former = kept.back(); | ||
| if (former.first.end <= candidate.first.start) { | ||
| kept.push_back(candidate); | ||
| continue; | ||
| } | ||
|
|
||
| // When two intervals overlap with the same weight, prefer to keep the | ||
| // interval that ends sooner under the presumption that it will overlap with | ||
| // fewer subsequent intervals. | ||
| if (former.first.weight == candidate.first.weight && | ||
| former.first.end > candidate.first.end) { | ||
| former = candidate; | ||
tlively marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } else if (former.first.weight < candidate.first.weight) { | ||
| former = candidate; | ||
| } | ||
| } | ||
|
|
||
| std::vector<int> result; | ||
| for (auto& intPair : kept) { | ||
| result.push_back(intPair.second); | ||
| } | ||
|
|
||
| return result; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| /* | ||
| * Copyright 2024 WebAssembly Community Group participants | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| // Helpers for handling a generic range of values | ||
|
|
||
| #ifndef wasm_support_intervals_h | ||
| #define wasm_support_intervals_h | ||
|
|
||
| #include <set> | ||
| #include <vector> | ||
|
|
||
| namespace wasm { | ||
|
|
||
| struct Interval { | ||
| unsigned start; | ||
| unsigned end; | ||
| // The weight is used to determine which interval to keep when two overlap, | ||
| // higher is better | ||
| unsigned weight; | ||
| Interval(unsigned start, unsigned end, unsigned weight) | ||
| : start(start), end(end), weight(weight) {} | ||
|
|
||
| bool operator<(const Interval& other) const { | ||
| return start != other.start ? start < other.start | ||
| : weight != other.weight ? weight < other.weight | ||
| : end < other.end; | ||
| } | ||
|
|
||
| bool operator==(const Interval& other) const { | ||
| return start == other.start && end == other.end && weight == other.weight; | ||
| } | ||
| }; | ||
|
|
||
| struct IntervalProcessor { | ||
| // Given a vector of Interval, returns a vector of the indices that, mapping | ||
| // back to the original input vector, do not overlap with each other, i.e. the | ||
| // interval indexes with overlapping interval indexes already removed. | ||
| static std::vector<int> filterOverlaps(std::vector<Interval>&); | ||
| }; | ||
|
|
||
| } // namespace wasm | ||
|
|
||
| #endif // wasm_suport_intervals |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| #include "support/intervals.h" | ||
| #include "ir/utils.h" | ||
| #include "gtest/gtest.h" | ||
|
|
||
| using namespace wasm; | ||
|
|
||
| TEST(IntervalsTest, TestEmpty) { | ||
| std::vector<Interval> intervals; | ||
| std::vector<int> expected; | ||
| ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected); | ||
| } | ||
|
|
||
| TEST(IntervalsTest, TestOne) { | ||
| std::vector<Interval> intervals; | ||
| intervals.emplace_back(0, 2, 5); | ||
| std::vector<int> expected{0}; | ||
| ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected); | ||
| } | ||
|
|
||
| TEST(IntervalsTest, TestNoOverlapFound) { | ||
| std::vector<Interval> intervals; | ||
| intervals.emplace_back(0, 4, 2); | ||
| intervals.emplace_back(4, 8, 2); | ||
| std::vector<int> expected{0, 1}; | ||
| ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected); | ||
| } | ||
|
|
||
| TEST(IntervalsTest, TestOverlapFound) { | ||
tlively marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| std::vector<Interval> intervals; | ||
| intervals.emplace_back(0, 2, 5); | ||
| intervals.emplace_back(1, 4, 10); | ||
| intervals.emplace_back(4, 5, 15); | ||
| std::vector<int> expected{1, 2}; | ||
| ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected); | ||
| } | ||
|
|
||
| TEST(IntervalsTest, TestOverlapAscendingSequence) { | ||
| std::vector<Interval> intervals; | ||
| intervals.emplace_back(0, 3, 6); | ||
| intervals.emplace_back(2, 6, 2); | ||
| intervals.emplace_back(3, 15, 5); | ||
| intervals.emplace_back(4, 11, 1); | ||
| intervals.emplace_back(6, 20, 15); | ||
| intervals.emplace_back(12, 18, 3); | ||
| intervals.emplace_back(14, 21, 5); | ||
| intervals.emplace_back(23, 28, 5); | ||
| std::vector<int> expected{0, 4, 7}; | ||
| ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected); | ||
| } | ||
|
|
||
| TEST(IntervalsTest, TestOverlapDescendingSequence) { | ||
| std::vector<Interval> intervals; | ||
| intervals.emplace_back(9, 15, 5); | ||
| intervals.emplace_back(8, 11, 1); | ||
| intervals.emplace_back(3, 15, 5); | ||
| intervals.emplace_back(3, 6, 2); | ||
| intervals.emplace_back(2, 10, 3); | ||
| intervals.emplace_back(0, 3, 6); | ||
| std::vector<int> expected{5, 2}; | ||
| ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected); | ||
| } | ||
|
|
||
| TEST(IntervalsTest, TestOverlapRandomSequence) { | ||
| std::vector<Interval> intervals; | ||
| intervals.emplace_back(21, 24, 1); | ||
| intervals.emplace_back(0, 5, 1); | ||
| intervals.emplace_back(11, 18, 1); | ||
| intervals.emplace_back(28, 35, 1); | ||
| intervals.emplace_back(5, 11, 1); | ||
| intervals.emplace_back(18, 21, 1); | ||
| intervals.emplace_back(35, 40, 1); | ||
| intervals.emplace_back(24, 28, 1); | ||
| std::vector<int> expected{1, 4, 2, 5, 0, 7, 3, 6}; | ||
| ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected); | ||
| } | ||
|
|
||
| TEST(IntervalsTest, TestOverlapInnerNested) { | ||
| std::vector<Interval> intervals; | ||
| intervals.emplace_back(0, 12, 3); | ||
| intervals.emplace_back(2, 4, 2); | ||
| intervals.emplace_back(3, 6, 5); | ||
| intervals.emplace_back(12, 15, 4); | ||
| std::vector<int> expected{2, 3}; | ||
| ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected); | ||
| } | ||
|
|
||
| TEST(IntervalsTest, TestOverlapOuterNested) { | ||
| std::vector<Interval> intervals; | ||
| intervals.emplace_back(0, 3, 6); | ||
| intervals.emplace_back(4, 11, 1); | ||
| intervals.emplace_back(12, 18, 3); | ||
| intervals.emplace_back(2, 15, 6); | ||
| std::vector<int> expected{0, 1, 2}; | ||
| ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1006,3 +1006,71 @@ | |
| (loop (nop)) | ||
| ) | ||
| ) | ||
|
|
||
| ;; Test that no attempt is made to outline overlapping repeat substrings | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be good to add comments about what the overlapping substrings are.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
| ;; In the below module, the Outlining optimization identifies two substrings | ||
| ;; that each repeat twice. During filtering, one of the repeat substrings is | ||
| ;; found to have an overlapping interval with itself. Because an interval is | ||
| ;; dropped, only one of the substrings repeats enough times (minimum twice) | ||
| ;; to warrant outlining. | ||
| (module | ||
| ;; CHECK: (type $0 (func)) | ||
|
|
||
| ;; CHECK: (func $outline$ (type $0) | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (i32.const 0) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (i32.const 1) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (i32.const 2) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (i32.const 3) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: ) | ||
|
|
||
| ;; CHECK: (func $a (type $0) | ||
| ;; CHECK-NEXT: (call $outline$) | ||
| ;; CHECK-NEXT: (call $outline$) | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (i32.const 1) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (i32.const 2) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (i32.const 1) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (i32.const 2) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: ) | ||
|
Comment on lines
+1043
to
+1049
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since this substring isn't outlined at all, it's not clear to me that the test is behaving as expected. If we add more repetitions of this shorter substring, will it be outlined?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
| (func $a | ||
| i32.const 0 ;; Begin substring 1 | ||
| drop | ||
| i32.const 1 | ||
| drop | ||
| i32.const 2 | ||
| drop | ||
| i32.const 3 | ||
| drop ;; End substring 1 | ||
| i32.const 0 ;; Begin substring 1 repeat | ||
| drop | ||
| i32.const 1 | ||
| drop | ||
| i32.const 2 | ||
| drop | ||
| i32.const 3 | ||
| drop ;; End substring 1 repeat && Begin substring 2 | ||
| i32.const 1 | ||
| drop | ||
| i32.const 2 | ||
| drop ;; End substring 2 && Begin substring 2 repeat | ||
| i32.const 1 | ||
| drop | ||
| i32.const 2 | ||
| drop ;; End substring 2 repeat | ||
| ) | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good to have a comment saying how these two vectors relate to each other.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done