Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/passes/Outlining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,8 @@ struct Outlining : public Pass {
DBG(printHashString(stringify.hashString, stringify.exprs));
// Remove substrings that are substrings of longer repeat substrings.
substrings = StringifyProcessor::dedupe(substrings);
// Remove substrings with overlapping indices.
substrings = StringifyProcessor::filterOverlaps(substrings);
// Remove substrings with branch and return instructions until an analysis
// is performed to see if the intended destination of the branch is included
// in the substring to be outlined.
Expand Down
43 changes: 43 additions & 0 deletions src/passes/hash-stringify-walker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "stringify-walker.h"
#include "support/intervals.h"

namespace wasm {

Expand Down Expand Up @@ -147,6 +148,48 @@ std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::dedupe(
return result;
}

std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterOverlaps(
const std::vector<SuffixTree::RepeatedSubstring>& substrings) {
// A substring represents a contiguous set of instructions that appear more
// than once in a Wasm binary. For each appearance of the substring, an
// Interval is created that lacks a connection back to its originating
// substring. To fix, upon Interval creation, a second vector is populated with
// the index of the corresponding substring.
std::vector<Interval> intervals;
std::vector<int> substringIdxs;
Comment on lines +158 to +159
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to have a comment saying how these two vectors relate to each other.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


// Construct intervals
for (Index i = 0; i < substrings.size(); i++) {
auto substring = substrings[i];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid copying the list of start indices and whatever else is in there:

Suggested change
auto substring = substrings[i];
auto& substring = substrings[i];

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

batched suggestion

for (auto startIdx : substring.StartIndices) {
auto interval =
Interval(startIdx, startIdx + substring.Length, substring.Length);
intervals.push_back(interval);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be slightly shorter using emplace_back.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

substringIdxs.push_back(i);
}
}

// Get the overlapping intervals
std::vector<SuffixTree::RepeatedSubstring> result;
std::vector<std::vector<Index>> startIndices(substrings.size());
std::vector<int> indices = IntervalProcessor::filterOverlaps(intervals);
for (auto i : indices) {
// i is the idx of the Interval in the intervals vector
// i in substringIdxs returns the idx of the substring that needs to be
// included in result
auto substringIdx = substringIdxs[i];
startIndices[substringIdx].push_back(intervals[i].start);
}
for (Index i = 0; i < startIndices.size(); i++) {
if (startIndices[i].size() > 1) {
result.push_back(SuffixTree::RepeatedSubstring(
{substrings[i].Length, std::move(startIndices[i])}));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be an emplace_back to really be sure we're not copying the vector of start indices?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Comment on lines +184 to +185
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
result.emplace_back(SuffixTree::RepeatedSubstring(
{substrings[i].Length, std::move(startIndices[i])}));
result.emplace_back(substrings[i].Length, std::move(startIndices[i]));

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I remove SuffixTree::RepeatedSubstring, I get the following error:

FAILED: src/passes/CMakeFiles/passes.dir/hash-stringify-walker.cpp.o /usr/bin/c++ -I/src -I/third_party/FP16/include -I/third_party/llvm-project/include -I -DBUILD_LLVM_DWARF -Wall -Werror -Wextra -Wno-unused-parameter -Wno-dangling-pointer -fno-omit-frame-pointer -fno-rtti -Wno-implicit-int-float-conversion -Wno-unknown-warning-option -Wswitch -Wimplicit-fallthrough -Wnon-virtual-dtor -fPIC -fdiagnostics-color=always -g -g3 -std=c++17 -MD -MT src/passes/CMakeFiles/passes.dir/hash-stringify-walker.cpp.o -MF src/passes/CMakeFiles/passes.dir/hash-stringify-walker.cpp.o.d -o src/passes/CMakeFiles/passes.dir/hash-stringify-walker.cpp.o -c /src/passes/hash-stringify-walker.cpp /src/passes/hash-stringify-walker.cpp: In static member function ‘static std::vector<wasm::SuffixTree::RepeatedSubstring> wasm::StringifyProcessor::filterOverlaps(const std::vector<wasm::SuffixTree::RepeatedSubstring>&)’: /src/passes/hash-stringify-walker.cpp:184:26: error: no matching function for call to ‘std::vector<wasm::SuffixTree::RepeatedSubstring>::emplace_back(<brace-enclosed initializer list>)’ 184 | result.emplace_back( | ~~~~~~~~~~~~~~~~~~~^ 185 | {substrings[i].Length, std::move(startIndices[i])}); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from /usr/include/c++/14/vector:72, from /src/wasm.h:34, from /src/ir/boolean.h:20, from /src/ir/bits.h:20, from /src/ir/properties.h:20, from /src/ir/iteration.h:20, from /src/passes/stringify-walker.h:20, from /src/passes/hash-stringify-walker.cpp:17: /usr/include/c++/14/bits/vector.tcc:111:7: note: candidate: ‘std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::emplace_back(_Args&& ...) [with _Args = {}; _Tp = wasm::SuffixTree::RepeatedSubstring; _Alloc = std::allocator<wasm::SuffixTree::RepeatedSubstring>; reference = wasm::SuffixTree::RepeatedSubstring&]’ 111 | vector<_Tp, _Alloc>:: | ^~~~~~~~~~~~~~~~~~~ /usr/include/c++/14/bits/vector.tcc:111:7: note: candidate expects 0 arguments, 1 provided At global scope: cc1plus: note: unrecognized command-line option ‘-Wno-unknown-warning-option’ may have been intended to silence earlier diagnostics cc1plus: note: unrecognized command-line option ‘-Wno-implicit-int-float-conversion’ may have been intended to silence earlier diagnostics [3/18] Building CXX object src/passes/CMakeFiles/passes.dir/Outlining.cpp.o ninja: build stopped: subcommand failed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no constructor provided on SuffixTree, so we'll keep as-is

}
}

return result;
}

std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filter(
const std::vector<SuffixTree::RepeatedSubstring>& substrings,
const std::vector<Expression*>& exprs,
Expand Down
2 changes: 2 additions & 0 deletions src/passes/stringify-walker.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "ir/module-utils.h"
#include "ir/stack-utils.h"
#include "ir/utils.h"
#include "support/intervals.h"
#include "support/suffix_tree.h"
#include "wasm-ir-builder.h"
#include "wasm-traversal.h"
Expand Down Expand Up @@ -264,6 +265,7 @@ using Substrings = std::vector<SuffixTree::RepeatedSubstring>;
struct StringifyProcessor {
static Substrings repeatSubstrings(std::vector<uint32_t>& hashString);
static Substrings dedupe(const Substrings& substrings);
static Substrings filterOverlaps(const Substrings& substrings);
// Filter is the general purpose function backing subsequent filter functions.
// It can be used directly, but generally prefer a wrapper function
// to encapsulate your condition and make it available for tests.
Expand Down
1 change: 1 addition & 0 deletions src/support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ set(support_SOURCES
debug.cpp
dfa_minimization.cpp
file.cpp
intervals.cpp
istring.cpp
json.cpp
name.cpp
Expand Down
66 changes: 66 additions & 0 deletions src/support/intervals.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <assert.h>

#include "intervals.h"
#include "support/index.h"
#include <algorithm>
#include <iostream>
#include <optional>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like these are stale.

Suggested change
#include <iostream>
#include <optional>

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

batched suggestion


using namespace wasm;

std::vector<int>
IntervalProcessor::filterOverlaps(std::vector<Interval>& intervals) {
if (intervals.size() == 0) {
return std::vector<int>();
}

std::vector<std::pair<Interval, int>> intIntervals;
for (Index i = 0; i < intervals.size(); i++) {
auto& interval = intervals[i];
intIntervals.push_back({interval, i});
}

std::sort(
intIntervals.begin(), intIntervals.end());

std::vector<std::pair<Interval, int>> kept;
kept.push_back(intIntervals[0]);
for (auto& candidate : intIntervals) {
auto& former = kept.back();
if (former.first.end <= candidate.first.start) {
kept.push_back(candidate);
continue;
}

// former overlaps with candidate
// replace former if the weights are the same but the candidate ends earlier
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// former overlaps with candidate
// replace former if the weights are the same but the candidate ends earlier
// Former overlaps with candidate. Replace former if the weights are the same but the candidate ends earlier.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

batched suggestion

if (former.first.weight == candidate.first.weight && former.first.end > candidate.first.end) {
former = candidate;
} else if (former.first.weight < candidate.first.weight) {
former = candidate;
}
}

std::vector<int> result;
for (auto& intPair : kept) {
result.push_back(intPair.second);
}

return result;
}
62 changes: 62 additions & 0 deletions src/support/intervals.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Helpers for handling a generic range of values

#ifndef wasm_support_intervals_h
#define wasm_support_intervals_h

#include <set>
#include <vector>

namespace wasm {

struct Interval {
unsigned start;
unsigned end;
// The weight is used to determine which interval to keep when two overlap,
// higher is better
unsigned weight;
Interval(unsigned start, unsigned end, unsigned weight)
: start(start), end(end), weight(weight) {}

bool operator<(const Interval& other) const {
/*return weight != other.weight ? weight < other.weight
: end != other.end ? end < other.end
: start < other.start;*/
/*return end != other.end ? end < other.end
: weight != other.weight ? weight < other.weight
: start < other.start;*/
return start != other.start ? start < other.start
: weight != other.weight ? weight < other.weight
: end < other.end;
}
Comment on lines +36 to +40
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should take end into account as well. Otherwise the std::set<Interval> returned by IntervalProcessor::getOverlaps() will not be able to hold two intervals that differ only in their ends.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


bool operator==(const Interval& other) const {
return start == other.start && end == other.end && weight == other.weight;
}
};

struct IntervalProcessor {
// Given a vector of Interval, returns a vector of the indices that, mapping
// back to the original input vector, do not overlap with each other, ie: the
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// back to the original input vector, do not overlap with each other, ie: the
// back to the original input vector, do not overlap with each other, i.e. the

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't look like it!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

batched suggestion

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adjust punctuation around ie to , i.e.,

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

// interval indexes with overlapping interval indexes already removed.
static std::vector<int> filterOverlaps(std::vector<Interval>&);
};

} // namespace wasm

#endif // wasm_suport_intervals
1 change: 1 addition & 0 deletions test/gtest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ set(unittest_SOURCES
dfa_minimization.cpp
disjoint_sets.cpp
interpreter.cpp
intervals.cpp
json.cpp
lattices.cpp
local-graph.cpp
Expand Down
121 changes: 121 additions & 0 deletions test/gtest/intervals.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#include "support/intervals.h"
#include "ir/utils.h"
#include "gtest/gtest.h"

using namespace wasm;

TEST(IntervalsTest, TestEmpty) {
std::vector<Interval> intervals;
std::vector<int> expected;
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}

TEST(IntervalsTest, TestOne) {
std::vector<Interval> intervals;
intervals.emplace_back(Interval{0, 2, 5});
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're using emplace_back, we can just pass the arguments to the constructor without having to construct and then move a temporary object.

Suggested change
intervals.emplace_back(Interval{0, 2, 5});
intervals.emplace_back(0, 2, 5);

std::vector<int> expected{0};
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}

TEST(IntervalsTest, TestNoOverlapFound) {
std::vector<Interval> intervals;
intervals.emplace_back(Interval{0, 4, 2});
intervals.emplace_back(Interval{4, 8, 2});
std::vector<int> expected{0, 1};
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}

TEST(IntervalsTest, TestOverlapFound) {
std::vector<Interval> intervals;
intervals.emplace_back(Interval{0, 2, 5});
intervals.emplace_back(Interval{1, 4, 10});
intervals.emplace_back(Interval{4, 5, 15});
std::vector<int> expected{1, 2};
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}

TEST(IntervalsTest, TestOverlapFoundA) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be best to give these more descriptive names like TestOverlapInnerNested, TestOverlapOuterNested, TestOverlapAscendingSequence, TestOverlapDescendingSequence, etc. Ideally the role each interval plays in a test should be clear from the name of the test.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

std::vector<Interval> intervals;
intervals.emplace_back(Interval{0, 2, 1});
intervals.emplace_back(Interval{1, 4, 1});
intervals.emplace_back(Interval{4, 6, 1});
intervals.emplace_back(Interval{4, 5, 1});
std::vector<int> expected{0, 3};
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}

TEST(IntervalsTest, TestOverlapFoundB) {
std::vector<Interval> intervals;
intervals.emplace_back(Interval{0, 3, 4});
intervals.emplace_back(Interval{3, 6, 2});
intervals.emplace_back(Interval{6, 10, 3});
intervals.emplace_back(Interval{8, 11, 1});
intervals.emplace_back(Interval{7, 15, 5});
std::vector<int> expected{0, 1, 4};
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}

TEST(IntervalsTest, TestOverlapFoundC) {
std::vector<Interval> intervals;
intervals.emplace_back(Interval{0, 3, 6});
intervals.emplace_back(Interval{3, 6, 2});
intervals.emplace_back(Interval{2, 10, 3});
intervals.emplace_back(Interval{8, 11, 1});
intervals.emplace_back(Interval{9, 15, 5});
intervals.emplace_back(Interval{3, 15, 5});
std::vector<int> expected{0, 5};
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}

TEST(IntervalsTest, TestOverlapFoundD) {
std::vector<Interval> intervals;
intervals.emplace_back(Interval{2, 6, 2});
intervals.emplace_back(Interval{12, 18, 3});
intervals.emplace_back(Interval{4, 11, 1});
intervals.emplace_back(Interval{6, 20, 15});
intervals.emplace_back(Interval{0, 3, 6});
intervals.emplace_back(Interval{3, 15, 5});
intervals.emplace_back(Interval{14, 21, 5});
intervals.emplace_back(Interval{23, 28, 5});
std::vector<int> expected{4, 3, 7};
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}

TEST(IntervalsTest, TestOverlapFoundE) {
std::vector<Interval> intervals;
intervals.emplace_back(Interval{21, 24, 1});
intervals.emplace_back(Interval{0, 5, 1});
intervals.emplace_back(Interval{11, 18, 1});
intervals.emplace_back(Interval{28, 35, 1});
intervals.emplace_back(Interval{5, 11, 1});
intervals.emplace_back(Interval{18, 21, 1});
intervals.emplace_back(Interval{35, 40, 1});
intervals.emplace_back(Interval{24, 28, 1});
std::vector<int> expected{1, 4, 2, 5, 0, 7, 3, 6};
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}

TEST(IntervalsTest, TestOverlapFoundF) {
std::vector<Interval> intervals;
intervals.emplace_back(Interval{0, 5, 10});
intervals.emplace_back(Interval{5, 11, 100});
intervals.emplace_back(Interval{11, 18, 5});
intervals.emplace_back(Interval{18, 21, 8});
intervals.emplace_back(Interval{21, 24, 2});
intervals.emplace_back(Interval{24, 28, 4});
intervals.emplace_back(Interval{28, 35, 6});
intervals.emplace_back(Interval{35, 40, 7});
std::vector<int> expected{0, 1, 2, 3, 4, 5, 6, 7};
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}

TEST(IntervalsTest, TestOverlapFoundG) {
std::vector<Interval> intervals;
intervals.emplace_back(Interval{0, 5, 1});
intervals.emplace_back(Interval{0, 5, 1});
intervals.emplace_back(Interval{0, 5, 1});
intervals.emplace_back(Interval{0, 5, 1});
intervals.emplace_back(Interval{0, 5, 1});
std::vector<int> expected{0};
ASSERT_EQ(IntervalProcessor::filterOverlaps(intervals), expected);
}
Loading
Loading