Skip to content

Commit 240a029

Browse files
agampe authored and meta-codesync[bot] committed
Run fast regalloc for large methods
Summary: Improve cost of repeated DU chain computation. The heuristic is very simplistic. If it ever becomes too expensive, consider adding another threshold for the number of potential new-instance replacements (= number of loop iterations). Reviewed By: wsanville Differential Revision: D91352690 fbshipit-source-id: cb1cea3d4e3236fa4ed30a2fd43c0762d03834c2
1 parent 72bd3bb commit 240a029

File tree

1 file changed

+97
-68
lines changed

1 file changed

+97
-68
lines changed

opt/object-escape-analysis/ObjectEscapeAnalysis.cpp

Lines changed: 97 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
#include "IRInstruction.h"
5454
#include "Inliner.h"
5555
#include "InlinerConfig.h"
56+
#include "LinearScan.h"
5657
#include "MutablePriorityQueue.h"
5758
#include "ObjectEscapeAnalysisImpl.h"
5859
#include "ObjectEscapeAnalysisPlugin.h"
@@ -720,6 +721,7 @@ struct Stats {
720721
std::atomic<size_t> new_instances_eliminated{0};
721722
size_t inlined_methods_removed{0};
722723
size_t inlinable_methods_kept{0};
724+
std::atomic<size_t> reg_allocs{0};
723725
};
724726

725727
struct ReducedMethodVariant {
@@ -894,6 +896,18 @@ class RootMethodReducer {
894896
return std::nullopt;
895897
}
896898

899+
{
900+
// Possibly reduce complexity of CFG for DU chain computation.
901+
m_method->get_code()->cfg().recompute_registers_size();
902+
auto num_regs = m_method->get_code()->cfg().get_registers_size();
903+
constexpr size_t kRegAllocThreshold = 256;
904+
if (num_regs >= kRegAllocThreshold) {
905+
fastregalloc::LinearScanAllocator allocator(m_method);
906+
allocator.allocate();
907+
m_stats->reg_allocs.fetch_add(1, std::memory_order_relaxed);
908+
}
909+
}
910+
897911
while (auto opt_p = find_inlinable_new_instance()) {
898912
if (!stackify(opt_p->first, opt_p->second)) {
899913
return std::nullopt;
@@ -1554,22 +1568,25 @@ UnorderedMap<DexMethod*, std::vector<ReducedMethod>> compute_reduced_methods(
15541568
// with N inlinable types, there will be N variants.
15551569
std::vector<std::pair<DexMethod*, InlinableTypes>>
15561570
ordered_root_methods_variants;
1557-
for (auto&& [method, types] : UnorderedIterable(root_methods)) {
1558-
auto ordered_types = unordered_to_ordered(types, [&](auto& p, auto& q) {
1559-
return inlinable_type_index.at(p.first) <
1560-
inlinable_type_index.at(q.first);
1561-
});
1562-
for (auto it = ordered_types.begin(); it != ordered_types.end(); it++) {
1563-
ordered_root_methods_variants.emplace_back(
1564-
method, InlinableTypes(it, ordered_types.end()));
1571+
{
1572+
Timer sub_t{"ordered_root_methods_variants"};
1573+
for (auto&& [method, types] : UnorderedIterable(root_methods)) {
1574+
auto ordered_types = unordered_to_ordered(types, [&](auto& p, auto& q) {
1575+
return inlinable_type_index.at(p.first) <
1576+
inlinable_type_index.at(q.first);
1577+
});
1578+
for (auto it = ordered_types.begin(); it != ordered_types.end(); it++) {
1579+
ordered_root_methods_variants.emplace_back(
1580+
method, InlinableTypes(it, ordered_types.end()));
1581+
}
15651582
}
1583+
// Order such that items with many types to process go first, which improves
1584+
// workqueue efficiency.
1585+
std::stable_sort(ordered_root_methods_variants.begin(),
1586+
ordered_root_methods_variants.end(), [](auto& a, auto& b) {
1587+
return a.second.size() > b.second.size();
1588+
});
15661589
}
1567-
// Order such that items with many types to process go first, which improves
1568-
// workqueue efficiency.
1569-
std::stable_sort(ordered_root_methods_variants.begin(),
1570-
ordered_root_methods_variants.end(), [](auto& a, auto& b) {
1571-
return a.second.size() > b.second.size();
1572-
});
15731590

15741591
// Special marker method, used to identify which newly created objects of only
15751592
// incompletely inlinable types should get inlined.
@@ -1587,71 +1604,82 @@ UnorderedMap<DexMethod*, std::vector<ReducedMethod>> compute_reduced_methods(
15871604
if (g_redex->instrument_mode && g_redex->slow_invariants_debug) {
15881605
num_threads = std::min<size_t>(num_threads, 16u);
15891606
}
1590-
workqueue_run<std::pair<DexMethod*, InlinableTypes>>(
1591-
[&](const std::pair<DexMethod*, InlinableTypes>& p) {
1592-
auto* method = p.first;
1593-
const auto& types = p.second;
1594-
auto copy_name_str = method->get_name()->str() + "$oea$internal$" +
1595-
std::to_string(types.size());
1596-
auto* copy = DexMethod::make_method_from(
1597-
method, method->get_class(), DexString::make_string(copy_name_str));
1598-
RootMethodReducer root_method_reducer{config,
1599-
apply_shrinking_plugins,
1600-
code_size_cache,
1601-
method_override_graph,
1602-
expandable_method_params,
1603-
incomplete_marker_method,
1604-
inliner,
1605-
method_summaries,
1606-
excluded_classes,
1607-
stats,
1608-
method::is_init(method) ||
1609-
method::is_clinit(method),
1610-
copy,
1611-
types,
1612-
callees_cache,
1613-
method_summary_cache};
1614-
auto reduced_method = root_method_reducer.reduce();
1615-
if (reduced_method) {
1616-
concurrent_reduced_methods.update(
1617-
method, [&](auto*, auto& reduced_methods_variants, bool) {
1618-
reduced_methods_variants.emplace_back(
1619-
std::move(*reduced_method));
1620-
});
1621-
return;
1622-
}
1623-
DexMethod::delete_method_DO_NOT_USE(copy);
1624-
},
1625-
ordered_root_methods_variants, num_threads);
1607+
1608+
{
1609+
Timer sub_t{"reduce"};
1610+
workqueue_run<std::pair<DexMethod*, InlinableTypes>>(
1611+
[&](const std::pair<DexMethod*, InlinableTypes>& p) {
1612+
auto* method = p.first;
1613+
const auto& types = p.second;
1614+
auto copy_name_str = method->get_name()->str() + "$oea$internal$" +
1615+
std::to_string(types.size());
1616+
auto* copy = DexMethod::make_method_from(
1617+
method, method->get_class(),
1618+
DexString::make_string(copy_name_str));
1619+
RootMethodReducer root_method_reducer{config,
1620+
apply_shrinking_plugins,
1621+
code_size_cache,
1622+
method_override_graph,
1623+
expandable_method_params,
1624+
incomplete_marker_method,
1625+
inliner,
1626+
method_summaries,
1627+
excluded_classes,
1628+
stats,
1629+
method::is_init(method) ||
1630+
method::is_clinit(method),
1631+
copy,
1632+
types,
1633+
callees_cache,
1634+
method_summary_cache};
1635+
auto reduced_method = root_method_reducer.reduce();
1636+
if (reduced_method) {
1637+
concurrent_reduced_methods.update(
1638+
method, [&](auto*, auto& reduced_methods_variants, bool) {
1639+
reduced_methods_variants.emplace_back(
1640+
std::move(*reduced_method));
1641+
});
1642+
return;
1643+
}
1644+
DexMethod::delete_method_DO_NOT_USE(copy);
1645+
},
1646+
ordered_root_methods_variants, num_threads);
1647+
}
16261648

16271649
// For each root method, we order the reduced methods (if any) by how many
16281650
// types were inlined, with the largest number of inlined types going first.
16291651
UnorderedMap<DexMethod*, std::vector<ReducedMethod>> reduced_methods;
1630-
for (auto& [method, reduced_methods_variants] :
1631-
UnorderedIterable(concurrent_reduced_methods)) {
1632-
std::sort(
1633-
reduced_methods_variants.begin(), reduced_methods_variants.end(),
1634-
[&](auto& a, auto& b) { return a.types.size() > b.types.size(); });
1635-
reduced_methods.emplace(method, std::move(reduced_methods_variants));
1652+
{
1653+
Timer sub_t{"reduced_methods"};
1654+
for (auto& [method, reduced_methods_variants] :
1655+
UnorderedIterable(concurrent_reduced_methods)) {
1656+
std::sort(
1657+
reduced_methods_variants.begin(), reduced_methods_variants.end(),
1658+
[&](auto& a, auto& b) { return a.types.size() > b.types.size(); });
1659+
reduced_methods.emplace(method, std::move(reduced_methods_variants));
1660+
}
16361661
}
16371662

16381663
// All types which could not be accomodated by any reduced method variants are
16391664
// marked as "irreducible", which is later used when doing a global cost
16401665
// analysis.
16411666
static const InlinableTypes no_types;
1642-
for (auto&& [method, types] : UnorderedIterable(root_methods)) {
1643-
auto it = reduced_methods.find(method);
1644-
const auto& largest_types =
1645-
it == reduced_methods.end() ? no_types : it->second.front().types;
1646-
for (auto&& [type, inlinable_info] : UnorderedIterable(types)) {
1647-
if (largest_types.count(type) == 0u) {
1648-
for (auto* cls = type_class(type);
1649-
(cls != nullptr) && !cls->is_external();
1650-
cls = type_class(cls->get_super_class())) {
1651-
irreducible_types->insert(cls->get_type());
1667+
{
1668+
Timer sub_t{"irreducible"};
1669+
for (auto&& [method, types] : UnorderedIterable(root_methods)) {
1670+
auto it = reduced_methods.find(method);
1671+
const auto& largest_types =
1672+
it == reduced_methods.end() ? no_types : it->second.front().types;
1673+
for (auto&& [type, inlinable_info] : UnorderedIterable(types)) {
1674+
if (largest_types.count(type) == 0u) {
1675+
for (auto* cls = type_class(type);
1676+
(cls != nullptr) && !cls->is_external();
1677+
cls = type_class(cls->get_super_class())) {
1678+
irreducible_types->insert(cls->get_type());
1679+
}
1680+
insert_unordered_iterable(*inlinable_methods_kept,
1681+
inlinable_info.inlinable_methods);
16521682
}
1653-
insert_unordered_iterable(*inlinable_methods_kept,
1654-
inlinable_info.inlinable_methods);
16551683
}
16561684
}
16571685
}
@@ -2155,6 +2183,7 @@ void ObjectEscapeAnalysisPass::run_pass(DexStoresVector& stores,
21552183
mgr.incr_metric("lost_returns_through_shrinking",
21562184
lost_returns_through_shrinking);
21572185
mgr.incr_metric("method_summaries", method_summaries.size());
2186+
mgr.incr_metric("reg_allocs", stats.reg_allocs);
21582187
}
21592188

21602189
static ObjectEscapeAnalysisPass s_pass;

0 commit comments

Comments (0)