Skip to content

Commit 475f528

Browse files
[AP][Timing] Added Basic Net Weighting
Added basic timing awareness to the AP flow by weighting nets in the AP solver by their criticality (the max criticality of all edges through that net). This makes the solver prioritize minimizing the length of more-critical nets over less-critical nets (according to the pre-clustering timing analyzer). Added a command-line option to trade off between timing and wirelength in the AP flow.
1 parent 433a917 commit 475f528

File tree

25 files changed

+233
-60
lines changed

25 files changed

+233
-60
lines changed

doc/src/vpr/command_line_usage.rst

+9
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,15 @@ Analytical Placement is generally split into three stages:
12531253

12541254
**Default:** ``annealer``
12551255

1256+
.. option:: --ap_timing_tradeoff <float>
1257+
1258+
Controls the trade-off between wirelength (HPWL) and delay minimization in the AP flow.
1259+
1260+
A value of 0.0 makes the AP flow focus completely on wirelength minimization,
1261+
while a value of 1.0 makes the AP flow focus completely on timing optimization.
1262+
1263+
**Default:** ``0.5``
1264+
12561265
.. option:: --ap_verbosity <int>
12571266

12581267
Controls the verbosity of the AP flow output.

vpr/src/analytical_place/analytical_placement_flow.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
#include "analytical_placement_flow.h"
99
#include <memory>
10+
#include "PreClusterTimingManager.h"
1011
#include "analytical_solver.h"
1112
#include "ap_netlist.h"
1213
#include "atom_netlist.h"
14+
#include "cluster_util.h"
1315
#include "detailed_placer.h"
1416
#include "full_legalizer.h"
1517
#include "gen_ap_netlist_from_atoms.h"
@@ -120,6 +122,7 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
120122
const AtomNetlist& atom_nlist,
121123
const APNetlist& ap_netlist,
122124
const Prepacker& prepacker,
125+
const PreClusterTimingManager& pre_cluster_timing_manager,
123126
const DeviceContext& device_ctx) {
124127
if (g_vpr_ctx.atom().flat_placement_info().valid) {
125128
VTR_LOG("Flat Placement is provided in the AP flow, skipping the Global Placement.\n");
@@ -139,6 +142,8 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
139142
device_ctx.grid,
140143
device_ctx.logical_block_types,
141144
device_ctx.physical_tile_types,
145+
pre_cluster_timing_manager,
146+
ap_opts.ap_timing_tradeoff,
142147
ap_opts.log_verbosity);
143148
return global_placer->place();
144149
}
@@ -163,12 +168,25 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
163168
constraints);
164169
print_ap_netlist_stats(ap_netlist);
165170

171+
// Pre-compute the pre-clustering timing delays. This object will be passed
172+
// into the global placer and the full legalizer to make them timing driven.
173+
PreClusterTimingManager pre_cluster_timing_manager(vpr_setup.PackerOpts.timing_driven,
174+
atom_nlist,
175+
g_vpr_ctx.atom().lookup(),
176+
prepacker,
177+
vpr_setup.PackerOpts.timing_update_type,
178+
*device_ctx.arch,
179+
vpr_setup.RoutingArch,
180+
vpr_setup.PackerOpts.device_layout,
181+
vpr_setup.AnalysisOpts);
182+
166183
// Run the Global Placer.
167184
const t_ap_opts& ap_opts = vpr_setup.APOpts;
168185
PartialPlacement p_placement = run_global_placer(ap_opts,
169186
atom_nlist,
170187
ap_netlist,
171188
prepacker,
189+
pre_cluster_timing_manager,
172190
device_ctx);
173191

174192
// Verify that the partial placement is valid before running the full
@@ -185,6 +203,7 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
185203
ap_netlist,
186204
atom_nlist,
187205
prepacker,
206+
pre_cluster_timing_manager,
188207
vpr_setup,
189208
*device_ctx.arch,
190209
device_ctx.grid);

vpr/src/analytical_place/analytical_solver.cpp

+57-12
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
#include <memory>
1414
#include <utility>
1515
#include <vector>
16+
#include "PreClusterTimingManager.h"
17+
#include "atom_netlist.h"
18+
#include "atom_netlist_fwd.h"
1619
#include "device_grid.h"
1720
#include "flat_placement_types.h"
1821
#include "partial_placement.h"
@@ -42,23 +45,39 @@
4245
std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver solver_type,
4346
const APNetlist& netlist,
4447
const DeviceGrid& device_grid,
48+
const AtomNetlist& atom_netlist,
49+
const PreClusterTimingManager& pre_cluster_timing_manager,
50+
float ap_timing_tradeoff,
4551
int log_verbosity) {
4652
// Based on the solver type passed in, build the solver.
4753
switch (solver_type) {
4854
case e_ap_analytical_solver::QP_Hybrid:
4955
#ifdef EIGEN_INSTALLED
50-
return std::make_unique<QPHybridSolver>(netlist, device_grid, log_verbosity);
56+
return std::make_unique<QPHybridSolver>(netlist,
57+
device_grid,
58+
atom_netlist,
59+
pre_cluster_timing_manager,
60+
ap_timing_tradeoff,
61+
log_verbosity);
5162
#else
5263
(void)netlist;
5364
(void)device_grid;
65+
(void)atom_netlist;
66+
(void)pre_cluster_timing_manager;
67+
(void)ap_timing_tradeoff;
5468
(void)log_verbosity;
5569
VPR_FATAL_ERROR(VPR_ERROR_AP,
5670
"QP Hybrid Solver requires the Eigen library");
5771
break;
5872
#endif // EIGEN_INSTALLED
5973
case e_ap_analytical_solver::LP_B2B:
6074
#ifdef EIGEN_INSTALLED
61-
return std::make_unique<B2BSolver>(netlist, device_grid, log_verbosity);
75+
return std::make_unique<B2BSolver>(netlist,
76+
device_grid,
77+
atom_netlist,
78+
pre_cluster_timing_manager,
79+
ap_timing_tradeoff,
80+
log_verbosity);
6281
#else
6382
VPR_FATAL_ERROR(VPR_ERROR_AP,
6483
"LP B2B Solver requires the Eigen library");
@@ -72,10 +91,15 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
7291
return nullptr;
7392
}
7493

75-
AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist, int log_verbosity)
94+
AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist,
95+
const AtomNetlist& atom_netlist,
96+
const PreClusterTimingManager& pre_cluster_timing_manager,
97+
float ap_timing_tradeoff,
98+
int log_verbosity)
7699
: netlist_(netlist)
77100
, blk_id_to_row_id_(netlist.blocks().size(), APRowId::INVALID())
78101
, row_id_to_blk_id_(netlist.blocks().size(), APBlockId::INVALID())
102+
, net_weights_(netlist.nets().size(), 1.0f)
79103
, log_verbosity_(log_verbosity) {
80104
// Get the number of moveable blocks in the netlist and create a unique
81105
// row ID from [0, num_moveable_blocks) for each moveable block in the
@@ -94,6 +118,21 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist, int log_verbosity)
94118
current_row_id++;
95119
num_moveable_blocks_++;
96120
}
121+
122+
if (pre_cluster_timing_manager.is_valid()) {
123+
for (APNetId net_id : netlist.nets()) {
124+
// Get the atom net associated with the given AP net. When
125+
// constructing the AP netlist, we happen to set the name of each
126+
// AP net to the same name as the atom net that generated them!
127+
// TODO: Create a proper lookup structure to go from the AP Netlist
128+
// back to the Atom Netlist.
129+
AtomNetId atom_net_id = atom_netlist.find_net(netlist.net_name(net_id));
130+
VTR_ASSERT(atom_net_id.is_valid());
131+
float crit = pre_cluster_timing_manager.calc_net_setup_criticality(atom_net_id, atom_netlist);
132+
133+
net_weights_[net_id] = ap_timing_tradeoff * crit + (1.0f - ap_timing_tradeoff);
134+
}
135+
}
97136
}
98137

99138
#ifdef EIGEN_INSTALLED
@@ -201,12 +240,15 @@ void QPHybridSolver::init_linear_system() {
201240
for (APNetId net_id : netlist_.nets()) {
202241
size_t num_pins = netlist_.net_pins(net_id).size();
203242
VTR_ASSERT_DEBUG(num_pins > 1);
243+
244+
double net_weight = net_weights_[net_id];
245+
204246
if (num_pins > star_num_pins_threshold) {
205247
// Create a star node and connect each block in the net to the star
206248
// node.
207249
// Using the weight from FastPlace
208250
// TODO: Investigate other weight terms.
209-
double w = static_cast<double>(num_pins) / static_cast<double>(num_pins - 1);
251+
double w = net_weight * static_cast<double>(num_pins) / static_cast<double>(num_pins - 1);
210252
size_t star_node_id = num_moveable_blocks_ + star_node_offset;
211253
for (APPinId pin_id : netlist_.net_pins(net_id)) {
212254
APBlockId blk_id = netlist_.pin_block(pin_id);
@@ -220,7 +262,7 @@ void QPHybridSolver::init_linear_system() {
220262
// exactly once to every other block in the net.
221263
// Using the weight from FastPlace
222264
// TODO: Investigate other weight terms.
223-
double w = 1.0 / static_cast<double>(num_pins - 1);
265+
double w = net_weight * 1.0 / static_cast<double>(num_pins - 1);
224266
for (size_t ipin_idx = 0; ipin_idx < num_pins; ipin_idx++) {
225267
APPinId first_pin_id = netlist_.net_pin(net_id, ipin_idx);
226268
APBlockId first_blk_id = netlist_.pin_block(first_pin_id);
@@ -638,6 +680,7 @@ static inline APNetBounds get_unique_net_bounds(APNetId net_id,
638680
void B2BSolver::add_connection_to_system(APBlockId first_blk_id,
639681
APBlockId second_blk_id,
640682
size_t num_pins,
683+
double net_w,
641684
const vtr::vector<APBlockId, double>& blk_locs,
642685
std::vector<Eigen::Triplet<double>>& triplet_list,
643686
Eigen::VectorXd& b) {
@@ -660,7 +703,7 @@ void B2BSolver::add_connection_to_system(APBlockId first_blk_id,
660703
// The denominator of weight is zero, which causes infinity term in the matrix. Another way of
661704
// interpreting epsilon is the minimum distance two nodes are considered to be in placement.
662705
double dist = std::max(std::abs(blk_locs[first_blk_id] - blk_locs[second_blk_id]), distance_epsilon_);
663-
double w = (2.0 / static_cast<double>(num_pins - 1)) * (1.0 / dist);
706+
double w = net_w * (2.0 / static_cast<double>(num_pins - 1)) * (1.0 / dist);
664707

665708
// Update the connectivity matrix and the constant vector.
666709
// This is similar to how connections are added for the quadratic formulation.
@@ -696,6 +739,8 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
696739
size_t num_pins = netlist_.net_pins(net_id).size();
697740
VTR_ASSERT_SAFE_MSG(num_pins > 1, "net must have at least 2 pins");
698741

742+
double net_w = net_weights_[net_id];
743+
699744
// Find the bounding blocks
700745
APNetBounds net_bounds = get_unique_net_bounds(net_id, p_placement, netlist_);
701746

@@ -706,19 +751,19 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
706751
for (APPinId pin_id : netlist_.net_pins(net_id)) {
707752
APBlockId blk_id = netlist_.pin_block(pin_id);
708753
if (blk_id != net_bounds.max_x_blk && blk_id != net_bounds.min_x_blk) {
709-
add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
710-
add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
754+
add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
755+
add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
711756
}
712757
if (blk_id != net_bounds.max_y_blk && blk_id != net_bounds.min_y_blk) {
713-
add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
714-
add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
758+
add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
759+
add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
715760
}
716761
}
717762

718763
// Connect the bounds to each other. It's just easier to put these here
719764
// instead of in the for loop above.
720-
add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
721-
add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
765+
add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
766+
add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
722767
}
723768

724769
// Build the sparse connectivity matrices from the triplets.

vpr/src/analytical_place/analytical_solver.h

+25-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
// Forward declarations
3232
class PartialPlacement;
3333
class APNetlist;
34+
class AtomNetlist;
35+
class PreClusterTimingManager;
3436

3537
/**
3638
* @brief A strong ID for the rows in a matrix used during solving.
@@ -60,7 +62,11 @@ class AnalyticalSolver {
6062
* Initializes the internal data members of the base class which are useful
6163
* for all solvers.
6264
*/
63-
AnalyticalSolver(const APNetlist& netlist, int log_verbosity);
65+
AnalyticalSolver(const APNetlist& netlist,
66+
const AtomNetlist& atom_netlist,
67+
const PreClusterTimingManager& pre_cluster_timing_manager,
68+
float ap_timing_tradeoff,
69+
int log_verbosity);
6470

6571
/**
6672
* @brief Run an iteration of the solver using the given partial placement
@@ -113,6 +119,12 @@ class AnalyticalSolver {
113119
/// solver.
114120
vtr::vector<APRowId, APBlockId> row_id_to_blk_id_;
115121

122+
/// @brief The base weight of each net in the AP netlist. This weight can
123+
/// be used to make the solver more interested in some nets over
124+
/// others. These weights can be any positive value, but are often
125+
/// between 0 and 1.
126+
vtr::vector<APNetId, float> net_weights_;
127+
116128
/// @brief The verbosity of log messages in the Analytical Solver.
117129
int log_verbosity_;
118130
};
@@ -123,6 +135,9 @@ class AnalyticalSolver {
123135
std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver solver_type,
124136
const APNetlist& netlist,
125137
const DeviceGrid& device_grid,
138+
const AtomNetlist& atom_netlist,
139+
const PreClusterTimingManager& pre_cluster_timing_manager,
140+
float ap_timing_tradeoff,
126141
int log_verbosity);
127142

128143
// The Eigen library is used to solve matrix equations in the following solvers.
@@ -278,8 +293,11 @@ class QPHybridSolver : public AnalyticalSolver {
278293
*/
279294
QPHybridSolver(const APNetlist& netlist,
280295
const DeviceGrid& device_grid,
296+
const AtomNetlist& atom_netlist,
297+
const PreClusterTimingManager& pre_cluster_timing_manager,
298+
float ap_timing_tradeoff,
281299
int log_verbosity)
282-
: AnalyticalSolver(netlist, log_verbosity) {
300+
: AnalyticalSolver(netlist, atom_netlist, pre_cluster_timing_manager, ap_timing_tradeoff, log_verbosity) {
283301
// Initializing the linear system only depends on the netlist and fixed
284302
// block locations. Both are provided by the netlist, allowing this to
285303
// be initialized in the constructor.
@@ -411,8 +429,11 @@ class B2BSolver : public AnalyticalSolver {
411429
public:
412430
B2BSolver(const APNetlist& ap_netlist,
413431
const DeviceGrid& device_grid,
432+
const AtomNetlist& atom_netlist,
433+
const PreClusterTimingManager& pre_cluster_timing_manager,
434+
float ap_timing_tradeoff,
414435
int log_verbosity)
415-
: AnalyticalSolver(ap_netlist, log_verbosity)
436+
: AnalyticalSolver(ap_netlist, atom_netlist, pre_cluster_timing_manager, ap_timing_tradeoff, log_verbosity)
416437
, device_grid_width_(device_grid.width())
417438
, device_grid_height_(device_grid.height()) {}
418439

@@ -503,6 +524,7 @@ class B2BSolver : public AnalyticalSolver {
503524
void add_connection_to_system(APBlockId first_blk_id,
504525
APBlockId second_blk_id,
505526
size_t num_pins,
527+
double net_w,
506528
const vtr::vector<APBlockId, double>& blk_locs,
507529
std::vector<Eigen::Triplet<double>>& triplet_list,
508530
Eigen::VectorXd& b);

0 commit comments

Comments
 (0)