Skip to content

Commit d6fe4b5

Browse files
authored
Merge pull request #2969 from AlexandreSinger/feature-ap-timing
[AP][Timing] Added Basic Net Weighting
2 parents 433a917 + 475f528 commit d6fe4b5

File tree

25 files changed

+233
-60
lines changed

25 files changed

+233
-60
lines changed

doc/src/vpr/command_line_usage.rst

+9
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,15 @@ Analytical Placement is generally split into three stages:
12531253

12541254
**Default:** ``annealer``
12551255

1256+
.. option:: --ap_timing_tradeoff <float>
1257+
1258+
Controls the trade-off between wirelength (HPWL) and delay minimization in the AP flow.
1259+
1260+
A value of 0.0 makes the AP flow focus completely on wirelength minimization,
1261+
while a value of 1.0 makes the AP flow focus completely on timing optimization.
1262+
1263+
**Default:** ``0.5``
1264+
12561265
.. option:: --ap_verbosity <int>
12571266

12581267
Controls the verbosity of the AP flow output.

vpr/src/analytical_place/analytical_placement_flow.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
#include "analytical_placement_flow.h"
99
#include <memory>
10+
#include "PreClusterTimingManager.h"
1011
#include "analytical_solver.h"
1112
#include "ap_netlist.h"
1213
#include "atom_netlist.h"
14+
#include "cluster_util.h"
1315
#include "detailed_placer.h"
1416
#include "full_legalizer.h"
1517
#include "gen_ap_netlist_from_atoms.h"
@@ -120,6 +122,7 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
120122
const AtomNetlist& atom_nlist,
121123
const APNetlist& ap_netlist,
122124
const Prepacker& prepacker,
125+
const PreClusterTimingManager& pre_cluster_timing_manager,
123126
const DeviceContext& device_ctx) {
124127
if (g_vpr_ctx.atom().flat_placement_info().valid) {
125128
VTR_LOG("Flat Placement is provided in the AP flow, skipping the Global Placement.\n");
@@ -139,6 +142,8 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
139142
device_ctx.grid,
140143
device_ctx.logical_block_types,
141144
device_ctx.physical_tile_types,
145+
pre_cluster_timing_manager,
146+
ap_opts.ap_timing_tradeoff,
142147
ap_opts.log_verbosity);
143148
return global_placer->place();
144149
}
@@ -163,12 +168,25 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
163168
constraints);
164169
print_ap_netlist_stats(ap_netlist);
165170

171+
// Pre-compute the pre-clustering timing delays. This object will be passed
172+
// into the global placer and the full legalizer to make them timing driven.
173+
PreClusterTimingManager pre_cluster_timing_manager(vpr_setup.PackerOpts.timing_driven,
174+
atom_nlist,
175+
g_vpr_ctx.atom().lookup(),
176+
prepacker,
177+
vpr_setup.PackerOpts.timing_update_type,
178+
*device_ctx.arch,
179+
vpr_setup.RoutingArch,
180+
vpr_setup.PackerOpts.device_layout,
181+
vpr_setup.AnalysisOpts);
182+
166183
// Run the Global Placer.
167184
const t_ap_opts& ap_opts = vpr_setup.APOpts;
168185
PartialPlacement p_placement = run_global_placer(ap_opts,
169186
atom_nlist,
170187
ap_netlist,
171188
prepacker,
189+
pre_cluster_timing_manager,
172190
device_ctx);
173191

174192
// Verify that the partial placement is valid before running the full
@@ -185,6 +203,7 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
185203
ap_netlist,
186204
atom_nlist,
187205
prepacker,
206+
pre_cluster_timing_manager,
188207
vpr_setup,
189208
*device_ctx.arch,
190209
device_ctx.grid);

vpr/src/analytical_place/analytical_solver.cpp

+57-12
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
#include <memory>
1414
#include <utility>
1515
#include <vector>
16+
#include "PreClusterTimingManager.h"
17+
#include "atom_netlist.h"
18+
#include "atom_netlist_fwd.h"
1619
#include "device_grid.h"
1720
#include "flat_placement_types.h"
1821
#include "partial_placement.h"
@@ -42,23 +45,39 @@
4245
std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver solver_type,
4346
const APNetlist& netlist,
4447
const DeviceGrid& device_grid,
48+
const AtomNetlist& atom_netlist,
49+
const PreClusterTimingManager& pre_cluster_timing_manager,
50+
float ap_timing_tradeoff,
4551
int log_verbosity) {
4652
// Based on the solver type passed in, build the solver.
4753
switch (solver_type) {
4854
case e_ap_analytical_solver::QP_Hybrid:
4955
#ifdef EIGEN_INSTALLED
50-
return std::make_unique<QPHybridSolver>(netlist, device_grid, log_verbosity);
56+
return std::make_unique<QPHybridSolver>(netlist,
57+
device_grid,
58+
atom_netlist,
59+
pre_cluster_timing_manager,
60+
ap_timing_tradeoff,
61+
log_verbosity);
5162
#else
5263
(void)netlist;
5364
(void)device_grid;
65+
(void)atom_netlist;
66+
(void)pre_cluster_timing_manager;
67+
(void)ap_timing_tradeoff;
5468
(void)log_verbosity;
5569
VPR_FATAL_ERROR(VPR_ERROR_AP,
5670
"QP Hybrid Solver requires the Eigen library");
5771
break;
5872
#endif // EIGEN_INSTALLED
5973
case e_ap_analytical_solver::LP_B2B:
6074
#ifdef EIGEN_INSTALLED
61-
return std::make_unique<B2BSolver>(netlist, device_grid, log_verbosity);
75+
return std::make_unique<B2BSolver>(netlist,
76+
device_grid,
77+
atom_netlist,
78+
pre_cluster_timing_manager,
79+
ap_timing_tradeoff,
80+
log_verbosity);
6281
#else
6382
VPR_FATAL_ERROR(VPR_ERROR_AP,
6483
"LP B2B Solver requires the Eigen library");
@@ -72,10 +91,15 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
7291
return nullptr;
7392
}
7493

75-
AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist, int log_verbosity)
94+
AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist,
95+
const AtomNetlist& atom_netlist,
96+
const PreClusterTimingManager& pre_cluster_timing_manager,
97+
float ap_timing_tradeoff,
98+
int log_verbosity)
7699
: netlist_(netlist)
77100
, blk_id_to_row_id_(netlist.blocks().size(), APRowId::INVALID())
78101
, row_id_to_blk_id_(netlist.blocks().size(), APBlockId::INVALID())
102+
, net_weights_(netlist.nets().size(), 1.0f)
79103
, log_verbosity_(log_verbosity) {
80104
// Get the number of moveable blocks in the netlist and create a unique
81105
// row ID from [0, num_moveable_blocks) for each moveable block in the
@@ -94,6 +118,21 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist, int log_verbosity)
94118
current_row_id++;
95119
num_moveable_blocks_++;
96120
}
121+
122+
if (pre_cluster_timing_manager.is_valid()) {
123+
for (APNetId net_id : netlist.nets()) {
124+
// Get the atom net associated with the given AP net. When
125+
// constructing the AP netlist, we happen to set the name of each
126+
// AP net to the same name as the atom net that generated it!
127+
// TODO: Create a proper lookup structure to go from the AP Netlist
128+
// back to the Atom Netlist.
129+
AtomNetId atom_net_id = atom_netlist.find_net(netlist.net_name(net_id));
130+
VTR_ASSERT(atom_net_id.is_valid());
131+
float crit = pre_cluster_timing_manager.calc_net_setup_criticality(atom_net_id, atom_netlist);
132+
133+
net_weights_[net_id] = ap_timing_tradeoff * crit + (1.0f - ap_timing_tradeoff);
134+
}
135+
}
97136
}
98137

99138
#ifdef EIGEN_INSTALLED
@@ -201,12 +240,15 @@ void QPHybridSolver::init_linear_system() {
201240
for (APNetId net_id : netlist_.nets()) {
202241
size_t num_pins = netlist_.net_pins(net_id).size();
203242
VTR_ASSERT_DEBUG(num_pins > 1);
243+
244+
double net_weight = net_weights_[net_id];
245+
204246
if (num_pins > star_num_pins_threshold) {
205247
// Create a star node and connect each block in the net to the star
206248
// node.
207249
// Using the weight from FastPlace
208250
// TODO: Investigate other weight terms.
209-
double w = static_cast<double>(num_pins) / static_cast<double>(num_pins - 1);
251+
double w = net_weight * static_cast<double>(num_pins) / static_cast<double>(num_pins - 1);
210252
size_t star_node_id = num_moveable_blocks_ + star_node_offset;
211253
for (APPinId pin_id : netlist_.net_pins(net_id)) {
212254
APBlockId blk_id = netlist_.pin_block(pin_id);
@@ -220,7 +262,7 @@ void QPHybridSolver::init_linear_system() {
220262
// exactly once to every other block in the net.
221263
// Using the weight from FastPlace
222264
// TODO: Investigate other weight terms.
223-
double w = 1.0 / static_cast<double>(num_pins - 1);
265+
double w = net_weight * 1.0 / static_cast<double>(num_pins - 1);
224266
for (size_t ipin_idx = 0; ipin_idx < num_pins; ipin_idx++) {
225267
APPinId first_pin_id = netlist_.net_pin(net_id, ipin_idx);
226268
APBlockId first_blk_id = netlist_.pin_block(first_pin_id);
@@ -638,6 +680,7 @@ static inline APNetBounds get_unique_net_bounds(APNetId net_id,
638680
void B2BSolver::add_connection_to_system(APBlockId first_blk_id,
639681
APBlockId second_blk_id,
640682
size_t num_pins,
683+
double net_w,
641684
const vtr::vector<APBlockId, double>& blk_locs,
642685
std::vector<Eigen::Triplet<double>>& triplet_list,
643686
Eigen::VectorXd& b) {
@@ -660,7 +703,7 @@ void B2BSolver::add_connection_to_system(APBlockId first_blk_id,
660703
// The denominator of weight is zero, which causes an infinite term in the matrix. Another way of
661704
// interpreting epsilon is the minimum distance two nodes are considered to be in placement.
662705
double dist = std::max(std::abs(blk_locs[first_blk_id] - blk_locs[second_blk_id]), distance_epsilon_);
663-
double w = (2.0 / static_cast<double>(num_pins - 1)) * (1.0 / dist);
706+
double w = net_w * (2.0 / static_cast<double>(num_pins - 1)) * (1.0 / dist);
664707

665708
// Update the connectivity matrix and the constant vector.
666709
// This is similar to how connections are added for the quadratic formulation.
@@ -696,6 +739,8 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
696739
size_t num_pins = netlist_.net_pins(net_id).size();
697740
VTR_ASSERT_SAFE_MSG(num_pins > 1, "net must have at least 2 pins");
698741

742+
double net_w = net_weights_[net_id];
743+
699744
// Find the bounding blocks
700745
APNetBounds net_bounds = get_unique_net_bounds(net_id, p_placement, netlist_);
701746

@@ -706,19 +751,19 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
706751
for (APPinId pin_id : netlist_.net_pins(net_id)) {
707752
APBlockId blk_id = netlist_.pin_block(pin_id);
708753
if (blk_id != net_bounds.max_x_blk && blk_id != net_bounds.min_x_blk) {
709-
add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
710-
add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
754+
add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
755+
add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
711756
}
712757
if (blk_id != net_bounds.max_y_blk && blk_id != net_bounds.min_y_blk) {
713-
add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
714-
add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
758+
add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
759+
add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
715760
}
716761
}
717762

718763
// Connect the bounds to each other. It's just easier to put these here
719764
// instead of in the for loop above.
720-
add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
721-
add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
765+
add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
766+
add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
722767
}
723768

724769
// Build the sparse connectivity matrices from the triplets.

vpr/src/analytical_place/analytical_solver.h

+25-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
// Forward declarations
3232
class PartialPlacement;
3333
class APNetlist;
34+
class AtomNetlist;
35+
class PreClusterTimingManager;
3436

3537
/**
3638
* @brief A strong ID for the rows in a matrix used during solving.
@@ -60,7 +62,11 @@ class AnalyticalSolver {
6062
* Initializes the internal data members of the base class which are useful
6163
* for all solvers.
6264
*/
63-
AnalyticalSolver(const APNetlist& netlist, int log_verbosity);
65+
AnalyticalSolver(const APNetlist& netlist,
66+
const AtomNetlist& atom_netlist,
67+
const PreClusterTimingManager& pre_cluster_timing_manager,
68+
float ap_timing_tradeoff,
69+
int log_verbosity);
6470

6571
/**
6672
* @brief Run an iteration of the solver using the given partial placement
@@ -113,6 +119,12 @@ class AnalyticalSolver {
113119
/// solver.
114120
vtr::vector<APRowId, APBlockId> row_id_to_blk_id_;
115121

122+
/// @brief The base weight of each net in the AP netlist. This weight can
123+
/// be used to make the solver more interested in some nets over
124+
/// others. These weights can be any positive value, but are often
125+
/// between 0 and 1.
126+
vtr::vector<APNetId, float> net_weights_;
127+
116128
/// @brief The verbosity of log messages in the Analytical Solver.
117129
int log_verbosity_;
118130
};
@@ -123,6 +135,9 @@ class AnalyticalSolver {
123135
std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver solver_type,
124136
const APNetlist& netlist,
125137
const DeviceGrid& device_grid,
138+
const AtomNetlist& atom_netlist,
139+
const PreClusterTimingManager& pre_cluster_timing_manager,
140+
float ap_timing_tradeoff,
126141
int log_verbosity);
127142

128143
// The Eigen library is used to solve matrix equations in the following solvers.
@@ -278,8 +293,11 @@ class QPHybridSolver : public AnalyticalSolver {
278293
*/
279294
QPHybridSolver(const APNetlist& netlist,
280295
const DeviceGrid& device_grid,
296+
const AtomNetlist& atom_netlist,
297+
const PreClusterTimingManager& pre_cluster_timing_manager,
298+
float ap_timing_tradeoff,
281299
int log_verbosity)
282-
: AnalyticalSolver(netlist, log_verbosity) {
300+
: AnalyticalSolver(netlist, atom_netlist, pre_cluster_timing_manager, ap_timing_tradeoff, log_verbosity) {
283301
// Initializing the linear system only depends on the netlist and fixed
284302
// block locations. Both are provided by the netlist, allowing this to
285303
// be initialized in the constructor.
@@ -411,8 +429,11 @@ class B2BSolver : public AnalyticalSolver {
411429
public:
412430
B2BSolver(const APNetlist& ap_netlist,
413431
const DeviceGrid& device_grid,
432+
const AtomNetlist& atom_netlist,
433+
const PreClusterTimingManager& pre_cluster_timing_manager,
434+
float ap_timing_tradeoff,
414435
int log_verbosity)
415-
: AnalyticalSolver(ap_netlist, log_verbosity)
436+
: AnalyticalSolver(ap_netlist, atom_netlist, pre_cluster_timing_manager, ap_timing_tradeoff, log_verbosity)
416437
, device_grid_width_(device_grid.width())
417438
, device_grid_height_(device_grid.height()) {}
418439

@@ -503,6 +524,7 @@ class B2BSolver : public AnalyticalSolver {
503524
void add_connection_to_system(APBlockId first_blk_id,
504525
APBlockId second_blk_id,
505526
size_t num_pins,
527+
double net_w,
506528
const vtr::vector<APBlockId, double>& blk_locs,
507529
std::vector<Eigen::Triplet<double>>& triplet_list,
508530
Eigen::VectorXd& b);

0 commit comments

Comments
 (0)