diff --git a/src/dpl/BUILD b/src/dpl/BUILD index 7c0a55458a2..fbc413ab70a 100644 --- a/src/dpl/BUILD +++ b/src/dpl/BUILD @@ -52,6 +52,8 @@ cc_library( "src/optimization/detailed_generator.h", "src/optimization/detailed_global.cxx", "src/optimization/detailed_global.h", + "src/optimization/detailed_global_legacy.cxx", + "src/optimization/detailed_global_legacy.h", "src/optimization/detailed_manager.cxx", "src/optimization/detailed_manager.h", "src/optimization/detailed_mis.cxx", diff --git a/src/dpl/CMakeLists.txt b/src/dpl/CMakeLists.txt index 95b4487a417..5996773d5ce 100644 --- a/src/dpl/CMakeLists.txt +++ b/src/dpl/CMakeLists.txt @@ -38,6 +38,7 @@ add_library(dpl_lib src/optimization/detailed_mis.cxx src/optimization/detailed_reorder.cxx src/optimization/detailed_global.cxx + src/optimization/detailed_global_legacy.cxx src/optimization/detailed_vertical.cxx src/objective/detailed_displacement.cxx src/objective/detailed_hpwl.cxx diff --git a/src/dpl/include/dpl/Opendp.h b/src/dpl/include/dpl/Opendp.h index 4d921ad9d11..4082d8c10d9 100644 --- a/src/dpl/include/dpl/Opendp.h +++ b/src/dpl/include/dpl/Opendp.h @@ -79,6 +79,20 @@ using IRDropByPoint = std::map; struct GapInfo; struct DecapCell; struct IRDrop; + +struct GlobalSwapParams +{ + int passes = 2; + double tolerance = 0.01; + double tradeoff = 0.4; + double profiling_excess = 1.10; + std::vector budget_multipliers{1.50, 1.25, 1.10, 1.04}; + double area_weight = 0.4; + double pin_weight = 0.6; + double user_congestion_weight = 35.0; + int sampling_moves = 150; + int normalization_interval = 1000; +}; //////////////////////////////////////////////////////////////// class Opendp @@ -118,6 +132,23 @@ class Opendp bool verbose); void removeFillers(); void optimizeMirroring(); + void resetGlobalSwapParams(); + void configureGlobalSwapParams(int passes, + double tolerance, + double tradeoff, + double area_weight, + double pin_weight, + double user_weight, + int sampling_moves, + int normalization_interval, + double 
profiling_excess, + const std::vector& budget_multipliers); + const GlobalSwapParams& getGlobalSwapParams() const + { + return global_swap_params_; + } + void setExtraDplEnabled(bool enabled) { extra_dpl_enabled_ = enabled; } + bool isExtraDplEnabled() const { return extra_dpl_enabled_; } // Place decap cells void addDecapMaster(odb::dbMaster* decap_master, double decap_cap); @@ -355,6 +386,8 @@ class Opendp static constexpr double group_refine_percent_ = .05; static constexpr double refine_percent_ = .02; static constexpr int rand_seed_ = 777; + GlobalSwapParams global_swap_params_; + bool extra_dpl_enabled_ = false; }; int divRound(int dividend, int divisor); diff --git a/src/dpl/src/Opendp.cpp b/src/dpl/src/Opendp.cpp index 6a67c5a2489..45fc7056074 100644 --- a/src/dpl/src/Opendp.cpp +++ b/src/dpl/src/Opendp.cpp @@ -14,10 +14,12 @@ #include #include "PlacementDRC.h" -#include "boost/geometry/geometry.hpp" +#include "boost/geometry/index/predicates.hpp" #include "dpl/OptMirror.h" #include "graphics/DplObserver.h" #include "infrastructure/Coordinates.h" +// Needed for DecapCell/GapInfo completeness in ~Opendp(). +// NOLINTNEXTLINE(misc-include-cleaner) #include "infrastructure/DecapObjects.h" #include "infrastructure/Grid.h" #include "infrastructure/Objects.h" @@ -220,6 +222,67 @@ void Opendp::optimizeMirroring() opt.run(); } +void Opendp::resetGlobalSwapParams() +{ + global_swap_params_ = GlobalSwapParams(); +} + +void Opendp::configureGlobalSwapParams( + int passes, + double tolerance, + double tradeoff, + double area_weight, + double pin_weight, + double user_weight, + int sampling_moves, + int normalization_interval, + double profiling_excess, + const std::vector& budget_multipliers) +{ + if (passes > 0) { + global_swap_params_.passes = passes; + } + if (tolerance > 0.0) { + global_swap_params_.tolerance = tolerance; + } + if (tradeoff >= 0.0) { + global_swap_params_.tradeoff = std::max(0.0, std::min(1.0, tradeoff)); + } + if (area_weight >= 0.0) { +
global_swap_params_.area_weight = area_weight; + } + if (pin_weight >= 0.0) { + global_swap_params_.pin_weight = pin_weight; + } + if (user_weight > 0.0) { + global_swap_params_.user_congestion_weight = user_weight; + } + if (sampling_moves > 0) { + global_swap_params_.sampling_moves = sampling_moves; + } + if (normalization_interval > 0) { + global_swap_params_.normalization_interval = normalization_interval; + } + if (profiling_excess > 0.0) { + global_swap_params_.profiling_excess = profiling_excess; + } + if (!budget_multipliers.empty()) { + global_swap_params_.budget_multipliers = budget_multipliers; + } + if (global_swap_params_.budget_multipliers.empty()) { + global_swap_params_.budget_multipliers = {1.0}; + } + if (global_swap_params_.area_weight < 0.0 + || global_swap_params_.pin_weight < 0.0) { + logger_->error(DPL, 1280, "Utilization weights must be non-negative."); + } + if (global_swap_params_.area_weight == 0.0 + && global_swap_params_.pin_weight == 0.0) { + logger_->error( + DPL, 1281, "At least one utilization weight must be greater than 0."); + } +} + int Opendp::disp(const Node* cell) const { const DbuPt init = initialLocation(cell, false); diff --git a/src/dpl/src/Opendp.i b/src/dpl/src/Opendp.i index 05adfe7b9d0..5c42494ca0f 100644 --- a/src/dpl/src/Opendp.i +++ b/src/dpl/src/Opendp.i @@ -4,6 +4,8 @@ // clang-format off %{ +#include + #include "ord/OpenRoad.hh" #include "graphics/Graphics.h" #include "graphics/DplObserver.h" @@ -113,6 +115,51 @@ void improve_placement_cmd(int seed, opendp->improvePlacement(seed, max_displacement_x, max_displacement_y); } +void reset_global_swap_params_cmd() +{ + dpl::Opendp* opendp = ord::OpenRoad::openRoad()->getOpendp(); + opendp->resetGlobalSwapParams(); +} + +void configure_global_swap_params_cmd(int passes, + double tolerance, + double tradeoff, + double area_weight, + double pin_weight, + double user_weight, + int sampling_moves, + int normalization_interval, + double profiling_excess, + const char* 
budget_multipliers_str) +{ + std::vector budget_multipliers; + if (budget_multipliers_str != nullptr) { + std::stringstream ss(budget_multipliers_str); + double value; + while (ss >> value) { + budget_multipliers.push_back(value); + } + } + + dpl::Opendp* opendp = ord::OpenRoad::openRoad()->getOpendp(); + opendp->configureGlobalSwapParams(passes, + tolerance, + tradeoff, + area_weight, + pin_weight, + user_weight, + sampling_moves, + normalization_interval, + profiling_excess, + budget_multipliers); +} + +void set_extra_dpl_cmd(bool enable) +{ + dpl::Opendp* opendp = ord::OpenRoad::openRoad()->getOpendp(); + opendp->setExtraDplEnabled(enable); +} + } // namespace %} // inline diff --git a/src/dpl/src/Opendp.tcl b/src/dpl/src/Opendp.tcl index 44eaa504aae..c2aa471fc4a 100644 --- a/src/dpl/src/Opendp.tcl +++ b/src/dpl/src/Opendp.tcl @@ -159,12 +159,15 @@ proc optimize_mirroring { args } { sta::define_cmd_args "improve_placement" {\ [-random_seed seed]\ [-max_displacement disp|{disp_x disp_y}]\ + [-global_swap_args {options}]\ + [-enable_extra_dpl bool]\ [-disallow_one_site_gaps]\ } proc improve_placement { args } { sta::parse_key_args "improve_placement" args \ - keys {-random_seed -max_displacement} flags {-disallow_one_site_gaps} + keys {-random_seed -max_displacement -global_swap_args -enable_extra_dpl} \ + flags {-disallow_one_site_gaps} if { [ord::get_db_block] == "NULL" } { utl::error DPL 342 "No design block found." 
@@ -196,6 +199,96 @@ proc improve_placement { args } { set max_displacement_y 0 } + dpl::reset_global_swap_params_cmd + if { [info exists keys(-global_swap_args)] } { + set global_swap_passes -1 + set global_swap_tolerance -1 + set global_swap_tradeoff -1 + set global_swap_area_weight -1 + set global_swap_pin_weight -1 + set global_swap_user_weight -1 + set global_swap_sampling -1 + set global_swap_normalization -1 + set global_swap_profiling_excess -1 + set global_swap_budget_list {} + + set global_swap_args $keys(-global_swap_args) + if { ([llength $global_swap_args] % 2) != 0 } { + sta::error DPL 345 "-global_swap_args must be key/value pairs" + } + foreach {opt value} $global_swap_args { + switch -- $opt { + -passes { + set global_swap_passes $value + } + -tolerance { + set global_swap_tolerance $value + } + -tradeoff { + set global_swap_tradeoff $value + } + -area_weight { + set global_swap_area_weight $value + } + -pin_weight { + set global_swap_pin_weight $value + } + -congestion_user_weight { + set global_swap_user_weight $value + } + -sampling_moves { + set global_swap_sampling $value + } + -normalization_interval { + set global_swap_normalization $value + } + -profiling_excess { + set global_swap_profiling_excess $value + } + -budget_multipliers { + set global_swap_budget_list {} + foreach multiplier $value { + set trimmed [string trim $multiplier] + if { $trimmed eq "" } { + continue + } + if { [catch { expr { double($trimmed) } } parsed] } { + sta::error DPL 347 "Invalid -budget_multipliers value \"$multiplier\"" + } + lappend global_swap_budget_list $parsed + } + } + default { + sta::error DPL 346 "Unknown -global_swap_args option $opt" + } + } + } + set global_swap_budget_str "" + if { [llength $global_swap_budget_list] > 0 } { + set global_swap_budget_str [join $global_swap_budget_list " "] + } + dpl::configure_global_swap_params_cmd \ + $global_swap_passes \ + $global_swap_tolerance \ + $global_swap_tradeoff \ + $global_swap_area_weight \ + 
$global_swap_pin_weight \ + $global_swap_user_weight \ + $global_swap_sampling \ + $global_swap_normalization \ + $global_swap_profiling_excess \ + $global_swap_budget_str + } + + set extra_dpl_enabled 0 + if { [info exists keys(-enable_extra_dpl)] } { + set extra_dpl_enabled $keys(-enable_extra_dpl) + } elseif { [info exists ::env(ENABLE_EXTRA_DPL)] } { + set extra_dpl_enabled $::env(ENABLE_EXTRA_DPL) + } + set extra_dpl_enabled [expr { $extra_dpl_enabled ? 1 : 0 }] + dpl::set_extra_dpl_cmd $extra_dpl_enabled + sta::check_argc_eq0 "improve_placement" $args dpl::improve_placement_cmd $seed $max_displacement_x $max_displacement_y } diff --git a/src/dpl/src/Optdp.cpp b/src/dpl/src/Optdp.cpp index 08107d06607..a9bae97271d 100644 --- a/src/dpl/src/Optdp.cpp +++ b/src/dpl/src/Optdp.cpp @@ -75,6 +75,8 @@ void Opendp::improvePlacement(const int seed, // A manager to track cells. DetailedMgr mgr(arch_.get(), network_.get(), grid_.get(), drc_engine_.get()); mgr.setLogger(logger_); + mgr.setGlobalSwapParams(global_swap_params_); + mgr.setExtraDplEnabled(extra_dpl_enabled_); // Various settings. 
mgr.setSeed(seed); mgr.setMaxDisplacement(max_displacement_x, max_displacement_y); diff --git a/src/dpl/src/infrastructure/Grid.cpp b/src/dpl/src/infrastructure/Grid.cpp index c973625f41f..a6842f4de72 100644 --- a/src/dpl/src/infrastructure/Grid.cpp +++ b/src/dpl/src/infrastructure/Grid.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include "Padding.h" #include "boost/polygon/polygon.hpp" #include "dpl/Opendp.h" +#include "network.h" #include "odb/db.h" #include "odb/dbShape.h" #include "odb/dbTransform.h" @@ -779,4 +781,198 @@ DbuY Grid::rowHeight(GridY index) return row_index_to_pixel_height_.at(index.v); } +int Grid::countValidPixels(GridX x_begin, + GridY y_begin, + GridX x_end, + GridY y_end) const +{ + int count = 0; + for (GridY y = y_begin; y < y_end; y++) { + for (GridX x = x_begin; x < x_end; x++) { + Pixel* pixel = gridPixel(x, y); + if (pixel != nullptr && pixel->is_valid) { + ++count; + } + } + } + return count; +} + +void Grid::applyCellContribution(Node* node, + GridX x_begin, + GridY y_begin, + GridX x_end, + GridY y_end, + float scale) +{ + const int cell_pixel_count = countValidPixels(x_begin, y_begin, x_end, y_end); + if (cell_pixel_count == 0) { + return; + } + if (total_area_.empty()) { + return; + } + + const float cell_area + = static_cast(node->getWidth().v * node->getHeight().v); + const float area_per_pixel + = (cell_area / static_cast(cell_pixel_count)) * scale; + const float pins_per_pixel + = (static_cast(node->getNumPins()) / cell_pixel_count) * scale; + + const int grid_size = static_cast(total_area_.size()); + for (GridY y = y_begin; y < y_end; y++) { + for (GridX x = x_begin; x < x_end; x++) { + Pixel* pixel = gridPixel(x, y); + if (pixel == nullptr || !pixel->is_valid) { + continue; + } + const int pixel_idx = (y.v * row_site_count_.v) + x.v; + if (pixel_idx < 0 || pixel_idx >= grid_size) { + continue; + } + total_area_[pixel_idx] += area_per_pixel; + total_pins_[pixel_idx] += 
pins_per_pixel; + total_area_[pixel_idx] = max(total_area_[pixel_idx], 0.0f); + total_pins_[pixel_idx] = max(total_pins_[pixel_idx], 0.0f); + } + } +} + +void Grid::computeUtilizationMap(Network* network, + float area_weight, + float pin_weight) +{ + // Get grid dimensions + const int grid_size = row_count_.v * row_site_count_.v; + + if (grid_size == 0) { + return; // No grid to work with + } + + // Store weights for incremental updates + area_weight_ = area_weight; + pin_weight_ = pin_weight; + + // Initialize member vectors for area and pin density accumulation + total_area_.assign(grid_size, 0.0f); + total_pins_.assign(grid_size, 0.0f); + utilization_density_.assign(grid_size, 0.0f); + + // Iterate through all movable nodes in the network + for (const auto& node_ptr : network->getNodes()) { + Node* node = node_ptr.get(); + if (!node || node->isFixed() || node->getType() != Node::Type::CELL) { + continue; // Skip fixed cells and non-standard cells + } + + const GridRect cell_grid = gridCovering(node); + applyCellContribution( + node, cell_grid.xlo, cell_grid.ylo, cell_grid.xhi, cell_grid.yhi, 1.0f); + } + + normalizeUtilization(); +} + +void Grid::normalizeUtilization() +{ + const int grid_size = total_area_.size(); + if (grid_size == 0) { + return; + } + + // Find maximum values for normalization + float max_area = 0.0f; + float max_pins = 0.0f; + + // We iterate manually to find max to avoid multiple passes or copies + for (float value : total_area_) { + max_area = std::max(value, max_area); + } + for (float value : total_pins_) { + max_pins = std::max(value, max_pins); + } + + if (max_area <= 0.0f || max_pins <= 0.0f) { + if (logger_ != nullptr) { + logger_->error( + DPL, + 1300, + "Utilization normalization failed: max area {} max pins {}.", + max_area, + max_pins); + } else { + throw std::runtime_error( + "Utilization normalization failed: zero area or pins detected."); + } + } + last_max_area_ = max_area; + last_max_pins_ = max_pins; + + // Calculate 
weighted utilization density for each pixel + float max_util_density = 0.0f; + for (int i = 0; i < grid_size; i++) { + const float normalized_area = total_area_[i] / max_area; + const float normalized_pins = total_pins_[i] / max_pins; + const float val + = (area_weight_ * normalized_area) + (pin_weight_ * normalized_pins); + utilization_density_[i] = val; + max_util_density = std::max(val, max_util_density); + } + + // Final normalization of utilization density to [0, 1] range + if (max_util_density > 0.0f) { + for (float& density : utilization_density_) { + density /= max_util_density; + } + } + last_max_utilization_ = max_util_density > 0.0f ? max_util_density : 1.0f; + utilization_dirty_ = false; +} + +void Grid::updateUtilizationMap(Node* node, DbuX x, DbuY y, bool add) +{ + if (!node || node->isFixed() || node->getType() != Node::Type::CELL) { + return; // Skip invalid, fixed, or non-standard cells + } + + // Calculate grid rectangle for the cell at the given position + const GridX grid_x = gridX(x); + const GridY grid_y = gridSnapDownY(y); + const GridX grid_x_end = grid_x + gridWidth(node); + const GridY grid_y_end = gridEndY(y + node->getHeight()); + + const float sign = add ? 1.0f : -1.0f; + applyCellContribution(node, grid_x, grid_y, grid_x_end, grid_y_end, sign); + utilization_dirty_ = true; +} + +float Grid::getUtilizationDensity(int pixel_idx) const +{ + // When the map is marked dirty we reuse the maxima from the last full + // normalization to produce an approximate normalized value. This avoids + // recomputing the entire map on every query while still reflecting the most + // recent raw contributions.
+ + if (pixel_idx < 0 + || pixel_idx >= static_cast(utilization_density_.size())) { + return 0.0f; + } + + if (!utilization_dirty_) { + return utilization_density_[pixel_idx]; + } + + if (last_max_area_ <= 0.0f || last_max_pins_ <= 0.0f + || last_max_utilization_ <= 0.0f) { + return utilization_density_[pixel_idx]; + } + + const float normalized_area = total_area_[pixel_idx] / last_max_area_; + const float normalized_pins = total_pins_[pixel_idx] / last_max_pins_; + const float val + = (area_weight_ * normalized_area) + (pin_weight_ * normalized_pins); + return std::min(val / last_max_utilization_, 1.0f); +} + } // namespace dpl diff --git a/src/dpl/src/infrastructure/Grid.h b/src/dpl/src/infrastructure/Grid.h index 0ec536ace6c..aba213b0cd3 100644 --- a/src/dpl/src/infrastructure/Grid.h +++ b/src/dpl/src/infrastructure/Grid.h @@ -23,6 +23,8 @@ namespace dpl { +class Network; + struct GridIntervalX { GridX lo; @@ -155,6 +157,14 @@ class Grid bool isMultiHeight(odb::dbMaster* master) const; + // Utilization-aware placement support + void computeUtilizationMap(Network* network, + float area_weight, + float pin_weight); + void updateUtilizationMap(Node* node, DbuX x, DbuY y, bool add); + float getUtilizationDensity(int pixel_idx) const; + void normalizeUtilization(); + private: // Maps a site to the right orientation to use in a given row using SiteToOrientation = std::map; @@ -211,6 +221,28 @@ class Grid GridY row_count_{0}; GridX row_site_count_{0}; + + // Utilization density map + std::vector utilization_density_; + std::vector total_area_; + std::vector total_pins_; + float area_weight_ = 0.0f; + float pin_weight_ = 0.0f; + bool utilization_dirty_ = false; + float last_max_area_ = 1.0f; + float last_max_pins_ = 1.0f; + float last_max_utilization_ = 1.0f; + + int countValidPixels(GridX x_begin, + GridY y_begin, + GridX x_end, + GridY y_end) const; + void applyCellContribution(Node* node, + GridX x_begin, + GridY y_begin, + GridX x_end, + GridY y_end, + float scale); 
}; } // namespace dpl diff --git a/src/dpl/src/optimization/detailed.cxx b/src/dpl/src/optimization/detailed.cxx index c27c72f2e5a..0f81495143e 100644 --- a/src/dpl/src/optimization/detailed.cxx +++ b/src/dpl/src/optimization/detailed.cxx @@ -6,17 +6,14 @@ #include #include -#include "util/utility.h" -#include "utl/Logger.h" - -// Detailed management of segments. -#include "infrastructure/detailed_segment.h" #include "optimization/detailed_manager.h" +#include "utl/Logger.h" // Detailed placement algorithms. #include "boost/token_functions.hpp" #include "boost/tokenizer.hpp" #include "detailed.h" #include "optimization/detailed_global.h" +#include "optimization/detailed_global_legacy.h" #include "optimization/detailed_mis.h" #include "optimization/detailed_orient.h" #include "optimization/detailed_random.h" @@ -143,8 +140,13 @@ void Detailed::doDetailedCommand(std::vector& args) DetailedMis mis(arch_, network_); mis.run(mgr_, args); } else if (strcmp(args[0].c_str(), "gs") == 0) { - DetailedGlobalSwap gs(arch_, network_); - gs.run(mgr_, args); + if (mgr_->isExtraDplEnabled()) { + DetailedGlobalSwap gs(arch_, network_); + gs.run(mgr_, args); + } else { + legacy::DetailedGlobalSwap gs(arch_, network_); + gs.run(mgr_, args); + } } else if (strcmp(args[0].c_str(), "vs") == 0) { DetailedVerticalSwap vs(arch_, network_); vs.run(mgr_, args); diff --git a/src/dpl/src/optimization/detailed_global.cxx b/src/dpl/src/optimization/detailed_global.cxx index cc71b898629..837b3d94d59 100644 --- a/src/dpl/src/optimization/detailed_global.cxx +++ b/src/dpl/src/optimization/detailed_global.cxx @@ -8,14 +8,21 @@ #include #include #include +#include #include #include +#include "boost/token_functions.hpp" #include "boost/tokenizer.hpp" +#include "detailed_generator.h" #include "detailed_manager.h" #include "dpl/Opendp.h" +#include "infrastructure/Grid.h" #include "infrastructure/Objects.h" +#include "infrastructure/network.h" #include "objective/detailed_hpwl.h" +#include 
"util/journal.h" +#include "util/utility.h" #include "utl/Logger.h" namespace dpl { @@ -63,46 +70,70 @@ void DetailedGlobalSwap::run(DetailedMgr* mgrPtr, const std::string& command) void DetailedGlobalSwap::run(DetailedMgr* mgrPtr, std::vector& args) { - // Given the arguments, figure out which routine to run to do the reordering. + // Two-pass budget-constrained congestion-aware optimization using + // Journal-based state management mgr_ = mgrPtr; arch_ = mgr_->getArchitecture(); network_ = mgr_->getNetwork(); + swap_params_ = &mgr_->getGlobalSwapParams(); + const GlobalSwapParams& params = *swap_params_; + + int passes = params.passes; + double tol = params.tolerance; + tradeoff_ = params.tradeoff; - int passes = 1; - double tol = 0.01; for (size_t i = 1; i < args.size(); i++) { if (args[i] == "-p" && i + 1 < args.size()) { passes = std::atoi(args[++i].c_str()); } else if (args[i] == "-t" && i + 1 < args.size()) { tol = std::atof(args[++i].c_str()); + } else if (args[i] == "-x" && i + 1 < args.size()) { + tradeoff_ = std::atof(args[++i].c_str()); } } passes = std::max(passes, 1); tol = std::max(tol, 0.01); + tradeoff_ = std::max(0.0, std::min(1.0, tradeoff_)); // Clamp to [0.0, 1.0] - int64_t last_hpwl, curr_hpwl, init_hpwl; uint64_t hpwl_x, hpwl_y; - - curr_hpwl = Utility::hpwl(network_, hpwl_x, hpwl_y); - init_hpwl = curr_hpwl; + int64_t init_hpwl = Utility::hpwl(network_, hpwl_x, hpwl_y); if (init_hpwl == 0) { return; } + + // Store original displacement limits for restoration later + int orig_disp_x, orig_disp_y; + mgr_->getMaxDisplacement(orig_disp_x, orig_disp_y); + mgr_->getLogger()->info( + DPL, + 906, + "Starting two-pass congestion-aware global swap optimization " + "(tradeoff={:.1f})", + tradeoff_); + + const int chip_width = arch_->getMaxX().v - arch_->getMinX().v; + const int chip_height = arch_->getMaxY().v - arch_->getMinY().v; + + // PASS 1: HPWL Profiling Pass + mgr_->getLogger()->info( + DPL, 907, "Pass 1: HPWL profiling to determine budget"); + 
+ // Clear journal to ensure clean state tracking for profiling pass + mgr_->getJournal().clear(); + + is_profiling_pass_ = true; + congestion_weight_ = 0.0; // Pure HPWL optimization + + int64_t last_hpwl, curr_hpwl = init_hpwl; for (int p = 1; p <= passes; p++) { last_hpwl = curr_hpwl; - - // XXX: Actually, global swapping is nothing more than random - // greedy improvement in which the move generating is done - // using this object to generate a target which is the optimal - // region for each candidate cell. globalSwap(); - curr_hpwl = Utility::hpwl(network_, hpwl_x, hpwl_y); mgr_->getLogger()->info(DPL, - 306, - "Pass {:3d} of global swaps; hpwl is {:.6e}.", + 316, + "Profiling pass {:d}; hpwl is {:.6e}.", p, (double) curr_hpwl); @@ -111,21 +142,187 @@ void DetailedGlobalSwap::run(DetailedMgr* mgrPtr, break; } } - double curr_imp = (((init_hpwl - curr_hpwl) / (double) init_hpwl) * 100.); + + // Calculate budget allowance from profiling pass + double optimal_hpwl = curr_hpwl; + double profiling_excess = params.profiling_excess; + if (profiling_excess <= 0.0) { + profiling_excess = 1.10; + } + budget_hpwl_ = optimal_hpwl * profiling_excess; + const double budget_pct = ((budget_hpwl_ / optimal_hpwl) - 1.0) * 100.0; + mgr_->getLogger()->info( + DPL, + 908, + "Profiling complete. 
Optimal HPWL={:.2f}, Budget HPWL={:.2f} ({:+.1f}%)", + optimal_hpwl, + budget_hpwl_, + budget_pct); + + // Restore initial state using Journal's built-in undo mechanism mgr_->getLogger()->info(DPL, - 307, - "End of global swaps; objective is {:.6e}, " - "improvement is {:.2f} percent.", - (double) curr_hpwl, - curr_imp); + 917, + "Undoing {} profiling moves to restore initial state", + mgr_->getJournal().size()); + mgr_->getJournal().undo(); + mgr_->getJournal().clear(); // Clear journal for second pass + + // PASS 2: Iterative Budget-Constrained Congestion Optimization (4 iterations) + mgr_->getLogger()->info(DPL, + 909, + "Pass 2: Iterative budget-constrained congestion " + "optimization (4 stages)"); + is_profiling_pass_ = false; + + // Re-compute utilization density map to ensure it's synchronized with + // restored placement + const float area_weight = static_cast(params.area_weight); + const float pin_weight = static_cast(params.pin_weight); + mgr_->getGrid()->computeUtilizationMap(network_, area_weight, pin_weight); + mgr_->getLogger()->info( + DPL, 918, "Re-computed utilization density map after state restoration"); + + // Calculate adaptive congestion weight once for all iterations + congestion_weight_ = calculateAdaptiveCongestionWeight(); + + // Define the iterative refinement schedule + std::vector budget_multipliers = params.budget_multipliers; + if (budget_multipliers.empty()) { + budget_multipliers = {1.10}; + } + const std::vector stage_names + = {"Exploratory", "Consolidation", "Fine-tuning", "Final Polish"}; + + curr_hpwl = Utility::hpwl(network_, hpwl_x, hpwl_y); + + // Iterative refinement loop + for (size_t iteration = 0; iteration < budget_multipliers.size(); + iteration++) { + // Update budget for this iteration + budget_hpwl_ = optimal_hpwl * budget_multipliers[iteration]; + std::string stage_name; + if (iteration < stage_names.size()) { + stage_name = stage_names[iteration]; + } else { + stage_name = "Stage " + std::to_string(iteration + 
1); + } + if (iteration == 0) { + mgr_->setMaxDisplacement(chip_width, chip_height); + mgr_->getLogger()->info(DPL, + 921, + "Iteration {} ({}): temporary displacement set " + "to chip dimensions ({}, {})", + iteration + 1, + stage_name, + chip_width, + chip_height); + } else if (iteration == 1) { + mgr_->setMaxDisplacement(orig_disp_x * 10, orig_disp_y * 10); + mgr_->getLogger()->info( + DPL, + 922, + "Iteration {} ({}): displacement relaxed to 10x original ({}, {})", + iteration + 1, + stage_name, + orig_disp_x * 10, + orig_disp_y * 10); + } else { + mgr_->setMaxDisplacement(orig_disp_x, orig_disp_y); + mgr_->getLogger()->info( + DPL, + 923, + "Iteration {} ({}): displacement restored to original ({}, {})", + iteration + 1, + stage_name, + orig_disp_x, + orig_disp_y); + } + + mgr_->getLogger()->info( + DPL, + 919, + "Iteration {}: {} stage - Budget={:.2f} ({:.0f}% of optimal)", + iteration + 1, + stage_name, + budget_hpwl_, + (budget_multipliers[iteration] - 1.0) * 100.0); + + // Run optimization passes for this iteration + for (int p = 1; p <= passes; p++) { + last_hpwl = curr_hpwl; + globalSwap(); + curr_hpwl = Utility::hpwl(network_, hpwl_x, hpwl_y); + + mgr_->getLogger()->info( + DPL, + 331, + "Congestion optimization iteration {} pass {:d}; hpwl is {:.6e}.", + iteration + 1, + p, + (double) curr_hpwl); + + if (last_hpwl == 0 + || std::abs(curr_hpwl - last_hpwl) / (double) last_hpwl <= tol) { + break; + } + } + + // Report iteration results + const double iteration_improvement + = ((init_hpwl - curr_hpwl) / static_cast(init_hpwl)) * 100.0; + double budget_utilization = 0.0; + const double budget_range = budget_hpwl_ - optimal_hpwl; + if (std::abs(budget_range) > std::numeric_limits::epsilon()) { + budget_utilization = ((curr_hpwl - optimal_hpwl) / budget_range) * 100.0; + } + mgr_->getLogger()->info(DPL, + 920, + "Iteration {} complete: HPWL={:.6e}, " + "improvement={:.2f}%, budget utilization={:.1f}%", + iteration + 1, + static_cast(curr_hpwl), + 
iteration_improvement, + budget_utilization); + } + + // Final reporting + double final_improvement + = (((init_hpwl - curr_hpwl) / (double) init_hpwl) * 100.); + double final_budget_utilization = 0.0; + const double final_budget_range = budget_hpwl_ - optimal_hpwl; + if (std::abs(final_budget_range) > std::numeric_limits::epsilon()) { + final_budget_utilization + = ((curr_hpwl - optimal_hpwl) / final_budget_range) * 100.0; + } + + mgr_->getLogger()->info( + DPL, + 910, + "Two-pass optimization complete: " + "final HPWL={:.6e}, improvement={:.2f}%, budget utilization={:.1f}%", + (double) curr_hpwl, + final_improvement, + final_budget_utilization); + + // Ensure original displacement limits are fully restored + mgr_->setMaxDisplacement(orig_disp_x, orig_disp_y); + mgr_->getLogger()->info( + DPL, + 924, + "Final restoration: displacement limits restored to original ({}, {})", + orig_disp_x, + orig_disp_y); } ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// void DetailedGlobalSwap::globalSwap() { - // Nothing for than random greedy improvement with only a hpwl objective - // and done such that every candidate cell is considered once!!! + // Two-pass budget-constrained global swap: profiling pass or congestion + // optimization pass + if (swap_params_ == nullptr && mgr_ != nullptr) { + swap_params_ = &mgr_->getGlobalSwapParams(); + } traversal_ = 0; edgeMask_.resize(network_->getNumEdges()); @@ -142,25 +339,180 @@ void DetailedGlobalSwap::globalSwap() hpwlObj.init(mgr_, nullptr); // Ignore orientation. 
double currHpwl = hpwlObj.curr(); - double nextHpwl = 0.; + const double initHpwl = currHpwl; + + // Determine budget constraint based on pass type + double maxAllowedHpwl; + if (is_profiling_pass_) { + // In profiling pass: use generous budget for pure HPWL optimization + maxAllowedHpwl = initHpwl * 2.0; // Allow large changes during profiling + mgr_->getLogger()->info( + DPL, + 914, + "Profiling pass: initial HPWL={:.2f}, generous budget={:.2f}", + initHpwl, + maxAllowedHpwl); + } else { + // In congestion optimization pass: use strict budget from profiling + maxAllowedHpwl = budget_hpwl_; + mgr_->getLogger()->info(DPL, + 915, + "Congestion optimization pass: initial " + "HPWL={:.2f}, budget={:.2f} (from profiling)", + initHpwl, + maxAllowedHpwl); + } + + int moves_since_normalization = 0; + const int normalization_interval + = swap_params_ ? swap_params_->normalization_interval : 1000; + // Consider each candidate cell once. for (auto ndi : candidates) { - if (!generate(ndi)) { - continue; + // Hybrid move generation: Smart Swap logic + bool move_generated = false; + + // Phase 1: Try wirelength-optimal move (unless we decide to override with + // exploration) + if (mgr_->getRandom(1000) >= static_cast(tradeoff_ * 1000)) { + move_generated = generateWirelengthOptimalMove(ndi); } - double delta = hpwlObj.delta(mgr_->getJournal()); + // Phase 2: If no move generated OR we decided to override, try random + // exploration move + if (!move_generated) { + move_generated = generateRandomMove(ndi); + } - nextHpwl = currHpwl - delta; // -delta is +ve is less. 
+ if (!move_generated) { + continue; // No valid move found with either generator + } - if (nextHpwl <= currHpwl) { + // Calculate HPWL delta + double hpwl_delta = hpwlObj.delta(mgr_->getJournal()); + double nextHpwl = currHpwl - hpwl_delta; // Projected HPWL after this move + + // Calculate congestion improvement (only relevant in second pass) + double congestion_improvement = 0.0; + if (!is_profiling_pass_) { // Only calculate congestion improvement in + // second pass + const auto& journal = mgr_->getJournal(); + if (!journal.empty()) { + for (const auto& action_ptr : journal) { + // Only handle MoveCellAction types + if (action_ptr->typeId() != JournalActionTypeEnum::MOVE_CELL) { + continue; + } + + const MoveCellAction* move_action + = static_cast(action_ptr.get()); + Node* moved_cell = move_action->getNode(); + if (!moved_cell + || moved_cell->getId() >= congestion_contribution_.size()) { + continue; + } + + // Get original and new grid coordinates + const auto* grid = mgr_->getGrid(); + const GridX orig_grid_x = grid->gridX(move_action->getOrigLeft()); + const GridY orig_grid_y + = grid->gridSnapDownY(move_action->getOrigBottom()); + const GridX new_grid_x = grid->gridX(move_action->getNewLeft()); + const GridY new_grid_y + = grid->gridSnapDownY(move_action->getNewBottom()); + + // Calculate pixel indices (row-major order) + const int row_site_count = grid->getRowSiteCount().v; + const int orig_pixel_idx + = (orig_grid_y.v * row_site_count) + orig_grid_x.v; + const int new_pixel_idx + = (new_grid_y.v * row_site_count) + new_grid_x.v; + + // Get utilization densities at original and new locations + const float orig_density + = grid->getUtilizationDensity(orig_pixel_idx); + const float new_density = grid->getUtilizationDensity(new_pixel_idx); + + // Get pre-calculated congestion contribution for this cell + const double cell_cong_contrib + = congestion_contribution_[moved_cell->getId()]; + + // ΔCongestion = (orig_density - new_density) scaled by the cell's + 
// weighted contribution. + congestion_improvement + += (orig_density - new_density) * cell_cong_contrib; + } + } + } + + // Hybrid acceptance criteria: budget constraint + combined objective + if (nextHpwl > maxAllowedHpwl) { + // Hard constraint violated: reject move regardless of other benefits + mgr_->rejectMove(); + continue; + } + + // Within budget: evaluate combined profit + double combined_profit + = hpwl_delta + (congestion_weight_ * congestion_improvement); + + if (combined_profit > 0) { + // Accept: move is profitable and within budget hpwlObj.accept(); mgr_->acceptMove(); currHpwl = nextHpwl; + + // Update utilization map for accepted moves (only in congestion + // optimization pass) + if (!is_profiling_pass_) { + const auto& journal = mgr_->getJournal(); + if (!journal.empty()) { + for (const auto& action_ptr : journal) { + if (action_ptr->typeId() != JournalActionTypeEnum::MOVE_CELL) { + continue; + } + + const MoveCellAction* move_action + = static_cast(action_ptr.get()); + Node* moved_cell = move_action->getNode(); + if (!moved_cell) { + continue; + } + + // Remove cell from old location and add to new location + mgr_->getGrid()->updateUtilizationMap(moved_cell, + move_action->getOrigLeft(), + move_action->getOrigBottom(), + false); + mgr_->getGrid()->updateUtilizationMap(moved_cell, + move_action->getNewLeft(), + move_action->getNewBottom(), + true); + + moves_since_normalization++; + } + } + // Lazy normalization + if (moves_since_normalization >= normalization_interval) { + mgr_->getGrid()->normalizeUtilization(); + moves_since_normalization = 0; + } + } } else { mgr_->rejectMove(); } } + + // Report final statistics + const double finalDegradation = ((currHpwl - initHpwl) / initHpwl) * 100.0; + const char* pass_name + = is_profiling_pass_ ? 
"Profiling" : "Congestion optimization"; + mgr_->getLogger()->info(DPL, + 916, + "{} pass complete: final HPWL={:.2f}, change={:.1f}%", + pass_name, + currHpwl, + finalDegradation); } //////////////////////////////////////////////////////////////////////////////// @@ -275,7 +627,7 @@ bool DetailedGlobalSwap::calculateEdgeBB(Edge* ed, Node* nd, odb::Rect& bbox) } //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -bool DetailedGlobalSwap::generate(Node* ndi) +bool DetailedGlobalSwap::generateWirelengthOptimalMove(Node* ndi) { double yi = ndi->getBottom().v + 0.5 * ndi->getHeight().v; double xi = ndi->getLeft().v + 0.5 * ndi->getWidth().v; @@ -292,10 +644,7 @@ bool DetailedGlobalSwap::generate(Node* ndi) return false; } - // Observe displacement limit. I suppose there are options. - // If we cannot move into the optimal region, we could try - // to move closer to it. Or, we could just reject if we cannot - // get into the optimal region. + // Observe displacement limit. 
int dispX, dispY; mgr_->getMaxDisplacement(dispX, dispY); odb::Rect lbox(ndi->getLeft().v - dispX, @@ -363,6 +712,95 @@ bool DetailedGlobalSwap::generate(Node* ndi) return false; } +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +bool DetailedGlobalSwap::generateRandomMove(Node* ndi) +{ + // Generate a random move within the current displacement constraints + // This is for exploration and power optimization purposes + + if (mgr_->getNumReverseCellToSegs(ndi->getId()) != 1) { + return false; + } + int si = mgr_->getReverseCellToSegs(ndi->getId())[0]->getSegId(); + + // Get current displacement limits + int dispX, dispY; + mgr_->getMaxDisplacement(dispX, dispY); + + // Define the search area around the current cell position + DbuX curr_x = ndi->getLeft(); + DbuY curr_y = ndi->getBottom(); + + DbuX min_x = std::max(arch_->getMinX(), curr_x - dispX); + DbuX max_x = std::min(arch_->getMaxX(), curr_x + dispX); + DbuY min_y = std::max(arch_->getMinY(), curr_y - dispY); + DbuY max_y = std::min(arch_->getMaxY(), curr_y + dispY); + + // Try up to 10 random locations within the displacement area + const int max_attempts = 10; + for (int attempt = 0; attempt < max_attempts; attempt++) { + // Generate random coordinates within the allowed displacement area + DbuX rand_x{min_x.v + mgr_->getRandom(max_x.v - min_x.v + 1)}; + DbuY rand_y{min_y.v + mgr_->getRandom(max_y.v - min_y.v + 1)}; + + // Find the appropriate row and segment for this random location + int rj = arch_->find_closest_row(rand_y); + rand_y = DbuY{arch_->getRow(rj)->getBottom()}; // Row alignment + + int sj = -1; + for (int s = 0; s < mgr_->getNumSegsInRow(rj); s++) { + DetailedSeg* segPtr = mgr_->getSegsInRow(rj)[s]; + if (rand_x >= segPtr->getMinX() && rand_x <= segPtr->getMaxX()) { + sj = segPtr->getSegId(); + break; + } + } + + if (sj == -1) { + continue; // Invalid segment, try another random 
location + } + + if (ndi->getGroupId() != mgr_->getSegment(sj)->getRegId()) { + continue; // Wrong region, try another location + } + + // Try to execute the move/swap to this random location + if (mgr_->tryMove(ndi, curr_x, curr_y, si, rand_x, rand_y, sj)) { + ++moves_; + return true; + } + if (mgr_->trySwap(ndi, curr_x, curr_y, si, rand_x, rand_y, sj)) { + ++swaps_; + return true; + } + } + + return false; // Could not find a valid random move after max_attempts +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +bool DetailedGlobalSwap::generate(Node* ndi) +{ + // Hybrid move generation: Smart Swap logic + bool move_generated = false; + + // Phase 1: Try wirelength-optimal move (unless we decide to override with + // exploration) + if (mgr_->getRandom(1000) >= static_cast(tradeoff_ * 1000)) { + move_generated = generateWirelengthOptimalMove(ndi); + } + + // Phase 2: If no move generated OR we decided to override, try random + // exploration move + if (!move_generated) { + move_generated = generateRandomMove(ndi); + } + + return move_generated; +} + //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// void DetailedGlobalSwap::init(DetailedMgr* mgr) @@ -370,10 +808,38 @@ void DetailedGlobalSwap::init(DetailedMgr* mgr) mgr_ = mgr; arch_ = mgr->getArchitecture(); network_ = mgr->getNetwork(); + swap_params_ = &mgr->getGlobalSwapParams(); traversal_ = 0; edgeMask_.resize(network_->getNumEdges()); std::ranges::fill(edgeMask_, 0); + + // Congestion-aware placement initialization. 
+ const float area_weight = static_cast(swap_params_->area_weight); + const float pin_weight = static_cast(swap_params_->pin_weight); + mgr_->getGrid()->computeUtilizationMap(network_, area_weight, pin_weight); + + congestion_contribution_.resize(network_->getNumNodes()); + for (const auto& node_ptr : network_->getNodes()) { + Node* node = node_ptr.get(); + if (node && node->getType() == Node::Type::CELL) { + const double cell_area + = static_cast(node->getWidth().v) * node->getHeight().v; + const double num_pins = static_cast(node->getNumPins()); + congestion_contribution_[node->getId()] + = area_weight * cell_area + pin_weight * num_pins; + } + } + + // Calculate adaptive congestion weight by sampling typical HPWL deltas and + // improvements. + congestion_weight_ = calculateAdaptiveCongestionWeight(); + + mgr_->getLogger()->info( + DPL, + 901, + "Initialized congestion-aware global swap with adaptive weight={:.3f}", + congestion_weight_); } //////////////////////////////////////////////////////////////////////////////// @@ -386,12 +852,136 @@ bool DetailedGlobalSwap::generate(DetailedMgr* mgr, mgr_ = mgr; arch_ = mgr->getArchitecture(); network_ = mgr->getNetwork(); + swap_params_ = &mgr->getGlobalSwapParams(); Node* ndi = candidates[mgr_->getRandom(candidates.size())]; return generate(ndi); } +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +double DetailedGlobalSwap::calculateAdaptiveCongestionWeight() +{ + const int num_samples = swap_params_ ? swap_params_->sampling_moves : 150; + const double user_knob + = swap_params_ ? 
swap_params_->user_congestion_weight : 35.0; + + // Get candidate cells for sampling + std::vector candidates = mgr_->getSingleHeightCells(); + if (candidates.size() < 2) { + return 1.0 * mgr_->getGrid()->getSiteWidth().v; + } + + // Create temporary HPWL objective for sampling + DetailedHPWL hpwlObj(network_); + hpwlObj.init(mgr_, nullptr); + + double total_hpwl_delta = 0.0; + double total_cong_improvement = 0.0; + int valid_samples = 0; + + // Sample random swaps to estimate typical deltas + for (int i = 0; i < num_samples && i < candidates.size(); i++) { + // Pick a random candidate cell + Node* cell_a = candidates[mgr_->getRandom(candidates.size())]; + + // Try to generate a move/swap for this cell + if (!generate(cell_a)) { + continue; // Skip if no valid move found + } + + // Calculate HPWL delta + double hpwl_delta = hpwlObj.delta(mgr_->getJournal()); + + // Calculate congestion improvement + double cong_improvement = 0.0; + const auto& journal = mgr_->getJournal(); + if (!journal.empty()) { + for (const auto& action_ptr : journal) { + if (action_ptr->typeId() != JournalActionTypeEnum::MOVE_CELL) { + continue; + } + + const MoveCellAction* move_action + = static_cast(action_ptr.get()); + Node* moved_cell = move_action->getNode(); + if (!moved_cell + || moved_cell->getId() >= congestion_contribution_.size()) { + continue; + } + + // Get grid coordinates + const auto* grid = mgr_->getGrid(); + const GridX orig_grid_x = grid->gridX(move_action->getOrigLeft()); + const GridY orig_grid_y + = grid->gridSnapDownY(move_action->getOrigBottom()); + const GridX new_grid_x = grid->gridX(move_action->getNewLeft()); + const GridY new_grid_y + = grid->gridSnapDownY(move_action->getNewBottom()); + + // Calculate pixel indices + const int row_site_count = grid->getRowSiteCount().v; + const int orig_pixel_idx + = (orig_grid_y.v * row_site_count) + orig_grid_x.v; + const int new_pixel_idx + = (new_grid_y.v * row_site_count) + new_grid_x.v; + + // Get densities + const float 
orig_density = grid->getUtilizationDensity(orig_pixel_idx); + const float new_density = grid->getUtilizationDensity(new_pixel_idx); + + // Get cell contribution + const double cell_cong_contrib + = congestion_contribution_[moved_cell->getId()]; + + // Calculate improvement + cong_improvement += (orig_density - new_density) * cell_cong_contrib; + } + } + + // Accumulate magnitudes + total_hpwl_delta += std::abs(hpwl_delta); + total_cong_improvement += std::abs(cong_improvement); + valid_samples++; + + // Always reject the sample move + mgr_->rejectMove(); + } + + if (valid_samples == 0) { + mgr_->getLogger()->warn( + DPL, + 902, + "No valid samples for adaptive weight calculation, using fallback"); + return 1.0 * mgr_->getGrid()->getSiteWidth().v; + } + + // Calculate averages + double avg_hpwl_delta = total_hpwl_delta / valid_samples; + double avg_cong_improvement = total_cong_improvement / valid_samples; + + // Calculate adaptive weight + double adaptive_weight; + if (avg_cong_improvement > 0) { + adaptive_weight = (avg_hpwl_delta / avg_cong_improvement) * user_knob; + } else { + adaptive_weight = 0.5 * mgr_->getGrid()->getSiteWidth().v; + } + + mgr_->getLogger()->info(DPL, + 903, + "Adaptive congestion weight: avg_hpwl_delta={:.2f}, " + "avg_cong_improvement={:.6f}, " + "samples={}, weight={:.3f}", + avg_hpwl_delta, + avg_cong_improvement, + valid_samples, + adaptive_weight); + + return adaptive_weight; +} + //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// void DetailedGlobalSwap::stats() diff --git a/src/dpl/src/optimization/detailed_global.h b/src/dpl/src/optimization/detailed_global.h index fb4e351081a..61334f24ad5 100644 --- a/src/dpl/src/optimization/detailed_global.h +++ b/src/dpl/src/optimization/detailed_global.h @@ -16,6 +16,7 @@ class Edge; class Architecture; class DetailedMgr; class Network; +struct GlobalSwapParams; class 
DetailedGlobalSwap : public DetailedGenerator { @@ -37,6 +38,9 @@ class DetailedGlobalSwap : public DetailedGenerator bool calculateEdgeBB(Edge* ed, Node* nd, odb::Rect& bbox); bool getRange(Node*, odb::Rect&); bool generate(Node* ndi); + bool generateWirelengthOptimalMove(Node* ndi); + bool generateRandomMove(Node* ndi); + double calculateAdaptiveCongestionWeight(); // Standard stuff. DetailedMgr* mgr_; @@ -55,6 +59,14 @@ class DetailedGlobalSwap : public DetailedGenerator int attempts_; int moves_; int swaps_; + + // Two-pass optimization state + double budget_hpwl_ = 0.0; + bool is_profiling_pass_ = false; + double tradeoff_ = 0.2; + double congestion_weight_ = 0.0; + std::vector congestion_contribution_; + const GlobalSwapParams* swap_params_ = nullptr; }; } // namespace dpl diff --git a/src/dpl/src/optimization/detailed_global_legacy.cxx b/src/dpl/src/optimization/detailed_global_legacy.cxx new file mode 100644 index 00000000000..6ce70ea8f97 --- /dev/null +++ b/src/dpl/src/optimization/detailed_global_legacy.cxx @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2021-2025, The OpenROAD Authors +// This file exists to preserve traditional DPO behavior and should at some +// point be merged with the newer DPO flow more cohesively. 
+ +#include "detailed_global_legacy.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "boost/tokenizer.hpp" +#include "detailed_manager.h" +#include "dpl/Opendp.h" +#include "infrastructure/Objects.h" +#include "objective/detailed_hpwl.h" +#include "optimization/detailed_generator.h" +#include "util/utility.h" +#include "utl/Logger.h" + +namespace dpl { + +namespace legacy { + +using utl::DPL; + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +DetailedGlobalSwap::DetailedGlobalSwap(Architecture* arch, Network* network) + : DetailedGenerator("global swap"), + mgr_(nullptr), + arch_(arch), + network_(network), + skipNetsLargerThanThis_(100), + traversal_(0), + attempts_(0), + moves_(0), + swaps_(0) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +DetailedGlobalSwap::DetailedGlobalSwap() : DetailedGlobalSwap(nullptr, nullptr) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +void DetailedGlobalSwap::run(DetailedMgr* mgrPtr, const std::string& command) +{ + // A temporary interface to allow for a string which we will decode to create + // the arguments. 
+ boost::char_separator separators(" \r\t\n;"); + boost::tokenizer> tokens(command, separators); + std::vector args; + for (const auto& token : tokens) { + args.push_back(token); + } + run(mgrPtr, args); +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +void DetailedGlobalSwap::run(DetailedMgr* mgrPtr, + std::vector& args) +{ + // Given the arguments, figure out which routine to run to do the reordering. + + mgr_ = mgrPtr; + arch_ = mgr_->getArchitecture(); + network_ = mgr_->getNetwork(); + + int passes = 1; + double tol = 0.01; + for (size_t i = 1; i < args.size(); i++) { + if (args[i] == "-p" && i + 1 < args.size()) { + passes = std::atoi(args[++i].c_str()); + } else if (args[i] == "-t" && i + 1 < args.size()) { + tol = std::atof(args[++i].c_str()); + } + } + passes = std::max(passes, 1); + tol = std::max(tol, 0.01); + + int64_t last_hpwl, curr_hpwl, init_hpwl; + uint64_t hpwl_x, hpwl_y; + + curr_hpwl = Utility::hpwl(network_, hpwl_x, hpwl_y); + init_hpwl = curr_hpwl; + if (init_hpwl == 0) { + return; + } + for (int p = 1; p <= passes; p++) { + last_hpwl = curr_hpwl; + + // XXX: Actually, global swapping is nothing more than random + // greedy improvement in which the move generating is done + // using this object to generate a target which is the optimal + // region for each candidate cell. 
+ globalSwap(); + + curr_hpwl = Utility::hpwl(network_, hpwl_x, hpwl_y); + + mgr_->getLogger()->info(DPL, + 306, + "Pass {:3d} of global swaps; hpwl is {:.6e}.", + p, + (double) curr_hpwl); + + if (last_hpwl == 0 + || std::abs(curr_hpwl - last_hpwl) / (double) last_hpwl <= tol) { + break; + } + } + double curr_imp = (((init_hpwl - curr_hpwl) / (double) init_hpwl) * 100.); + mgr_->getLogger()->info(DPL, + 307, + "End of global swaps; objective is {:.6e}, " + "improvement is {:.2f} percent.", + (double) curr_hpwl, + curr_imp); +} + +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +void DetailedGlobalSwap::globalSwap() +{ + // Nothing more than random greedy improvement with only a hpwl objective + // and done such that every candidate cell is considered once!!! + + traversal_ = 0; + edgeMask_.resize(network_->getNumEdges()); + std::ranges::fill(edgeMask_, 0); + + mgr_->resortSegments(); + + // Get candidate cells. + std::vector candidates = mgr_->getSingleHeightCells(); + mgr_->shuffle(candidates); + + // Wirelength objective. + DetailedHPWL hpwlObj(network_); + hpwlObj.init(mgr_, nullptr); // Ignore orientation. + + double currHpwl = hpwlObj.curr(); + double nextHpwl = 0.; + // Consider each candidate cell once. + for (auto ndi : candidates) { + if (!generate(ndi)) { + continue; + } + + double delta = hpwlObj.delta(mgr_->getJournal()); + + nextHpwl = currHpwl - delta; // A positive delta lowers the HPWL. + + if (nextHpwl <= currHpwl) { + hpwlObj.accept(); + mgr_->acceptMove(); + currHpwl = nextHpwl; + } else { + mgr_->rejectMove(); + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +bool DetailedGlobalSwap::getRange(Node* nd, odb::Rect& nodeBbox) +{ + // Determines the median location for a node.
+ + Edge* ed; + unsigned mid; + + Pin* pin; + unsigned t = 0; + + DbuX xmin = arch_->getMinX(); + DbuX xmax = arch_->getMaxX(); + DbuY ymin = arch_->getMinY(); + DbuY ymax = arch_->getMaxY(); + + xpts_.clear(); + ypts_.clear(); + for (int n = 0; n < nd->getNumPins(); n++) { + pin = nd->getPins()[n]; + + ed = pin->getEdge(); + + nodeBbox.mergeInit(); + + int numPins = ed->getNumPins(); + if (numPins <= 1) { + continue; + } + if (numPins > skipNetsLargerThanThis_) { + continue; + } + if (!calculateEdgeBB(ed, nd, nodeBbox)) { + continue; + } + + // We've computed an interval for the pin. We need to alter it to work for + // the cell center. Also, we need to avoid going off the edge of the chip. + nodeBbox.set_xlo(std::min( + std::max(xmin.v, nodeBbox.xMin() - pin->getOffsetX().v), xmax.v)); + nodeBbox.set_xhi(std::max( + std::min(xmax.v, nodeBbox.xMax() - pin->getOffsetX().v), xmin.v)); + nodeBbox.set_ylo(std::min( + std::max(ymin.v, nodeBbox.yMin() - pin->getOffsetY().v), ymax.v)); + nodeBbox.set_yhi(std::max( + std::min(ymax.v, nodeBbox.yMax() - pin->getOffsetY().v), ymin.v)); + + // Record the location and pin offset used to generate this point. + + xpts_.push_back(nodeBbox.xMin()); + xpts_.push_back(nodeBbox.xMax()); + + ypts_.push_back(nodeBbox.yMin()); + ypts_.push_back(nodeBbox.yMax()); + + ++t; + ++t; + } + + // If, for some weird reason, we didn't find anything connected, then + // return false to indicate that there's nowhere to move the cell. + if (t <= 1) { + return false; + } + + // Get the median values. 
+ mid = t >> 1; + + std::ranges::sort(xpts_); + std::ranges::sort(ypts_); + + nodeBbox.set_xlo(xpts_[mid - 1]); + nodeBbox.set_xhi(xpts_[mid]); + + nodeBbox.set_ylo(ypts_[mid - 1]); + nodeBbox.set_yhi(ypts_[mid]); + + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +bool DetailedGlobalSwap::calculateEdgeBB(Edge* ed, Node* nd, odb::Rect& bbox) +{ + // Computes the bounding box of an edge. Node 'nd' is the node to SKIP. + DbuX curX; + DbuY curY; + + bbox.mergeInit(); + + int count = 0; + for (Pin* pin : ed->getPins()) { + auto other = pin->getNode(); + if (other == nd) { + continue; + } + curX = other->getCenterX() + pin->getOffsetX().v; + curY = other->getCenterY() + pin->getOffsetY().v; + + bbox.set_xlo(std::min(curX.v, bbox.xMin())); + bbox.set_xhi(std::max(curX.v, bbox.xMax())); + bbox.set_ylo(std::min(curY.v, bbox.yMin())); + bbox.set_yhi(std::max(curY.v, bbox.yMax())); + + ++count; + } + + return (count == 0) ? false : true; +} +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +bool DetailedGlobalSwap::generate(Node* ndi) +{ + double yi = ndi->getBottom().v + (0.5 * ndi->getHeight().v); + double xi = ndi->getLeft().v + (0.5 * ndi->getWidth().v); + + // Determine optimal region. + odb::Rect bbox; + if (!getRange(ndi, bbox)) { + // Failed to find an optimal region. + return false; + } + if (xi >= bbox.xMin() && xi <= bbox.xMax() && yi >= bbox.yMin() + && yi <= bbox.yMax()) { + // If cell inside box, do nothing. + return false; + } + + // Observe displacement limit. I suppose there are options. + // If we cannot move into the optimal region, we could try + // to move closer to it. Or, we could just reject if we cannot + // get into the optimal region. 
+ int dispX, dispY; + mgr_->getMaxDisplacement(dispX, dispY); + odb::Rect lbox(ndi->getLeft().v - dispX, + ndi->getBottom().v - dispY, + ndi->getLeft().v + dispX, + ndi->getBottom().v + dispY); + if (lbox.xMax() <= bbox.xMin()) { + bbox.set_xlo(ndi->getLeft().v); + bbox.set_xhi(lbox.xMax()); + } else if (lbox.xMin() >= bbox.xMax()) { + bbox.set_xlo(lbox.xMin()); + bbox.set_xhi(ndi->getLeft().v); + } else { + bbox.set_xlo(std::max(bbox.xMin(), lbox.xMin())); + bbox.set_xhi(std::min(bbox.xMax(), lbox.xMax())); + } + if (lbox.yMax() <= bbox.yMin()) { + bbox.set_ylo(ndi->getBottom().v); + bbox.set_yhi(lbox.yMax()); + } else if (lbox.yMin() >= bbox.yMax()) { + bbox.set_ylo(lbox.yMin()); + bbox.set_yhi(ndi->getBottom().v); + } else { + bbox.set_ylo(std::max(bbox.yMin(), lbox.yMin())); + bbox.set_yhi(std::min(bbox.yMax(), lbox.yMax())); + } + + if (mgr_->getNumReverseCellToSegs(ndi->getId()) != 1) { + return false; + } + int si = mgr_->getReverseCellToSegs(ndi->getId())[0]->getSegId(); + + // Position target so center of cell at center of box. + DbuX xj{(int) std::floor((0.5 * (bbox.xMin() + bbox.xMax())) + - (0.5 * ndi->getWidth().v))}; + DbuY yj{(int) std::floor((0.5 * (bbox.yMin() + bbox.yMax())) + - (0.5 * ndi->getHeight().v))}; + + // Row and segment for the destination. + int rj = arch_->find_closest_row(yj); + yj = DbuY{arch_->getRow(rj)->getBottom()}; // Row alignment. 
+ int sj = -1; + for (int s = 0; s < mgr_->getNumSegsInRow(rj); s++) { + DetailedSeg* segPtr = mgr_->getSegsInRow(rj)[s]; + if (xj >= segPtr->getMinX() && xj <= segPtr->getMaxX()) { + sj = segPtr->getSegId(); + break; + } + } + if (sj == -1) { + return false; + } + if (ndi->getGroupId() != mgr_->getSegment(sj)->getRegId()) { + return false; + } + + if (mgr_->tryMove(ndi, ndi->getLeft(), ndi->getBottom(), si, xj, yj, sj)) { + ++moves_; + return true; + } + if (mgr_->trySwap(ndi, ndi->getLeft(), ndi->getBottom(), si, xj, yj, sj)) { + ++swaps_; + return true; + } + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +void DetailedGlobalSwap::init(DetailedMgr* mgr) +{ + mgr_ = mgr; + arch_ = mgr->getArchitecture(); + network_ = mgr->getNetwork(); + + traversal_ = 0; + edgeMask_.resize(network_->getNumEdges()); + std::ranges::fill(edgeMask_, 0); +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +bool DetailedGlobalSwap::generate(DetailedMgr* mgr, + std::vector& candidates) +{ + ++attempts_; + + mgr_ = mgr; + arch_ = mgr->getArchitecture(); + network_ = mgr->getNetwork(); + + Node* ndi = candidates[mgr_->getRandom(candidates.size())]; + + return generate(ndi); +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +void DetailedGlobalSwap::stats() +{ + mgr_->getLogger()->info( + DPL, + 348, + "Generator {:s}, " + "Cumulative attempts {:d}, swaps {:d}, moves {:5d} since last reset.", + getName().c_str(), + attempts_, + swaps_, + moves_); +} + +} // namespace legacy + +} // namespace dpl diff --git a/src/dpl/src/optimization/detailed_global_legacy.h b/src/dpl/src/optimization/detailed_global_legacy.h new file mode 
100644 index 00000000000..3e1fe3bf3f1 --- /dev/null +++ b/src/dpl/src/optimization/detailed_global_legacy.h @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2021-2025, The OpenROAD Authors + +#pragma once + +#include +#include + +#include "detailed_generator.h" + +namespace odb { +class Rect; +} +namespace dpl { +class Edge; +class Architecture; +class DetailedMgr; +class Network; + +namespace legacy { +using ::dpl::Architecture; +using ::dpl::DetailedMgr; +using ::dpl::Edge; +using ::dpl::Network; + +class DetailedGlobalSwap : public DetailedGenerator +{ + public: + DetailedGlobalSwap(Architecture* arch, Network* network); + DetailedGlobalSwap(); + + // Interfaces for scripting. + void run(DetailedMgr* mgrPtr, const std::string& command); + void run(DetailedMgr* mgrPtr, std::vector& args); + + // Interface for move generation. + bool generate(DetailedMgr* mgr, std::vector& candidates) override; + void stats() override; + void init(DetailedMgr* mgr) override; + + private: + void globalSwap(); // tries to avoid overlap. + bool calculateEdgeBB(Edge* ed, Node* nd, odb::Rect& bbox); + bool getRange(Node*, odb::Rect&); + bool generate(Node* ndi); + + // Standard stuff. + DetailedMgr* mgr_; + Architecture* arch_; + Network* network_; + + // Other. + int skipNetsLargerThanThis_; + std::vector edgeMask_; + int traversal_; + + std::vector xpts_; + std::vector ypts_; + + // For use as a move generator. 
+ int attempts_; + int moves_; + int swaps_; +}; + +} // namespace legacy + +} // namespace dpl diff --git a/src/dpl/src/optimization/detailed_manager.h b/src/dpl/src/optimization/detailed_manager.h index c252229cbc6..3c9ff47080e 100644 --- a/src/dpl/src/optimization/detailed_manager.h +++ b/src/dpl/src/optimization/detailed_manager.h @@ -88,6 +88,16 @@ class DetailedMgr x = maxDispX_; y = maxDispY_; } + void setGlobalSwapParams(const GlobalSwapParams& params) + { + global_swap_params_ = params; + } + const GlobalSwapParams& getGlobalSwapParams() const + { + return global_swap_params_; + } + void setExtraDplEnabled(bool enabled) { extra_dpl_enabled_ = enabled; } + bool isExtraDplEnabled() const { return extra_dpl_enabled_; } int getMaxDisplacementX() const { return maxDispX_; } int getMaxDisplacementY() const { return maxDispY_; } bool getDisallowOneSiteGaps() const { return disallowOneSiteGaps_; } @@ -244,6 +254,7 @@ class DetailedMgr void setMoveLimit(unsigned int newMoveLimit) { moveLimit_ = newMoveLimit; } // Journal operations + Journal& getJournal() { return journal_; } const Journal& getJournal() const { return journal_; } void eraseFromGrid(Node* node); void paintInGrid(Node* node); @@ -342,6 +353,8 @@ class DetailedMgr // Generic place for utilization. double targetUt_{1.0}; + GlobalSwapParams global_swap_params_; + bool extra_dpl_enabled_ = false; // Target displacement limits. 
int maxDispX_; diff --git a/src/dpl/src/util/journal.h b/src/dpl/src/util/journal.h index 62e1a18ce67..e0bb22f5fbc 100644 --- a/src/dpl/src/util/journal.h +++ b/src/dpl/src/util/journal.h @@ -114,6 +114,9 @@ class Journal size_t size() const { return actions_.size(); } const std::set& getAffectedNodes() const { return affected_nodes_; } const std::set& getAffectedEdges() const { return affected_edges_; } + // iterator support for range-based for loops + auto begin() const { return actions_.begin(); } + auto end() const { return actions_.end(); } // other void clear(); void undo(bool positions_only = false) const;