001/* 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.util; 020 021import java.util.Arrays; 022import java.util.Deque; 023import java.util.LinkedList; 024 025import org.apache.yetus.audience.InterfaceAudience; 026 027/** 028 * Computes the optimal (minimal cost) assignment of jobs to workers (or other 029 * analogous) concepts given a cost matrix of each pair of job and worker, using 030 * the algorithm by James Munkres in "Algorithms for the Assignment and 031 * Transportation Problems", with additional optimizations as described by Jin 032 * Kue Wong in "A New Implementation of an Algorithm for the Optimal Assignment 033 * Problem: An Improved Version of Munkres' Algorithm". The algorithm runs in 034 * O(n^3) time and need O(n^2) auxiliary space where n is the number of jobs or 035 * workers, whichever is greater. 036 */ 037@InterfaceAudience.Private 038public class MunkresAssignment { 039 040 // The original algorithm by Munkres uses the terms STAR and PRIME to denote 041 // different states of zero values in the cost matrix. These values are 042 // represented as byte constants instead of enums to save space in the mask 043 // matrix by a factor of 4n^2 where n is the size of the problem. 044 private static final byte NONE = 0; 045 private static final byte STAR = 1; 046 private static final byte PRIME = 2; 047 048 // The algorithm requires that the number of column is at least as great as 049 // the number of rows. If that is not the case, then the cost matrix should 050 // be transposed before computation, and the solution matrix transposed before 051 // returning to the caller. 052 private final boolean transposed; 053 054 // The number of rows of internal matrices. 055 private final int rows; 056 057 // The number of columns of internal matrices. 058 private final int cols; 059 060 // The cost matrix, the cost of assigning each row index to column index. 061 private float[][] cost; 062 063 // Mask of zero cost assignment states. 064 private byte[][] mask; 065 066 // Covering some rows of the cost matrix. 067 private boolean[] rowsCovered; 068 069 // Covering some columns of the cost matrix. 070 private boolean[] colsCovered; 071 072 // The alternating path between starred zeroes and primed zeroes 073 private Deque<Pair<Integer, Integer>> path; 074 075 // The solution, marking which rows should be assigned to which columns. The 076 // positions of elements in this array correspond to the rows of the cost 077 // matrix, and the value of each element correspond to the columns of the cost 078 // matrix, i.e. assignments[i] = j indicates that row i should be assigned to 079 // column j. 080 private int[] assignments; 081 082 // Improvements described by Jin Kue Wong cache the least value in each row, 083 // as well as the column index of the least value in each row, and the pending 084 // adjustments to each row and each column. 085 private float[] leastInRow; 086 private int[] leastInRowIndex; 087 private float[] rowAdjust; 088 private float[] colAdjust; 089 090 /** 091 * Construct a new problem instance with the specified cost matrix. The cost 092 * matrix must be rectangular, though not necessarily square. If one dimension 093 * is greater than the other, some elements in the greater dimension will not 094 * be assigned. The input cost matrix will not be modified. 095 * @param costMatrix 096 */ 097 public MunkresAssignment(float[][] costMatrix) { 098 // The algorithm assumes that the number of columns is at least as great as 099 // the number of rows. If this is not the case of the input matrix, then 100 // all internal structures must be transposed relative to the input. 101 this.transposed = costMatrix.length > costMatrix[0].length; 102 if (this.transposed) { 103 this.rows = costMatrix[0].length; 104 this.cols = costMatrix.length; 105 } else { 106 this.rows = costMatrix.length; 107 this.cols = costMatrix[0].length; 108 } 109 110 cost = new float[rows][cols]; 111 mask = new byte[rows][cols]; 112 rowsCovered = new boolean[rows]; 113 colsCovered = new boolean[cols]; 114 path = new LinkedList<>(); 115 116 leastInRow = new float[rows]; 117 leastInRowIndex = new int[rows]; 118 rowAdjust = new float[rows]; 119 colAdjust = new float[cols]; 120 121 assignments = null; 122 123 // Copy cost matrix. 124 if (transposed) { 125 for (int r = 0; r < rows; r++) { 126 for (int c = 0; c < cols; c++) { 127 cost[r][c] = costMatrix[c][r]; 128 } 129 } 130 } else { 131 for (int r = 0; r < rows; r++) { 132 System.arraycopy(costMatrix[r], 0, cost[r], 0, cols); 133 } 134 } 135 136 // Costs must be finite otherwise the matrix can get into a bad state where 137 // no progress can be made. If your use case depends on a distinction 138 // between costs of MAX_VALUE and POSITIVE_INFINITY, you're doing it wrong. 139 for (int r = 0; r < rows; r++) { 140 for (int c = 0; c < cols; c++) { 141 if (cost[r][c] == Float.POSITIVE_INFINITY) { 142 cost[r][c] = Float.MAX_VALUE; 143 } 144 } 145 } 146 } 147 148 /** 149 * Get the optimal assignments. The returned array will have the same number 150 * of elements as the number of elements as the number of rows in the input 151 * cost matrix. Each element will indicate which column should be assigned to 152 * that row or -1 if no column should be assigned, i.e. if result[i] = j then 153 * row i should be assigned to column j. Subsequent invocations of this method 154 * will simply return the same object without additional computation. 155 * @return an array with the optimal assignments 156 */ 157 public int[] solve() { 158 // If this assignment problem has already been solved, return the known 159 // solution 160 if (assignments != null) { 161 return assignments; 162 } 163 164 preliminaries(); 165 166 // Find the optimal assignments. 167 while (!testIsDone()) { 168 while (!stepOne()) { 169 stepThree(); 170 } 171 stepTwo(); 172 } 173 174 // Extract the assignments from the mask matrix. 175 if (transposed) { 176 assignments = new int[cols]; 177 outer: 178 for (int c = 0; c < cols; c++) { 179 for (int r = 0; r < rows; r++) { 180 if (mask[r][c] == STAR) { 181 assignments[c] = r; 182 continue outer; 183 } 184 } 185 // There is no assignment for this row of the input/output. 186 assignments[c] = -1; 187 } 188 } else { 189 assignments = new int[rows]; 190 outer: 191 for (int r = 0; r < rows; r++) { 192 for (int c = 0; c < cols; c++) { 193 if (mask[r][c] == STAR) { 194 assignments[r] = c; 195 continue outer; 196 } 197 } 198 } 199 } 200 201 // Once the solution has been computed, there is no need to keep any of the 202 // other internal structures. Clear all unnecessary internal references so 203 // the garbage collector may reclaim that memory. 204 cost = null; 205 mask = null; 206 rowsCovered = null; 207 colsCovered = null; 208 path = null; 209 leastInRow = null; 210 leastInRowIndex = null; 211 rowAdjust = null; 212 colAdjust = null; 213 214 return assignments; 215 } 216 217 /** 218 * Corresponds to the "preliminaries" step of the original algorithm. 219 * Guarantees that the matrix is an equivalent non-negative matrix with at 220 * least one zero in each row. 221 */ 222 private void preliminaries() { 223 for (int r = 0; r < rows; r++) { 224 // Find the minimum cost of each row. 225 float min = Float.POSITIVE_INFINITY; 226 for (int c = 0; c < cols; c++) { 227 min = Math.min(min, cost[r][c]); 228 } 229 230 // Subtract that minimum cost from each element in the row. 231 for (int c = 0; c < cols; c++) { 232 cost[r][c] -= min; 233 234 // If the element is now zero and there are no zeroes in the same row 235 // or column which are already starred, then star this one. There 236 // must be at least one zero because of subtracting the min cost. 237 if (cost[r][c] == 0 && !rowsCovered[r] && !colsCovered[c]) { 238 mask[r][c] = STAR; 239 // Cover this row and column so that no other zeroes in them can be 240 // starred. 241 rowsCovered[r] = true; 242 colsCovered[c] = true; 243 } 244 } 245 } 246 247 // Clear the covered rows and columns. 248 Arrays.fill(rowsCovered, false); 249 Arrays.fill(colsCovered, false); 250 } 251 252 /** 253 * Test whether the algorithm is done, i.e. we have the optimal assignment. 254 * This occurs when there is exactly one starred zero in each row. 255 * @return true if the algorithm is done 256 */ 257 private boolean testIsDone() { 258 // Cover all columns containing a starred zero. There can be at most one 259 // starred zero per column. Therefore, a covered column has an optimal 260 // assignment. 261 for (int r = 0; r < rows; r++) { 262 for (int c = 0; c < cols; c++) { 263 if (mask[r][c] == STAR) { 264 colsCovered[c] = true; 265 } 266 } 267 } 268 269 // Count the total number of covered columns. 270 int coveredCols = 0; 271 for (int c = 0; c < cols; c++) { 272 coveredCols += colsCovered[c] ? 1 : 0; 273 } 274 275 // Apply an row and column adjustments that are pending. 276 for (int r = 0; r < rows; r++) { 277 for (int c = 0; c < cols; c++) { 278 cost[r][c] += rowAdjust[r]; 279 cost[r][c] += colAdjust[c]; 280 } 281 } 282 283 // Clear the pending row and column adjustments. 284 Arrays.fill(rowAdjust, 0); 285 Arrays.fill(colAdjust, 0); 286 287 // The covers on columns and rows may have been reset, recompute the least 288 // value for each row. 289 for (int r = 0; r < rows; r++) { 290 leastInRow[r] = Float.POSITIVE_INFINITY; 291 for (int c = 0; c < cols; c++) { 292 if (!rowsCovered[r] && !colsCovered[c] && cost[r][c] < leastInRow[r]) { 293 leastInRow[r] = cost[r][c]; 294 leastInRowIndex[r] = c; 295 } 296 } 297 } 298 299 // If all columns are covered, then we are done. Since there may be more 300 // columns than rows, we are also done if the number of covered columns is 301 // at least as great as the number of rows. 302 return (coveredCols == cols || coveredCols >= rows); 303 } 304 305 /** 306 * Corresponds to step 1 of the original algorithm. 307 * @return false if all zeroes are covered 308 */ 309 private boolean stepOne() { 310 while (true) { 311 Pair<Integer, Integer> zero = findUncoveredZero(); 312 if (zero == null) { 313 // No uncovered zeroes, need to manipulate the cost matrix in step 314 // three. 315 return false; 316 } else { 317 // Prime the uncovered zero and find a starred zero in the same row. 318 mask[zero.getFirst()][zero.getSecond()] = PRIME; 319 Pair<Integer, Integer> star = starInRow(zero.getFirst()); 320 if (star != null) { 321 // Cover the row with both the newly primed zero and the starred zero. 322 // Since this is the only place where zeroes are primed, and we cover 323 // it here, and rows are only uncovered when primes are erased, then 324 // there can be at most one primed uncovered zero. 325 rowsCovered[star.getFirst()] = true; 326 colsCovered[star.getSecond()] = false; 327 updateMin(star.getFirst(), star.getSecond()); 328 } else { 329 // Will go to step two after, where a path will be constructed, 330 // starting from the uncovered primed zero (there is only one). Since 331 // we have already found it, save it as the first node in the path. 332 path.clear(); 333 path.offerLast(new Pair<>(zero.getFirst(), zero.getSecond())); 334 return true; 335 } 336 } 337 } 338 } 339 340 /** 341 * Corresponds to step 2 of the original algorithm. 342 */ 343 private void stepTwo() { 344 // Construct a path of alternating starred zeroes and primed zeroes, where 345 // each starred zero is in the same column as the previous primed zero, and 346 // each primed zero is in the same row as the previous starred zero. The 347 // path will always end in a primed zero. 348 while (true) { 349 Pair<Integer, Integer> star = starInCol(path.getLast().getSecond()); 350 if (star != null) { 351 path.offerLast(star); 352 } else { 353 break; 354 } 355 Pair<Integer, Integer> prime = primeInRow(path.getLast().getFirst()); 356 path.offerLast(prime); 357 } 358 359 // Augment path - unmask all starred zeroes and star all primed zeroes. All 360 // nodes in the path will be either starred or primed zeroes. The set of 361 // starred zeroes is independent and now one larger than before. 362 for (Pair<Integer, Integer> p : path) { 363 if (mask[p.getFirst()][p.getSecond()] == STAR) { 364 mask[p.getFirst()][p.getSecond()] = NONE; 365 } else { 366 mask[p.getFirst()][p.getSecond()] = STAR; 367 } 368 } 369 370 // Clear all covers from rows and columns. 371 Arrays.fill(rowsCovered, false); 372 Arrays.fill(colsCovered, false); 373 374 // Remove the prime mask from all primed zeroes. 375 for (int r = 0; r < rows; r++) { 376 for (int c = 0; c < cols; c++) { 377 if (mask[r][c] == PRIME) { 378 mask[r][c] = NONE; 379 } 380 } 381 } 382 } 383 384 /** 385 * Corresponds to step 3 of the original algorithm. 386 */ 387 private void stepThree() { 388 // Find the minimum uncovered cost. 389 float min = leastInRow[0]; 390 for (int r = 1; r < rows; r++) { 391 if (leastInRow[r] < min) { 392 min = leastInRow[r]; 393 } 394 } 395 396 // Add the minimum cost to each of the costs in a covered row, or subtract 397 // the minimum cost from each of the costs in an uncovered column. As an 398 // optimization, do not actually modify the cost matrix yet, but track the 399 // adjustments that need to be made to each row and column. 400 for (int r = 0; r < rows; r++) { 401 if (rowsCovered[r]) { 402 rowAdjust[r] += min; 403 } 404 } 405 for (int c = 0; c < cols; c++) { 406 if (!colsCovered[c]) { 407 colAdjust[c] -= min; 408 } 409 } 410 411 // Since the cost matrix is not being updated yet, the minimum uncovered 412 // cost per row must be updated. 413 for (int r = 0; r < rows; r++) { 414 if (!colsCovered[leastInRowIndex[r]]) { 415 // The least value in this row was in an uncovered column, meaning that 416 // it would have had the minimum value subtracted from it, and therefore 417 // will still be the minimum value in that row. 418 leastInRow[r] -= min; 419 } else { 420 // The least value in this row was in a covered column and would not 421 // have had the minimum value subtracted from it, so the minimum value 422 // could be some in another column. 423 for (int c = 0; c < cols; c++) { 424 if (cost[r][c] + colAdjust[c] + rowAdjust[r] < leastInRow[r]) { 425 leastInRow[r] = cost[r][c] + colAdjust[c] + rowAdjust[r]; 426 leastInRowIndex[r] = c; 427 } 428 } 429 } 430 } 431 } 432 433 /** 434 * Find a zero cost assignment which is not covered. If there are no zero cost 435 * assignments which are uncovered, then null will be returned. 436 * @return pair of row and column indices of an uncovered zero or null 437 */ 438 private Pair<Integer, Integer> findUncoveredZero() { 439 for (int r = 0; r < rows; r++) { 440 if (leastInRow[r] == 0) { 441 return new Pair<>(r, leastInRowIndex[r]); 442 } 443 } 444 return null; 445 } 446 447 /** 448 * A specified row has become covered, and a specified column has become 449 * uncovered. The least value per row may need to be updated. 450 * @param row the index of the row which was just covered 451 * @param col the index of the column which was just uncovered 452 */ 453 private void updateMin(int row, int col) { 454 // If the row is covered we want to ignore it as far as least values go. 455 leastInRow[row] = Float.POSITIVE_INFINITY; 456 457 for (int r = 0; r < rows; r++) { 458 // Since the column has only just been uncovered, it could not have any 459 // pending adjustments. Only covered rows can have pending adjustments 460 // and covered costs do not count toward row minimums. Therefore, we do 461 // not need to consider rowAdjust[r] or colAdjust[col]. 462 if (!rowsCovered[r] && cost[r][col] < leastInRow[r]) { 463 leastInRow[r] = cost[r][col]; 464 leastInRowIndex[r] = col; 465 } 466 } 467 } 468 469 /** 470 * Find a starred zero in a specified row. If there are no starred zeroes in 471 * the specified row, then null will be returned. 472 * @param r the index of the row to be searched 473 * @return pair of row and column indices of starred zero or null 474 */ 475 private Pair<Integer, Integer> starInRow(int r) { 476 for (int c = 0; c < cols; c++) { 477 if (mask[r][c] == STAR) { 478 return new Pair<>(r, c); 479 } 480 } 481 return null; 482 } 483 484 /** 485 * Find a starred zero in the specified column. If there are no starred zeroes 486 * in the specified row, then null will be returned. 487 * @param c the index of the column to be searched 488 * @return pair of row and column indices of starred zero or null 489 */ 490 private Pair<Integer, Integer> starInCol(int c) { 491 for (int r = 0; r < rows; r++) { 492 if (mask[r][c] == STAR) { 493 return new Pair<>(r, c); 494 } 495 } 496 return null; 497 } 498 499 /** 500 * Find a primed zero in the specified row. If there are no primed zeroes in 501 * the specified row, then null will be returned. 502 * @param r the index of the row to be searched 503 * @return pair of row and column indices of primed zero or null 504 */ 505 private Pair<Integer, Integer> primeInRow(int r) { 506 for (int c = 0; c < cols; c++) { 507 if (mask[r][c] == PRIME) { 508 return new Pair<>(r, c); 509 } 510 } 511 return null; 512 } 513}