001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import java.util.Arrays; 021import java.util.Deque; 022import java.util.LinkedList; 023import org.apache.yetus.audience.InterfaceAudience; 024 025/** 026 * Computes the optimal (minimal cost) assignment of jobs to workers (or other analogous) concepts 027 * given a cost matrix of each pair of job and worker, using the algorithm by James Munkres in 028 * "Algorithms for the Assignment and Transportation Problems", with additional optimizations as 029 * described by Jin Kue Wong in "A New Implementation of an Algorithm for the Optimal Assignment 030 * Problem: An Improved Version of Munkres' Algorithm". The algorithm runs in O(n^3) time and need 031 * O(n^2) auxiliary space where n is the number of jobs or workers, whichever is greater. 032 */ 033@InterfaceAudience.Private 034public class MunkresAssignment { 035 036 // The original algorithm by Munkres uses the terms STAR and PRIME to denote 037 // different states of zero values in the cost matrix. These values are 038 // represented as byte constants instead of enums to save space in the mask 039 // matrix by a factor of 4n^2 where n is the size of the problem. 040 private static final byte NONE = 0; 041 private static final byte STAR = 1; 042 private static final byte PRIME = 2; 043 044 // The algorithm requires that the number of column is at least as great as 045 // the number of rows. If that is not the case, then the cost matrix should 046 // be transposed before computation, and the solution matrix transposed before 047 // returning to the caller. 048 private final boolean transposed; 049 050 // The number of rows of internal matrices. 051 private final int rows; 052 053 // The number of columns of internal matrices. 054 private final int cols; 055 056 // The cost matrix, the cost of assigning each row index to column index. 057 private float[][] cost; 058 059 // Mask of zero cost assignment states. 060 private byte[][] mask; 061 062 // Covering some rows of the cost matrix. 063 private boolean[] rowsCovered; 064 065 // Covering some columns of the cost matrix. 066 private boolean[] colsCovered; 067 068 // The alternating path between starred zeroes and primed zeroes 069 private Deque<Pair<Integer, Integer>> path; 070 071 // The solution, marking which rows should be assigned to which columns. The 072 // positions of elements in this array correspond to the rows of the cost 073 // matrix, and the value of each element correspond to the columns of the cost 074 // matrix, i.e. assignments[i] = j indicates that row i should be assigned to 075 // column j. 076 private int[] assignments; 077 078 // Improvements described by Jin Kue Wong cache the least value in each row, 079 // as well as the column index of the least value in each row, and the pending 080 // adjustments to each row and each column. 081 private float[] leastInRow; 082 private int[] leastInRowIndex; 083 private float[] rowAdjust; 084 private float[] colAdjust; 085 086 /** 087 * Construct a new problem instance with the specified cost matrix. The cost matrix must be 088 * rectangular, though not necessarily square. If one dimension is greater than the other, some 089 * elements in the greater dimension will not be assigned. The input cost matrix will not be 090 * modified. 091 */ 092 public MunkresAssignment(float[][] costMatrix) { 093 // The algorithm assumes that the number of columns is at least as great as 094 // the number of rows. If this is not the case of the input matrix, then 095 // all internal structures must be transposed relative to the input. 096 this.transposed = costMatrix.length > costMatrix[0].length; 097 if (this.transposed) { 098 this.rows = costMatrix[0].length; 099 this.cols = costMatrix.length; 100 } else { 101 this.rows = costMatrix.length; 102 this.cols = costMatrix[0].length; 103 } 104 105 cost = new float[rows][cols]; 106 mask = new byte[rows][cols]; 107 rowsCovered = new boolean[rows]; 108 colsCovered = new boolean[cols]; 109 path = new LinkedList<>(); 110 111 leastInRow = new float[rows]; 112 leastInRowIndex = new int[rows]; 113 rowAdjust = new float[rows]; 114 colAdjust = new float[cols]; 115 116 assignments = null; 117 118 // Copy cost matrix. 119 if (transposed) { 120 for (int r = 0; r < rows; r++) { 121 for (int c = 0; c < cols; c++) { 122 cost[r][c] = costMatrix[c][r]; 123 } 124 } 125 } else { 126 for (int r = 0; r < rows; r++) { 127 System.arraycopy(costMatrix[r], 0, cost[r], 0, cols); 128 } 129 } 130 131 // Costs must be finite otherwise the matrix can get into a bad state where 132 // no progress can be made. If your use case depends on a distinction 133 // between costs of MAX_VALUE and POSITIVE_INFINITY, you're doing it wrong. 134 for (int r = 0; r < rows; r++) { 135 for (int c = 0; c < cols; c++) { 136 if (cost[r][c] == Float.POSITIVE_INFINITY) { 137 cost[r][c] = Float.MAX_VALUE; 138 } 139 } 140 } 141 } 142 143 /** 144 * Get the optimal assignments. The returned array will have the same number of elements as the 145 * number of elements as the number of rows in the input cost matrix. Each element will indicate 146 * which column should be assigned to that row or -1 if no column should be assigned, i.e. if 147 * result[i] = j then row i should be assigned to column j. Subsequent invocations of this method 148 * will simply return the same object without additional computation. 149 * @return an array with the optimal assignments 150 */ 151 public int[] solve() { 152 // If this assignment problem has already been solved, return the known 153 // solution 154 if (assignments != null) { 155 return assignments; 156 } 157 158 preliminaries(); 159 160 // Find the optimal assignments. 161 while (!testIsDone()) { 162 while (!stepOne()) { 163 stepThree(); 164 } 165 stepTwo(); 166 } 167 168 // Extract the assignments from the mask matrix. 169 if (transposed) { 170 assignments = new int[cols]; 171 outer: for (int c = 0; c < cols; c++) { 172 for (int r = 0; r < rows; r++) { 173 if (mask[r][c] == STAR) { 174 assignments[c] = r; 175 continue outer; 176 } 177 } 178 // There is no assignment for this row of the input/output. 179 assignments[c] = -1; 180 } 181 } else { 182 assignments = new int[rows]; 183 outer: for (int r = 0; r < rows; r++) { 184 for (int c = 0; c < cols; c++) { 185 if (mask[r][c] == STAR) { 186 assignments[r] = c; 187 continue outer; 188 } 189 } 190 } 191 } 192 193 // Once the solution has been computed, there is no need to keep any of the 194 // other internal structures. Clear all unnecessary internal references so 195 // the garbage collector may reclaim that memory. 196 cost = null; 197 mask = null; 198 rowsCovered = null; 199 colsCovered = null; 200 path = null; 201 leastInRow = null; 202 leastInRowIndex = null; 203 rowAdjust = null; 204 colAdjust = null; 205 206 return assignments; 207 } 208 209 /** 210 * Corresponds to the "preliminaries" step of the original algorithm. Guarantees that the matrix 211 * is an equivalent non-negative matrix with at least one zero in each row. 212 */ 213 private void preliminaries() { 214 for (int r = 0; r < rows; r++) { 215 // Find the minimum cost of each row. 216 float min = Float.POSITIVE_INFINITY; 217 for (int c = 0; c < cols; c++) { 218 min = Math.min(min, cost[r][c]); 219 } 220 221 // Subtract that minimum cost from each element in the row. 222 for (int c = 0; c < cols; c++) { 223 cost[r][c] -= min; 224 225 // If the element is now zero and there are no zeroes in the same row 226 // or column which are already starred, then star this one. There 227 // must be at least one zero because of subtracting the min cost. 228 if (cost[r][c] == 0 && !rowsCovered[r] && !colsCovered[c]) { 229 mask[r][c] = STAR; 230 // Cover this row and column so that no other zeroes in them can be 231 // starred. 232 rowsCovered[r] = true; 233 colsCovered[c] = true; 234 } 235 } 236 } 237 238 // Clear the covered rows and columns. 239 Arrays.fill(rowsCovered, false); 240 Arrays.fill(colsCovered, false); 241 } 242 243 /** 244 * Test whether the algorithm is done, i.e. we have the optimal assignment. This occurs when there 245 * is exactly one starred zero in each row. 246 * @return true if the algorithm is done 247 */ 248 private boolean testIsDone() { 249 // Cover all columns containing a starred zero. There can be at most one 250 // starred zero per column. Therefore, a covered column has an optimal 251 // assignment. 252 for (int r = 0; r < rows; r++) { 253 for (int c = 0; c < cols; c++) { 254 if (mask[r][c] == STAR) { 255 colsCovered[c] = true; 256 } 257 } 258 } 259 260 // Count the total number of covered columns. 261 int coveredCols = 0; 262 for (int c = 0; c < cols; c++) { 263 coveredCols += colsCovered[c] ? 1 : 0; 264 } 265 266 // Apply an row and column adjustments that are pending. 267 for (int r = 0; r < rows; r++) { 268 for (int c = 0; c < cols; c++) { 269 cost[r][c] += rowAdjust[r]; 270 cost[r][c] += colAdjust[c]; 271 } 272 } 273 274 // Clear the pending row and column adjustments. 275 Arrays.fill(rowAdjust, 0); 276 Arrays.fill(colAdjust, 0); 277 278 // The covers on columns and rows may have been reset, recompute the least 279 // value for each row. 280 for (int r = 0; r < rows; r++) { 281 leastInRow[r] = Float.POSITIVE_INFINITY; 282 for (int c = 0; c < cols; c++) { 283 if (!rowsCovered[r] && !colsCovered[c] && cost[r][c] < leastInRow[r]) { 284 leastInRow[r] = cost[r][c]; 285 leastInRowIndex[r] = c; 286 } 287 } 288 } 289 290 // If all columns are covered, then we are done. Since there may be more 291 // columns than rows, we are also done if the number of covered columns is 292 // at least as great as the number of rows. 293 return (coveredCols == cols || coveredCols >= rows); 294 } 295 296 /** 297 * Corresponds to step 1 of the original algorithm. 298 * @return false if all zeroes are covered 299 */ 300 private boolean stepOne() { 301 while (true) { 302 Pair<Integer, Integer> zero = findUncoveredZero(); 303 if (zero == null) { 304 // No uncovered zeroes, need to manipulate the cost matrix in step 305 // three. 306 return false; 307 } else { 308 // Prime the uncovered zero and find a starred zero in the same row. 309 mask[zero.getFirst()][zero.getSecond()] = PRIME; 310 Pair<Integer, Integer> star = starInRow(zero.getFirst()); 311 if (star != null) { 312 // Cover the row with both the newly primed zero and the starred zero. 313 // Since this is the only place where zeroes are primed, and we cover 314 // it here, and rows are only uncovered when primes are erased, then 315 // there can be at most one primed uncovered zero. 316 rowsCovered[star.getFirst()] = true; 317 colsCovered[star.getSecond()] = false; 318 updateMin(star.getFirst(), star.getSecond()); 319 } else { 320 // Will go to step two after, where a path will be constructed, 321 // starting from the uncovered primed zero (there is only one). Since 322 // we have already found it, save it as the first node in the path. 323 path.clear(); 324 path.offerLast(new Pair<>(zero.getFirst(), zero.getSecond())); 325 return true; 326 } 327 } 328 } 329 } 330 331 /** 332 * Corresponds to step 2 of the original algorithm. 333 */ 334 private void stepTwo() { 335 // Construct a path of alternating starred zeroes and primed zeroes, where 336 // each starred zero is in the same column as the previous primed zero, and 337 // each primed zero is in the same row as the previous starred zero. The 338 // path will always end in a primed zero. 339 while (true) { 340 Pair<Integer, Integer> star = starInCol(path.getLast().getSecond()); 341 if (star != null) { 342 path.offerLast(star); 343 } else { 344 break; 345 } 346 Pair<Integer, Integer> prime = primeInRow(path.getLast().getFirst()); 347 path.offerLast(prime); 348 } 349 350 // Augment path - unmask all starred zeroes and star all primed zeroes. All 351 // nodes in the path will be either starred or primed zeroes. The set of 352 // starred zeroes is independent and now one larger than before. 353 for (Pair<Integer, Integer> p : path) { 354 if (mask[p.getFirst()][p.getSecond()] == STAR) { 355 mask[p.getFirst()][p.getSecond()] = NONE; 356 } else { 357 mask[p.getFirst()][p.getSecond()] = STAR; 358 } 359 } 360 361 // Clear all covers from rows and columns. 362 Arrays.fill(rowsCovered, false); 363 Arrays.fill(colsCovered, false); 364 365 // Remove the prime mask from all primed zeroes. 366 for (int r = 0; r < rows; r++) { 367 for (int c = 0; c < cols; c++) { 368 if (mask[r][c] == PRIME) { 369 mask[r][c] = NONE; 370 } 371 } 372 } 373 } 374 375 /** 376 * Corresponds to step 3 of the original algorithm. 377 */ 378 private void stepThree() { 379 // Find the minimum uncovered cost. 380 float min = leastInRow[0]; 381 for (int r = 1; r < rows; r++) { 382 if (leastInRow[r] < min) { 383 min = leastInRow[r]; 384 } 385 } 386 387 // Add the minimum cost to each of the costs in a covered row, or subtract 388 // the minimum cost from each of the costs in an uncovered column. As an 389 // optimization, do not actually modify the cost matrix yet, but track the 390 // adjustments that need to be made to each row and column. 391 for (int r = 0; r < rows; r++) { 392 if (rowsCovered[r]) { 393 rowAdjust[r] += min; 394 } 395 } 396 for (int c = 0; c < cols; c++) { 397 if (!colsCovered[c]) { 398 colAdjust[c] -= min; 399 } 400 } 401 402 // Since the cost matrix is not being updated yet, the minimum uncovered 403 // cost per row must be updated. 404 for (int r = 0; r < rows; r++) { 405 if (!colsCovered[leastInRowIndex[r]]) { 406 // The least value in this row was in an uncovered column, meaning that 407 // it would have had the minimum value subtracted from it, and therefore 408 // will still be the minimum value in that row. 409 leastInRow[r] -= min; 410 } else { 411 // The least value in this row was in a covered column and would not 412 // have had the minimum value subtracted from it, so the minimum value 413 // could be some in another column. 414 for (int c = 0; c < cols; c++) { 415 if (cost[r][c] + colAdjust[c] + rowAdjust[r] < leastInRow[r]) { 416 leastInRow[r] = cost[r][c] + colAdjust[c] + rowAdjust[r]; 417 leastInRowIndex[r] = c; 418 } 419 } 420 } 421 } 422 } 423 424 /** 425 * Find a zero cost assignment which is not covered. If there are no zero cost assignments which 426 * are uncovered, then null will be returned. 427 * @return pair of row and column indices of an uncovered zero or null 428 */ 429 private Pair<Integer, Integer> findUncoveredZero() { 430 for (int r = 0; r < rows; r++) { 431 if (leastInRow[r] == 0) { 432 return new Pair<>(r, leastInRowIndex[r]); 433 } 434 } 435 return null; 436 } 437 438 /** 439 * A specified row has become covered, and a specified column has become uncovered. The least 440 * value per row may need to be updated. 441 * @param row the index of the row which was just covered 442 * @param col the index of the column which was just uncovered 443 */ 444 private void updateMin(int row, int col) { 445 // If the row is covered we want to ignore it as far as least values go. 446 leastInRow[row] = Float.POSITIVE_INFINITY; 447 448 for (int r = 0; r < rows; r++) { 449 // Since the column has only just been uncovered, it could not have any 450 // pending adjustments. Only covered rows can have pending adjustments 451 // and covered costs do not count toward row minimums. Therefore, we do 452 // not need to consider rowAdjust[r] or colAdjust[col]. 453 if (!rowsCovered[r] && cost[r][col] < leastInRow[r]) { 454 leastInRow[r] = cost[r][col]; 455 leastInRowIndex[r] = col; 456 } 457 } 458 } 459 460 /** 461 * Find a starred zero in a specified row. If there are no starred zeroes in the specified row, 462 * then null will be returned. 463 * @param r the index of the row to be searched 464 * @return pair of row and column indices of starred zero or null 465 */ 466 private Pair<Integer, Integer> starInRow(int r) { 467 for (int c = 0; c < cols; c++) { 468 if (mask[r][c] == STAR) { 469 return new Pair<>(r, c); 470 } 471 } 472 return null; 473 } 474 475 /** 476 * Find a starred zero in the specified column. If there are no starred zeroes in the specified 477 * row, then null will be returned. 478 * @param c the index of the column to be searched 479 * @return pair of row and column indices of starred zero or null 480 */ 481 private Pair<Integer, Integer> starInCol(int c) { 482 for (int r = 0; r < rows; r++) { 483 if (mask[r][c] == STAR) { 484 return new Pair<>(r, c); 485 } 486 } 487 return null; 488 } 489 490 /** 491 * Find a primed zero in the specified row. If there are no primed zeroes in the specified row, 492 * then null will be returned. 493 * @param r the index of the row to be searched 494 * @return pair of row and column indices of primed zero or null 495 */ 496 private Pair<Integer, Integer> primeInRow(int r) { 497 for (int c = 0; c < cols; c++) { 498 if (mask[r][c] == PRIME) { 499 return new Pair<>(r, c); 500 } 501 } 502 return null; 503 } 504}