Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.util.Arrays;
021import java.util.Deque;
022import java.util.LinkedList;
023import org.apache.yetus.audience.InterfaceAudience;
024
025/**
026 * Computes the optimal (minimal cost) assignment of jobs to workers (or other analogous) concepts
027 * given a cost matrix of each pair of job and worker, using the algorithm by James Munkres in
028 * "Algorithms for the Assignment and Transportation Problems", with additional optimizations as
029 * described by Jin Kue Wong in "A New Implementation of an Algorithm for the Optimal Assignment
030 * Problem: An Improved Version of Munkres' Algorithm". The algorithm runs in O(n^3) time and need
031 * O(n^2) auxiliary space where n is the number of jobs or workers, whichever is greater.
032 */
033@InterfaceAudience.Private
034public class MunkresAssignment {
035
036  // The original algorithm by Munkres uses the terms STAR and PRIME to denote
037  // different states of zero values in the cost matrix. These values are
038  // represented as byte constants instead of enums to save space in the mask
039  // matrix by a factor of 4n^2 where n is the size of the problem.
040  private static final byte NONE = 0;
041  private static final byte STAR = 1;
042  private static final byte PRIME = 2;
043
044  // The algorithm requires that the number of column is at least as great as
045  // the number of rows. If that is not the case, then the cost matrix should
046  // be transposed before computation, and the solution matrix transposed before
047  // returning to the caller.
048  private final boolean transposed;
049
050  // The number of rows of internal matrices.
051  private final int rows;
052
053  // The number of columns of internal matrices.
054  private final int cols;
055
056  // The cost matrix, the cost of assigning each row index to column index.
057  private float[][] cost;
058
059  // Mask of zero cost assignment states.
060  private byte[][] mask;
061
062  // Covering some rows of the cost matrix.
063  private boolean[] rowsCovered;
064
065  // Covering some columns of the cost matrix.
066  private boolean[] colsCovered;
067
068  // The alternating path between starred zeroes and primed zeroes
069  private Deque<Pair<Integer, Integer>> path;
070
071  // The solution, marking which rows should be assigned to which columns. The
072  // positions of elements in this array correspond to the rows of the cost
073  // matrix, and the value of each element correspond to the columns of the cost
074  // matrix, i.e. assignments[i] = j indicates that row i should be assigned to
075  // column j.
076  private int[] assignments;
077
078  // Improvements described by Jin Kue Wong cache the least value in each row,
079  // as well as the column index of the least value in each row, and the pending
080  // adjustments to each row and each column.
081  private float[] leastInRow;
082  private int[] leastInRowIndex;
083  private float[] rowAdjust;
084  private float[] colAdjust;
085
086  /**
087   * Construct a new problem instance with the specified cost matrix. The cost matrix must be
088   * rectangular, though not necessarily square. If one dimension is greater than the other, some
089   * elements in the greater dimension will not be assigned. The input cost matrix will not be
090   * modified.
091   */
092  public MunkresAssignment(float[][] costMatrix) {
093    // The algorithm assumes that the number of columns is at least as great as
094    // the number of rows. If this is not the case of the input matrix, then
095    // all internal structures must be transposed relative to the input.
096    this.transposed = costMatrix.length > costMatrix[0].length;
097    if (this.transposed) {
098      this.rows = costMatrix[0].length;
099      this.cols = costMatrix.length;
100    } else {
101      this.rows = costMatrix.length;
102      this.cols = costMatrix[0].length;
103    }
104
105    cost = new float[rows][cols];
106    mask = new byte[rows][cols];
107    rowsCovered = new boolean[rows];
108    colsCovered = new boolean[cols];
109    path = new LinkedList<>();
110
111    leastInRow = new float[rows];
112    leastInRowIndex = new int[rows];
113    rowAdjust = new float[rows];
114    colAdjust = new float[cols];
115
116    assignments = null;
117
118    // Copy cost matrix.
119    if (transposed) {
120      for (int r = 0; r < rows; r++) {
121        for (int c = 0; c < cols; c++) {
122          cost[r][c] = costMatrix[c][r];
123        }
124      }
125    } else {
126      for (int r = 0; r < rows; r++) {
127        System.arraycopy(costMatrix[r], 0, cost[r], 0, cols);
128      }
129    }
130
131    // Costs must be finite otherwise the matrix can get into a bad state where
132    // no progress can be made. If your use case depends on a distinction
133    // between costs of MAX_VALUE and POSITIVE_INFINITY, you're doing it wrong.
134    for (int r = 0; r < rows; r++) {
135      for (int c = 0; c < cols; c++) {
136        if (cost[r][c] == Float.POSITIVE_INFINITY) {
137          cost[r][c] = Float.MAX_VALUE;
138        }
139      }
140    }
141  }
142
143  /**
144   * Get the optimal assignments. The returned array will have the same number of elements as the
145   * number of elements as the number of rows in the input cost matrix. Each element will indicate
146   * which column should be assigned to that row or -1 if no column should be assigned, i.e. if
147   * result[i] = j then row i should be assigned to column j. Subsequent invocations of this method
148   * will simply return the same object without additional computation.
149   * @return an array with the optimal assignments
150   */
151  public int[] solve() {
152    // If this assignment problem has already been solved, return the known
153    // solution
154    if (assignments != null) {
155      return assignments;
156    }
157
158    preliminaries();
159
160    // Find the optimal assignments.
161    while (!testIsDone()) {
162      while (!stepOne()) {
163        stepThree();
164      }
165      stepTwo();
166    }
167
168    // Extract the assignments from the mask matrix.
169    if (transposed) {
170      assignments = new int[cols];
171      outer: for (int c = 0; c < cols; c++) {
172        for (int r = 0; r < rows; r++) {
173          if (mask[r][c] == STAR) {
174            assignments[c] = r;
175            continue outer;
176          }
177        }
178        // There is no assignment for this row of the input/output.
179        assignments[c] = -1;
180      }
181    } else {
182      assignments = new int[rows];
183      outer: for (int r = 0; r < rows; r++) {
184        for (int c = 0; c < cols; c++) {
185          if (mask[r][c] == STAR) {
186            assignments[r] = c;
187            continue outer;
188          }
189        }
190      }
191    }
192
193    // Once the solution has been computed, there is no need to keep any of the
194    // other internal structures. Clear all unnecessary internal references so
195    // the garbage collector may reclaim that memory.
196    cost = null;
197    mask = null;
198    rowsCovered = null;
199    colsCovered = null;
200    path = null;
201    leastInRow = null;
202    leastInRowIndex = null;
203    rowAdjust = null;
204    colAdjust = null;
205
206    return assignments;
207  }
208
209  /**
210   * Corresponds to the "preliminaries" step of the original algorithm. Guarantees that the matrix
211   * is an equivalent non-negative matrix with at least one zero in each row.
212   */
213  private void preliminaries() {
214    for (int r = 0; r < rows; r++) {
215      // Find the minimum cost of each row.
216      float min = Float.POSITIVE_INFINITY;
217      for (int c = 0; c < cols; c++) {
218        min = Math.min(min, cost[r][c]);
219      }
220
221      // Subtract that minimum cost from each element in the row.
222      for (int c = 0; c < cols; c++) {
223        cost[r][c] -= min;
224
225        // If the element is now zero and there are no zeroes in the same row
226        // or column which are already starred, then star this one. There
227        // must be at least one zero because of subtracting the min cost.
228        if (cost[r][c] == 0 && !rowsCovered[r] && !colsCovered[c]) {
229          mask[r][c] = STAR;
230          // Cover this row and column so that no other zeroes in them can be
231          // starred.
232          rowsCovered[r] = true;
233          colsCovered[c] = true;
234        }
235      }
236    }
237
238    // Clear the covered rows and columns.
239    Arrays.fill(rowsCovered, false);
240    Arrays.fill(colsCovered, false);
241  }
242
243  /**
244   * Test whether the algorithm is done, i.e. we have the optimal assignment. This occurs when there
245   * is exactly one starred zero in each row.
246   * @return true if the algorithm is done
247   */
248  private boolean testIsDone() {
249    // Cover all columns containing a starred zero. There can be at most one
250    // starred zero per column. Therefore, a covered column has an optimal
251    // assignment.
252    for (int r = 0; r < rows; r++) {
253      for (int c = 0; c < cols; c++) {
254        if (mask[r][c] == STAR) {
255          colsCovered[c] = true;
256        }
257      }
258    }
259
260    // Count the total number of covered columns.
261    int coveredCols = 0;
262    for (int c = 0; c < cols; c++) {
263      coveredCols += colsCovered[c] ? 1 : 0;
264    }
265
266    // Apply an row and column adjustments that are pending.
267    for (int r = 0; r < rows; r++) {
268      for (int c = 0; c < cols; c++) {
269        cost[r][c] += rowAdjust[r];
270        cost[r][c] += colAdjust[c];
271      }
272    }
273
274    // Clear the pending row and column adjustments.
275    Arrays.fill(rowAdjust, 0);
276    Arrays.fill(colAdjust, 0);
277
278    // The covers on columns and rows may have been reset, recompute the least
279    // value for each row.
280    for (int r = 0; r < rows; r++) {
281      leastInRow[r] = Float.POSITIVE_INFINITY;
282      for (int c = 0; c < cols; c++) {
283        if (!rowsCovered[r] && !colsCovered[c] && cost[r][c] < leastInRow[r]) {
284          leastInRow[r] = cost[r][c];
285          leastInRowIndex[r] = c;
286        }
287      }
288    }
289
290    // If all columns are covered, then we are done. Since there may be more
291    // columns than rows, we are also done if the number of covered columns is
292    // at least as great as the number of rows.
293    return (coveredCols == cols || coveredCols >= rows);
294  }
295
296  /**
297   * Corresponds to step 1 of the original algorithm.
298   * @return false if all zeroes are covered
299   */
300  private boolean stepOne() {
301    while (true) {
302      Pair<Integer, Integer> zero = findUncoveredZero();
303      if (zero == null) {
304        // No uncovered zeroes, need to manipulate the cost matrix in step
305        // three.
306        return false;
307      } else {
308        // Prime the uncovered zero and find a starred zero in the same row.
309        mask[zero.getFirst()][zero.getSecond()] = PRIME;
310        Pair<Integer, Integer> star = starInRow(zero.getFirst());
311        if (star != null) {
312          // Cover the row with both the newly primed zero and the starred zero.
313          // Since this is the only place where zeroes are primed, and we cover
314          // it here, and rows are only uncovered when primes are erased, then
315          // there can be at most one primed uncovered zero.
316          rowsCovered[star.getFirst()] = true;
317          colsCovered[star.getSecond()] = false;
318          updateMin(star.getFirst(), star.getSecond());
319        } else {
320          // Will go to step two after, where a path will be constructed,
321          // starting from the uncovered primed zero (there is only one). Since
322          // we have already found it, save it as the first node in the path.
323          path.clear();
324          path.offerLast(new Pair<>(zero.getFirst(), zero.getSecond()));
325          return true;
326        }
327      }
328    }
329  }
330
331  /**
332   * Corresponds to step 2 of the original algorithm.
333   */
334  private void stepTwo() {
335    // Construct a path of alternating starred zeroes and primed zeroes, where
336    // each starred zero is in the same column as the previous primed zero, and
337    // each primed zero is in the same row as the previous starred zero. The
338    // path will always end in a primed zero.
339    while (true) {
340      Pair<Integer, Integer> star = starInCol(path.getLast().getSecond());
341      if (star != null) {
342        path.offerLast(star);
343      } else {
344        break;
345      }
346      Pair<Integer, Integer> prime = primeInRow(path.getLast().getFirst());
347      path.offerLast(prime);
348    }
349
350    // Augment path - unmask all starred zeroes and star all primed zeroes. All
351    // nodes in the path will be either starred or primed zeroes. The set of
352    // starred zeroes is independent and now one larger than before.
353    for (Pair<Integer, Integer> p : path) {
354      if (mask[p.getFirst()][p.getSecond()] == STAR) {
355        mask[p.getFirst()][p.getSecond()] = NONE;
356      } else {
357        mask[p.getFirst()][p.getSecond()] = STAR;
358      }
359    }
360
361    // Clear all covers from rows and columns.
362    Arrays.fill(rowsCovered, false);
363    Arrays.fill(colsCovered, false);
364
365    // Remove the prime mask from all primed zeroes.
366    for (int r = 0; r < rows; r++) {
367      for (int c = 0; c < cols; c++) {
368        if (mask[r][c] == PRIME) {
369          mask[r][c] = NONE;
370        }
371      }
372    }
373  }
374
375  /**
376   * Corresponds to step 3 of the original algorithm.
377   */
378  private void stepThree() {
379    // Find the minimum uncovered cost.
380    float min = leastInRow[0];
381    for (int r = 1; r < rows; r++) {
382      if (leastInRow[r] < min) {
383        min = leastInRow[r];
384      }
385    }
386
387    // Add the minimum cost to each of the costs in a covered row, or subtract
388    // the minimum cost from each of the costs in an uncovered column. As an
389    // optimization, do not actually modify the cost matrix yet, but track the
390    // adjustments that need to be made to each row and column.
391    for (int r = 0; r < rows; r++) {
392      if (rowsCovered[r]) {
393        rowAdjust[r] += min;
394      }
395    }
396    for (int c = 0; c < cols; c++) {
397      if (!colsCovered[c]) {
398        colAdjust[c] -= min;
399      }
400    }
401
402    // Since the cost matrix is not being updated yet, the minimum uncovered
403    // cost per row must be updated.
404    for (int r = 0; r < rows; r++) {
405      if (!colsCovered[leastInRowIndex[r]]) {
406        // The least value in this row was in an uncovered column, meaning that
407        // it would have had the minimum value subtracted from it, and therefore
408        // will still be the minimum value in that row.
409        leastInRow[r] -= min;
410      } else {
411        // The least value in this row was in a covered column and would not
412        // have had the minimum value subtracted from it, so the minimum value
413        // could be some in another column.
414        for (int c = 0; c < cols; c++) {
415          if (cost[r][c] + colAdjust[c] + rowAdjust[r] < leastInRow[r]) {
416            leastInRow[r] = cost[r][c] + colAdjust[c] + rowAdjust[r];
417            leastInRowIndex[r] = c;
418          }
419        }
420      }
421    }
422  }
423
424  /**
425   * Find a zero cost assignment which is not covered. If there are no zero cost assignments which
426   * are uncovered, then null will be returned.
427   * @return pair of row and column indices of an uncovered zero or null
428   */
429  private Pair<Integer, Integer> findUncoveredZero() {
430    for (int r = 0; r < rows; r++) {
431      if (leastInRow[r] == 0) {
432        return new Pair<>(r, leastInRowIndex[r]);
433      }
434    }
435    return null;
436  }
437
438  /**
439   * A specified row has become covered, and a specified column has become uncovered. The least
440   * value per row may need to be updated.
441   * @param row the index of the row which was just covered
442   * @param col the index of the column which was just uncovered
443   */
444  private void updateMin(int row, int col) {
445    // If the row is covered we want to ignore it as far as least values go.
446    leastInRow[row] = Float.POSITIVE_INFINITY;
447
448    for (int r = 0; r < rows; r++) {
449      // Since the column has only just been uncovered, it could not have any
450      // pending adjustments. Only covered rows can have pending adjustments
451      // and covered costs do not count toward row minimums. Therefore, we do
452      // not need to consider rowAdjust[r] or colAdjust[col].
453      if (!rowsCovered[r] && cost[r][col] < leastInRow[r]) {
454        leastInRow[r] = cost[r][col];
455        leastInRowIndex[r] = col;
456      }
457    }
458  }
459
460  /**
461   * Find a starred zero in a specified row. If there are no starred zeroes in the specified row,
462   * then null will be returned.
463   * @param r the index of the row to be searched
464   * @return pair of row and column indices of starred zero or null
465   */
466  private Pair<Integer, Integer> starInRow(int r) {
467    for (int c = 0; c < cols; c++) {
468      if (mask[r][c] == STAR) {
469        return new Pair<>(r, c);
470      }
471    }
472    return null;
473  }
474
475  /**
476   * Find a starred zero in the specified column. If there are no starred zeroes in the specified
477   * row, then null will be returned.
478   * @param c the index of the column to be searched
479   * @return pair of row and column indices of starred zero or null
480   */
481  private Pair<Integer, Integer> starInCol(int c) {
482    for (int r = 0; r < rows; r++) {
483      if (mask[r][c] == STAR) {
484        return new Pair<>(r, c);
485      }
486    }
487    return null;
488  }
489
490  /**
491   * Find a primed zero in the specified row. If there are no primed zeroes in the specified row,
492   * then null will be returned.
493   * @param r the index of the row to be searched
494   * @return pair of row and column indices of primed zero or null
495   */
496  private Pair<Integer, Integer> primeInRow(int r) {
497    for (int c = 0; c < cols; c++) {
498      if (mask[r][c] == PRIME) {
499        return new Pair<>(r, c);
500      }
501    }
502    return null;
503  }
504}