View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import java.util.Arrays;
22  import java.util.Deque;
23  import java.util.LinkedList;
24  
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  
27  /**
28   * Computes the optimal (minimal cost) assignment of jobs to workers (or other
29   * analogous) concepts given a cost matrix of each pair of job and worker, using
30   * the algorithm by James Munkres in "Algorithms for the Assignment and
31   * Transportation Problems", with additional optimizations as described by Jin
32   * Kue Wong in "A New Implementation of an Algorithm for the Optimal Assignment
33   * Problem: An Improved Version of Munkres' Algorithm". The algorithm runs in
34   * O(n^3) time and need O(n^2) auxiliary space where n is the number of jobs or
35   * workers, whichever is greater.
36   */
37  @InterfaceAudience.Private
38  public class MunkresAssignment {
39  
40    // The original algorithm by Munkres uses the terms STAR and PRIME to denote
41    // different states of zero values in the cost matrix. These values are
42    // represented as byte constants instead of enums to save space in the mask
43    // matrix by a factor of 4n^2 where n is the size of the problem.
44    private static final byte NONE = 0;
45    private static final byte STAR = 1;
46    private static final byte PRIME = 2;
47  
48    // The algorithm requires that the number of column is at least as great as
49    // the number of rows. If that is not the case, then the cost matrix should
50    // be transposed before computation, and the solution matrix transposed before
51    // returning to the caller.
52    private final boolean transposed;
53  
54    // The number of rows of internal matrices.
55    private final int rows;
56  
57    // The number of columns of internal matrices.
58    private final int cols;
59  
60    // The cost matrix, the cost of assigning each row index to column index.
61    private float[][] cost;
62  
63    // Mask of zero cost assignment states.
64    private byte[][] mask;
65  
66    // Covering some rows of the cost matrix.
67    private boolean[] rowsCovered;
68  
69    // Covering some columns of the cost matrix.
70    private boolean[] colsCovered;
71  
72    // The alternating path between starred zeroes and primed zeroes
73    private Deque<Pair<Integer, Integer>> path;
74  
75    // The solution, marking which rows should be assigned to which columns. The
76    // positions of elements in this array correspond to the rows of the cost
77    // matrix, and the value of each element correspond to the columns of the cost
78    // matrix, i.e. assignments[i] = j indicates that row i should be assigned to
79    // column j.
80    private int[] assignments;
81  
82    // Improvements described by Jin Kue Wong cache the least value in each row,
83    // as well as the column index of the least value in each row, and the pending
84    // adjustments to each row and each column.
85    private float[] leastInRow;
86    private int[] leastInRowIndex;
87    private float[] rowAdjust;
88    private float[] colAdjust;
89  
90    /**
91     * Construct a new problem instance with the specified cost matrix. The cost
92     * matrix must be rectangular, though not necessarily square. If one dimension
93     * is greater than the other, some elements in the greater dimension will not
94     * be assigned. The input cost matrix will not be modified.
95     * @param costMatrix
96     */
97    public MunkresAssignment(float[][] costMatrix) {
98      // The algorithm assumes that the number of columns is at least as great as
99      // the number of rows. If this is not the case of the input matrix, then
100     // all internal structures must be transposed relative to the input.
101     this.transposed = costMatrix.length > costMatrix[0].length;
102     if (this.transposed) {
103       this.rows = costMatrix[0].length;
104       this.cols = costMatrix.length;
105     } else {
106       this.rows = costMatrix.length;
107       this.cols = costMatrix[0].length;
108     }
109 
110     cost = new float[rows][cols];
111     mask = new byte[rows][cols];
112     rowsCovered = new boolean[rows];
113     colsCovered = new boolean[cols];
114     path = new LinkedList<Pair<Integer, Integer>>();
115 
116     leastInRow = new float[rows];
117     leastInRowIndex = new int[rows];
118     rowAdjust = new float[rows];
119     colAdjust = new float[cols];
120 
121     assignments = null;
122 
123     // Copy cost matrix.
124     if (transposed) {
125       for (int r = 0; r < rows; r++) {
126         for (int c = 0; c < cols; c++) {
127           cost[r][c] = costMatrix[c][r];
128         }
129       }
130     } else {
131       for (int r = 0; r < rows; r++) {
132         System.arraycopy(costMatrix[r], 0, cost[r], 0, cols);
133       }
134     }
135 
136     // Costs must be finite otherwise the matrix can get into a bad state where
137     // no progress can be made. If your use case depends on a distinction
138     // between costs of MAX_VALUE and POSITIVE_INFINITY, you're doing it wrong.
139     for (int r = 0; r < rows; r++) {
140       for (int c = 0; c < cols; c++) {
141         if (cost[r][c] == Float.POSITIVE_INFINITY) {
142           cost[r][c] = Float.MAX_VALUE;
143         }
144       }
145     }
146   }
147 
148   /**
149    * Get the optimal assignments. The returned array will have the same number
150    * of elements as the number of elements as the number of rows in the input
151    * cost matrix. Each element will indicate which column should be assigned to
152    * that row or -1 if no column should be assigned, i.e. if result[i] = j then
153    * row i should be assigned to column j. Subsequent invocations of this method
154    * will simply return the same object without additional computation.
155    * @return an array with the optimal assignments
156    */
157   public int[] solve() {
158     // If this assignment problem has already been solved, return the known
159     // solution
160     if (assignments != null) {
161       return assignments;
162     }
163 
164     preliminaries();
165 
166     // Find the optimal assignments.
167     while (!testIsDone()) {
168       while (!stepOne()) {
169         stepThree();
170       }
171       stepTwo();
172     }
173 
174     // Extract the assignments from the mask matrix.
175     if (transposed) {
176       assignments = new int[cols];
177       outer:
178       for (int c = 0; c < cols; c++) {
179         for (int r = 0; r < rows; r++) {
180           if (mask[r][c] == STAR) {
181             assignments[c] = r;
182             continue outer;
183           }
184         }
185         // There is no assignment for this row of the input/output.
186         assignments[c] = -1;
187       }
188     } else {
189       assignments = new int[rows];
190       outer:
191       for (int r = 0; r < rows; r++) {
192         for (int c = 0; c < cols; c++) {
193           if (mask[r][c] == STAR) {
194             assignments[r] = c;
195             continue outer;
196           }
197         }
198       }
199     }
200 
201     // Once the solution has been computed, there is no need to keep any of the
202     // other internal structures. Clear all unnecessary internal references so
203     // the garbage collector may reclaim that memory.
204     cost = null;
205     mask = null;
206     rowsCovered = null;
207     colsCovered = null;
208     path = null;
209     leastInRow = null;
210     leastInRowIndex = null;
211     rowAdjust = null;
212     colAdjust = null;
213 
214     return assignments;
215   }
216 
217   /**
218    * Corresponds to the "preliminaries" step of the original algorithm.
219    * Guarantees that the matrix is an equivalent non-negative matrix with at
220    * least one zero in each row.
221    */
222   private void preliminaries() {
223     for (int r = 0; r < rows; r++) {
224       // Find the minimum cost of each row.
225       float min = Float.POSITIVE_INFINITY;
226       for (int c = 0; c < cols; c++) {
227         min = Math.min(min, cost[r][c]);
228       }
229 
230       // Subtract that minimum cost from each element in the row.
231       for (int c = 0; c < cols; c++) {
232         cost[r][c] -= min;
233 
234         // If the element is now zero and there are no zeroes in the same row
235         // or column which are already starred, then star this one. There
236         // must be at least one zero because of subtracting the min cost.
237         if (cost[r][c] == 0 && !rowsCovered[r] && !colsCovered[c]) {
238           mask[r][c] = STAR;
239           // Cover this row and column so that no other zeroes in them can be
240           // starred.
241           rowsCovered[r] = true;
242           colsCovered[c] = true;
243         }
244       }
245     }
246 
247     // Clear the covered rows and columns.
248     Arrays.fill(rowsCovered, false);
249     Arrays.fill(colsCovered, false);
250   }
251 
252   /**
253    * Test whether the algorithm is done, i.e. we have the optimal assignment.
254    * This occurs when there is exactly one starred zero in each row.
255    * @return true if the algorithm is done
256    */
257   private boolean testIsDone() {
258     // Cover all columns containing a starred zero. There can be at most one
259     // starred zero per column. Therefore, a covered column has an optimal
260     // assignment.
261     for (int r = 0; r < rows; r++) {
262       for (int c = 0; c < cols; c++) {
263         if (mask[r][c] == STAR) {
264           colsCovered[c] = true;
265         }
266       }
267     }
268 
269     // Count the total number of covered columns.
270     int coveredCols = 0;
271     for (int c = 0; c < cols; c++) {
272       coveredCols += colsCovered[c] ? 1 : 0;
273     }
274 
275     // Apply an row and column adjustments that are pending.
276     for (int r = 0; r < rows; r++) {
277       for (int c = 0; c < cols; c++) {
278         cost[r][c] += rowAdjust[r];
279         cost[r][c] += colAdjust[c];
280       }
281     }
282 
283     // Clear the pending row and column adjustments.
284     Arrays.fill(rowAdjust, 0);
285     Arrays.fill(colAdjust, 0);
286 
287     // The covers on columns and rows may have been reset, recompute the least
288     // value for each row.
289     for (int r = 0; r < rows; r++) {
290       leastInRow[r] = Float.POSITIVE_INFINITY;
291       for (int c = 0; c < cols; c++) {
292         if (!rowsCovered[r] && !colsCovered[c] && cost[r][c] < leastInRow[r]) {
293           leastInRow[r] = cost[r][c];
294           leastInRowIndex[r] = c;
295         }
296       }
297     }
298 
299     // If all columns are covered, then we are done. Since there may be more
300     // columns than rows, we are also done if the number of covered columns is
301     // at least as great as the number of rows.
302     return (coveredCols == cols || coveredCols >= rows);
303   }
304 
305   /**
306    * Corresponds to step 1 of the original algorithm.
307    * @return false if all zeroes are covered
308    */
309   private boolean stepOne() {
310     while (true) {
311       Pair<Integer, Integer> zero = findUncoveredZero();
312       if (zero == null) {
313         // No uncovered zeroes, need to manipulate the cost matrix in step
314         // three.
315         return false;
316       } else {
317         // Prime the uncovered zero and find a starred zero in the same row.
318         mask[zero.getFirst()][zero.getSecond()] = PRIME;
319         Pair<Integer, Integer> star = starInRow(zero.getFirst());
320         if (star != null) {
321           // Cover the row with both the newly primed zero and the starred zero.
322           // Since this is the only place where zeroes are primed, and we cover
323           // it here, and rows are only uncovered when primes are erased, then
324           // there can be at most one primed uncovered zero.
325           rowsCovered[star.getFirst()] = true;
326           colsCovered[star.getSecond()] = false;
327           updateMin(star.getFirst(), star.getSecond());
328         } else {
329           // Will go to step two after, where a path will be constructed,
330           // starting from the uncovered primed zero (there is only one). Since
331           // we have already found it, save it as the first node in the path.
332           path.clear();
333           path.offerLast(new Pair<Integer, Integer>(zero.getFirst(),
334               zero.getSecond()));
335           return true;
336         }
337       }
338     }
339   }
340 
341   /**
342    * Corresponds to step 2 of the original algorithm.
343    */
344   private void stepTwo() {
345     // Construct a path of alternating starred zeroes and primed zeroes, where
346     // each starred zero is in the same column as the previous primed zero, and
347     // each primed zero is in the same row as the previous starred zero. The
348     // path will always end in a primed zero.
349     while (true) {
350       Pair<Integer, Integer> star = starInCol(path.getLast().getSecond());
351       if (star != null) {
352         path.offerLast(star);
353       } else {
354         break;
355       }
356       Pair<Integer, Integer> prime = primeInRow(path.getLast().getFirst());
357       path.offerLast(prime);
358     }
359 
360     // Augment path - unmask all starred zeroes and star all primed zeroes. All
361     // nodes in the path will be either starred or primed zeroes. The set of
362     // starred zeroes is independent and now one larger than before.
363     for (Pair<Integer, Integer> p : path) {
364       if (mask[p.getFirst()][p.getSecond()] == STAR) {
365         mask[p.getFirst()][p.getSecond()] = NONE;
366       } else {
367         mask[p.getFirst()][p.getSecond()] = STAR;
368       }
369     }
370 
371     // Clear all covers from rows and columns.
372     Arrays.fill(rowsCovered, false);
373     Arrays.fill(colsCovered, false);
374 
375     // Remove the prime mask from all primed zeroes.
376     for (int r = 0; r < rows; r++) {
377       for (int c = 0; c < cols; c++) {
378         if (mask[r][c] == PRIME) {
379           mask[r][c] = NONE;
380         }
381       }
382     }
383   }
384 
385   /**
386    * Corresponds to step 3 of the original algorithm.
387    */
388   private void stepThree() {
389     // Find the minimum uncovered cost.
390     float min = leastInRow[0];
391     for (int r = 1; r < rows; r++) {
392       if (leastInRow[r] < min) {
393         min = leastInRow[r];
394       }
395     }
396 
397     // Add the minimum cost to each of the costs in a covered row, or subtract
398     // the minimum cost from each of the costs in an uncovered column. As an
399     // optimization, do not actually modify the cost matrix yet, but track the
400     // adjustments that need to be made to each row and column.
401     for (int r = 0; r < rows; r++) {
402       if (rowsCovered[r]) {
403         rowAdjust[r] += min;
404       }
405     }
406     for (int c = 0; c < cols; c++) {
407       if (!colsCovered[c]) {
408         colAdjust[c] -= min;
409       }
410     }
411 
412     // Since the cost matrix is not being updated yet, the minimum uncovered
413     // cost per row must be updated.
414     for (int r = 0; r < rows; r++) {
415       if (!colsCovered[leastInRowIndex[r]]) {
416         // The least value in this row was in an uncovered column, meaning that
417         // it would have had the minimum value subtracted from it, and therefore
418         // will still be the minimum value in that row.
419         leastInRow[r] -= min;
420       } else {
421         // The least value in this row was in a covered column and would not
422         // have had the minimum value subtracted from it, so the minimum value
423         // could be some in another column.
424         for (int c = 0; c < cols; c++) {
425           if (cost[r][c] + colAdjust[c] + rowAdjust[r] < leastInRow[r]) {
426             leastInRow[r] = cost[r][c] + colAdjust[c] + rowAdjust[r];
427             leastInRowIndex[r] = c;
428           }
429         }
430       }
431     }
432   }
433 
434   /**
435    * Find a zero cost assignment which is not covered. If there are no zero cost
436    * assignments which are uncovered, then null will be returned.
437    * @return pair of row and column indices of an uncovered zero or null
438    */
439   private Pair<Integer, Integer> findUncoveredZero() {
440     for (int r = 0; r < rows; r++) {
441       if (leastInRow[r] == 0) {
442         return new Pair<Integer, Integer>(r, leastInRowIndex[r]);
443       }
444     }
445     return null;
446   }
447 
448   /**
449    * A specified row has become covered, and a specified column has become
450    * uncovered. The least value per row may need to be updated.
451    * @param row the index of the row which was just covered
452    * @param col the index of the column which was just uncovered
453    */
454   private void updateMin(int row, int col) {
455     // If the row is covered we want to ignore it as far as least values go.
456     leastInRow[row] = Float.POSITIVE_INFINITY;
457 
458     for (int r = 0; r < rows; r++) {
459       // Since the column has only just been uncovered, it could not have any
460       // pending adjustments. Only covered rows can have pending adjustments
461       // and covered costs do not count toward row minimums. Therefore, we do
462       // not need to consider rowAdjust[r] or colAdjust[col].
463       if (!rowsCovered[r] && cost[r][col] < leastInRow[r]) {
464         leastInRow[r] = cost[r][col];
465         leastInRowIndex[r] = col;
466       }
467     }
468   }
469 
470   /**
471    * Find a starred zero in a specified row. If there are no starred zeroes in
472    * the specified row, then null will be returned.
473    * @param r the index of the row to be searched
474    * @return pair of row and column indices of starred zero or null
475    */
476   private Pair<Integer, Integer> starInRow(int r) {
477     for (int c = 0; c < cols; c++) {
478       if (mask[r][c] == STAR) {
479         return new Pair<Integer, Integer>(r, c);
480       }
481     }
482     return null;
483   }
484 
485   /**
486    * Find a starred zero in the specified column. If there are no starred zeroes
487    * in the specified row, then null will be returned.
488    * @param c the index of the column to be searched
489    * @return pair of row and column indices of starred zero or null
490    */
491   private Pair<Integer, Integer> starInCol(int c) {
492     for (int r = 0; r < rows; r++) {
493       if (mask[r][c] == STAR) {
494         return new Pair<Integer, Integer>(r, c);
495       }
496     }
497     return null;
498   }
499 
500   /**
501    * Find a primed zero in the specified row. If there are no primed zeroes in
502    * the specified row, then null will be returned.
503    * @param r the index of the row to be searched
504    * @return pair of row and column indices of primed zero or null
505    */
506   private Pair<Integer, Integer> primeInRow(int r) {
507     for (int c = 0; c < cols; c++) {
508       if (mask[r][c] == PRIME) {
509         return new Pair<Integer, Integer>(r, c);
510       }
511     }
512     return null;
513   }
514 }