001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.util.ArrayList;
021import java.util.Iterator;
022import java.util.List;
023import java.util.concurrent.ThreadLocalRandom;
024import java.util.stream.Stream;
025import org.apache.yetus.audience.InterfaceAudience;
026
027import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
028
029/**
030 * The simple version of reservoir sampling implementation. It is enough for the usage in HBase.
031 * <p/>
032 * See https://en.wikipedia.org/wiki/Reservoir_sampling.
033 */
034@InterfaceAudience.Private
035public class ReservoirSample<T> {
036
037  private final List<T> r;
038
039  private final int k;
040
041  private int n;
042
043  public ReservoirSample(int k) {
044    Preconditions.checkArgument(k > 0, "negative sampling number(%d) is not allowed");
045    r = new ArrayList<>(k);
046    this.k = k;
047  }
048
049  public void add(T t) {
050    if (n < k) {
051      r.add(t);
052    } else {
053      int j = ThreadLocalRandom.current().nextInt(n + 1);
054      if (j < k) {
055        r.set(j, t);
056      }
057    }
058    n++;
059  }
060
061  public void add(Iterator<T> iter) {
062    iter.forEachRemaining(this::add);
063  }
064
065  public void add(Stream<T> s) {
066    s.forEachOrdered(this::add);
067  }
068
069  public List<T> getSamplingResult() {
070    return r;
071  }
072}