clean history

This commit is contained in:
Looly
2019-08-14 10:02:32 +08:00
commit 6b011af032
1215 changed files with 159913 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
<?xml version='1.0' encoding='utf-8'?>
<!-- Maven descriptor of the hutool-bloomFilter module (Bloom filter support), packaged as a jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<packaging>jar</packaging>
<!-- Group id and version are inherited from the hutool-parent POM. -->
<parent>
<groupId>cn.hutool</groupId>
<artifactId>hutool-parent</artifactId>
<version>4.6.2-SNAPSHOT</version>
</parent>
<artifactId>hutool-bloomFilter</artifactId>
<name>${project.artifactId}</name>
<description>Hutool 布隆过滤器</description>
<!-- The only declared dependency is hutool-core, pinned to the same version as the parent POM. -->
<dependencies>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-core</artifactId>
<version>${project.parent.version}</version>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,79 @@
package cn.hutool.bloomfilter;
import cn.hutool.bloomfilter.filter.DefaultFilter;
import cn.hutool.bloomfilter.filter.ELFFilter;
import cn.hutool.bloomfilter.filter.JSFilter;
import cn.hutool.bloomfilter.filter.PJWFilter;
import cn.hutool.bloomfilter.filter.SDBMFilter;
import cn.hutool.core.util.NumberUtil;
/**
 * Bloom filter built from several single-hash filters.<br>
 * 1. Each underlying filter hashes the string with its own algorithm.<br>
 * 2. The hash is mapped to a bit position inside that filter.<br>
 * 3. A string is reported as present only when every underlying filter reports it.<br>
 * The hash algorithms can be chosen by passing custom filters to the constructor.
 *
 * @author Ansj
 */
public class BitMapBloomFilter implements BloomFilter {
    private static final long serialVersionUID = 1L;

    /** Number of filters created by {@link #BitMapBloomFilter(int)}. */
    private static final int DEFAULT_FILTER_COUNT = 5;
    /** Bits allotted per unit of {@code m}: 1024 * 1024 * 8. */
    private static final long BITS_PER_UNIT = 1024L * 1024L * 8L;

    private final BloomFilter[] filters;

    /**
     * Creates the filter with the five default hash filters (Default, ELF, JS, PJW, SDBM).<br>
     * Each filter is given {@code (m / 5) * 1024 * 1024 * 8} bits. For {@code m < 5} that is zero bits,
     * and the default filters then fail with an {@link ArithmeticException} when they reduce a hash modulo zero.
     *
     * @param m value that determines the size of the bit maps
     */
    public BitMapBloomFilter(int m) {
        // Plain int division truncates toward zero, exactly like the former
        // NumberUtil.div(String, String).intValue() round trip through BigDecimal.
        final long size = (m / DEFAULT_FILTER_COUNT) * BITS_PER_UNIT;
        this.filters = new BloomFilter[] {
            new DefaultFilter(size),
            new ELFFilter(size),
            new JSFilter(size),
            new PJWFilter(size),
            new SDBMFilter(size)
        };
    }

    /**
     * Creates the filter from caller-supplied filters.<br>
     * The supplied filters already carry their own size, so {@code m} is not used here. Earlier code
     * delegated to {@link #BitMapBloomFilter(int)} first, which allocated five default bit maps only to
     * discard them immediately.
     *
     * @param m       kept for backward compatibility; ignored
     * @param filters the Bloom filters to combine
     */
    public BitMapBloomFilter(int m, BloomFilter... filters) {
        this.filters = filters;
    }

    /**
     * Adds the string to every underlying filter.
     *
     * @param str the string to add
     * @return {@code true} if at least one underlying filter did not contain the string before this call,
     *         {@code false} if all of them already reported it as present
     */
    @Override
    public boolean add(String str) {
        // Must start as false: starting from true made the method return true unconditionally.
        boolean added = false;
        for (BloomFilter filter : filters) {
            // Non-short-circuit OR so that every filter records the string.
            added |= filter.add(str);
        }
        return added;
    }

    /**
     * Tests whether the string may have been added; false positives are possible.
     *
     * @param str the string to look up
     * @return {@code false} if any underlying filter does not contain the string, otherwise {@code true}
     */
    @Override
    public boolean contains(String str) {
        for (BloomFilter filter : filters) {
            if (!filter.contains(str)) {
                return false;
            }
        }
        return true;
    }
}

View File

@@ -0,0 +1,145 @@
package cn.hutool.bloomfilter;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.BitSet;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.util.HashUtil;
/**
 * Bloom filter implementation that stores its bits in a {@link BitSet}.<br>
 * The hash algorithms are applied in a fixed order (see {@link #hash(String, int)}); the caller only
 * chooses how many of them to use.
 *
 * @author loolly
 */
public class BitSetBloomFilter implements BloomFilter {
    private static final long serialVersionUID = 1L;

    private final BitSet bitSet;
    private final int bitSetSize;
    /** Expected number of records; only read by {@link #getFalsePositiveProbability()}, never incremented by {@link #add(String)}. */
    private final int addedElements;
    private final int hashFunctionNumber;

    /**
     * Creates a Bloom filter whose bit set holds {@code c * k} bits.
     *
     * @param c number of records to reserve room for (the original note suggests about twice the expected count)
     * @param n number of records the filter is expected to hold; used only for the false-positive estimate
     * @param k number of hash functions, i.e. the number of bits each record occupies
     * @throws IllegalArgumentException if {@code c * k} does not fit into an {@code int}
     */
    public BitSetBloomFilter(int c, int n, int k) {
        // Multiply in long: the int product silently wrapped around for large arguments.
        final long bits = (long) c * k;
        if (bits > Integer.MAX_VALUE || bits < Integer.MIN_VALUE) {
            throw new IllegalArgumentException("c * k is out of int range: " + bits);
        }
        this.hashFunctionNumber = k;
        this.bitSetSize = (int) bits;
        this.addedElements = n;
        this.bitSet = new BitSet(this.bitSetSize);
    }

    /**
     * Fills the filter from a file, adding every line as one record.
     *
     * @param path    path of the file
     * @param charset name of the charset used to read the file
     * @throws IOException if reading a line fails
     */
    public void init(String path, String charset) throws IOException {
        final BufferedReader reader = FileUtil.getReader(path, charset);
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                this.add(line);
            }
        } finally {
            IoUtil.close(reader);
        }
    }

    /**
     * Adds the string to the filter.
     *
     * @param str the string to add
     * @return {@code false} if every bit of the string was already set (it was possibly present),
     *         {@code true} if at least one bit had to be set
     */
    @Override
    public boolean add(String str) {
        // Hash once and reuse the positions for both the membership check and the update.
        boolean changed = false;
        for (int position : bitPositions(str)) {
            if (!bitSet.get(position)) {
                bitSet.set(position);
                changed = true;
            }
        }
        return changed;
    }

    /**
     * Tests whether the string may have been added.
     *
     * @param str the string to look up
     * @return whether the string is possibly contained; false positives can occur
     */
    @Override
    public boolean contains(String str) {
        for (int position : bitPositions(str)) {
            if (!bitSet.get(position)) {
                return false;
            }
        }
        return true;
    }

    /**
     * @return the estimated false-positive rate {@code (1 - e^(-k * n / m))^k}, where {@code n} is the
     *         expected record count passed to the constructor and {@code m} the bit set size
     */
    public double getFalsePositiveProbability() {
        final double exponent = -hashFunctionNumber * (double) addedElements / bitSetSize;
        return Math.pow(1 - Math.exp(exponent), hashFunctionNumber);
    }

    /**
     * Hashes the string with the first {@code hashNumber} algorithms.
     *
     * @param str        the string to hash
     * @param hashNumber how many hash algorithms to apply
     * @return one hash value per algorithm, in algorithm order
     */
    public static int[] createHashes(String str, int hashNumber) {
        final int[] result = new int[hashNumber];
        for (int i = 0; i < hashNumber; i++) {
            result[i] = hash(str, i);
        }
        return result;
    }

    /**
     * Computes one hash of the string.
     *
     * @param str the string to hash
     * @param k   index of the hash algorithm, 0 to 7; any other index yields 0
     * @return the hash value
     */
    public static int hash(String str, int k) {
        switch (k) {
            case 0:
                return HashUtil.rsHash(str);
            case 1:
                return HashUtil.jsHash(str);
            case 2:
                return HashUtil.elfHash(str);
            case 3:
                return HashUtil.bkdrHash(str);
            case 4:
                return HashUtil.apHash(str);
            case 5:
                return HashUtil.djbHash(str);
            case 6:
                return HashUtil.sdbmHash(str);
            case 7:
                return HashUtil.pjwHash(str);
            default:
                return 0;
        }
    }

    /** Maps every hash of the string to a bit index in {@code [0, bitSetSize)}. */
    private int[] bitPositions(String str) {
        final int[] positions = createHashes(str, hashFunctionNumber);
        for (int i = 0; i < positions.length; i++) {
            // The remainder is strictly smaller in magnitude than bitSetSize, so abs cannot overflow.
            positions[i] = Math.abs(positions[i] % bitSetSize);
        }
        return positions;
    }
}

View File

@@ -0,0 +1,29 @@
package cn.hutool.bloomfilter;
import java.io.Serializable;
/**
 * A Bloom filter is a binary-vector data structure proposed by Burton Howard Bloom in 1970. It is very
 * efficient in space and time and is used to test whether an element is a member of a set.<br>
 * A positive answer does not guarantee membership, but a negative answer guarantees that the element
 * is not in the set.<br>
 * A Bloom filter therefore has 100% recall: every lookup answers either "in the set (possibly wrong)"
 * or "not in the set (certain)".<br>
 *
 * @author Looly
 */
public interface BloomFilter extends Serializable {

    /**
     * Adds a string to the filter.<br>
     * If the string is already present nothing is added and {@code false} is returned; otherwise the
     * string is added and {@code true} is returned.
     *
     * @param str the string to add
     * @return {@code true} if the string was added, {@code false} if it was already present
     */
    boolean add(String str);

    /**
     * Tests whether a string is present in the filter.
     *
     * @param str the string to look up
     * @return whether the string is present
     */
    boolean contains(String str);
}

View File

@@ -0,0 +1,32 @@
package cn.hutool.bloomfilter;
/**
 * Factory helpers for Bloom filters.
 *
 * @author looly
 * @since 4.1.5
 */
public class BloomFilterUtil {

    /**
     * Creates a Bloom filter backed by a BitSet by forwarding the arguments to the
     * {@link BitSetBloomFilter} constructor.
     *
     * @param c number of records to reserve room for (the original note suggests about twice the expected count)
     * @param n number of records the filter is expected to hold
     * @param k number of hash functions, i.e. the number of bits each record occupies
     * @return BitSetBloomFilter
     */
    public static BitSetBloomFilter createBitSet(int c, int n, int k) {
        final BitSetBloomFilter bitSetFilter = new BitSetBloomFilter(c, n, k);
        return bitSetFilter;
    }

    /**
     * Creates a Bloom filter backed by bit maps.
     *
     * @param m value that determines the size of the bit maps
     * @return BitMapBloomFilter
     */
    public static BitMapBloomFilter createBitMap(int m) {
        final BitMapBloomFilter bitMapFilter = new BitMapBloomFilter(m);
        return bitMapFilter;
    }
}

View File

@@ -0,0 +1,34 @@
package cn.hutool.bloomfilter.bitMap;
/**
 * Bit map contract: maps a numeric value to one bit of an array so that the presence of the value can
 * be tested later.
 *
 * @author looly
 */
public interface BitMap {

    /** Word width used by the int-based implementation. */
    int MACHINE32 = 32;
    /** Word width used by the long-based implementation. */
    int MACHINE64 = 64;

    /**
     * Adds a value.
     *
     * @param i the value
     */
    void add(long i);

    /**
     * Tests whether a value is present.
     *
     * @param i the value
     * @return whether the value is present
     */
    boolean contains(long i);

    /**
     * Removes a value.
     *
     * @param i the value
     */
    void remove(long i);
}

View File

@@ -0,0 +1,56 @@
package cn.hutool.bloomfilter.bitMap;
import java.io.Serializable;
/**
 * Bit map stored in an {@code int} array, 32 bits per element.<br>
 * According to the original author's note this variant performs better on 32-bit machines and is the
 * generally recommended one.
 *
 * @author loolly
 */
public class IntMap implements BitMap, Serializable {
    private static final long serialVersionUID = 1L;

    private int[] ints = null;

    /**
     * Creates a bit map with 93750000 array elements.
     */
    public IntMap() {
        this(93750000);
    }

    /**
     * Creates a bit map with the given number of array elements.
     *
     * @param size number of {@code int} elements
     */
    public IntMap(int size) {
        ints = new int[size];
    }

    @Override
    public void add(long i) {
        final int word = (int) (i / BitMap.MACHINE32);
        final int bit = (int) (i % BitMap.MACHINE32);
        ints[word] |= 1 << bit;
    }

    @Override
    public boolean contains(long i) {
        final int word = (int) (i / BitMap.MACHINE32);
        final int bit = (int) (i % BitMap.MACHINE32);
        return ((ints[word] >>> bit) & 1) == 1;
    }

    @Override
    public void remove(long i) {
        final int word = (int) (i / BitMap.MACHINE32);
        final int bit = (int) (i % BitMap.MACHINE32);
        final int keepMask = ~(1 << bit);
        ints[word] = ints[word] & keepMask;
    }
}

View File

@@ -0,0 +1,56 @@
package cn.hutool.bloomfilter.bitMap;
import java.io.Serializable;
/**
 * Bit map stored in a {@code long} array, 64 bits per element.<br>
 * According to the original author's note this variant performs better on 64-bit machines and is not
 * recommended for general use.
 *
 * @author loolly
 */
public class LongMap implements BitMap, Serializable {
    private static final long serialVersionUID = 1L;

    private long[] longs = null;

    /**
     * Creates a bit map with 93750000 array elements.
     */
    public LongMap() {
        longs = new long[93750000];
    }

    /**
     * Creates a bit map with the given number of array elements.
     *
     * @param size number of {@code long} elements
     */
    public LongMap(int size) {
        longs = new long[size];
    }

    /**
     * Sets the bit that represents the value.
     *
     * @param i the value
     */
    @Override
    public void add(long i) {
        final int r = (int) (i / BitMap.MACHINE64);
        final long c = i % BitMap.MACHINE64;
        // The mask must be a long: "1 << c" is an int shift whose distance is masked to 5 bits,
        // so bits 32..63 were never set and bit 31 sign-extended over the whole upper half.
        longs[r] |= 1L << c;
    }

    /**
     * Tests the bit that represents the value.
     *
     * @param i the value
     * @return whether the bit is set
     */
    @Override
    public boolean contains(long i) {
        final int r = (int) (i / BitMap.MACHINE64);
        final long c = i % BitMap.MACHINE64;
        return ((longs[r] >>> c) & 1) == 1;
    }

    /**
     * Clears the bit that represents the value.
     *
     * @param i the value
     */
    @Override
    public void remove(long i) {
        final int r = (int) (i / BitMap.MACHINE64);
        final long c = i % BitMap.MACHINE64;
        // Long mask for the same reason as in add: the int mask for bit 31 wiped bits 31..63.
        longs[r] &= ~(1L << c);
    }
}

View File

@@ -0,0 +1,7 @@
/**
* BitMap实现
*
* @author looly
*
*/
package cn.hutool.bloomfilter.bitMap;

View File

@@ -0,0 +1,83 @@
package cn.hutool.bloomfilter.filter;
import cn.hutool.bloomfilter.BloomFilter;
import cn.hutool.bloomfilter.bitMap.BitMap;
import cn.hutool.bloomfilter.bitMap.IntMap;
import cn.hutool.bloomfilter.bitMap.LongMap;
/**
 * Base class of the single-hash Bloom filters: owns the bit map and leaves the hash algorithm to the
 * subclass.
 *
 * @author loolly
 */
public abstract class AbstractFilter implements BloomFilter {
    private static final long serialVersionUID = 1L;

    private BitMap bm = null;

    /** Number of addressable bits; subclasses reduce their hash modulo this value. */
    protected long size = 0;

    /**
     * Creates the filter.
     *
     * @param maxValue   number of addressable bits
     * @param machineNum word width of the bit map, {@link BitMap#MACHINE32} or {@link BitMap#MACHINE64}
     */
    public AbstractFilter(long maxValue, int machineNum) {
        init(maxValue, machineNum);
    }

    /**
     * Creates the filter on a 32-bit bit map.
     *
     * @param maxValue number of addressable bits
     */
    public AbstractFilter(long maxValue) {
        this(maxValue, BitMap.MACHINE32);
    }

    /**
     * Allocates the bit map.
     *
     * @param maxValue   number of addressable bits
     * @param machineNum word width of the bit map, {@link BitMap#MACHINE32} or {@link BitMap#MACHINE64}
     * @throws RuntimeException if {@code machineNum} is neither 32 nor 64
     */
    public void init(long maxValue, int machineNum) {
        this.size = maxValue;
        switch (machineNum) {
            case BitMap.MACHINE32:
                bm = new IntMap(wordCount(size, machineNum));
                break;
            case BitMap.MACHINE64:
                bm = new LongMap(wordCount(size, machineNum));
                break;
            default:
                throw new RuntimeException("Error Machine number!");
        }
    }

    @Override
    public boolean contains(String str) {
        return bm.contains(bitIndex(str));
    }

    @Override
    public boolean add(String str) {
        final long index = bitIndex(str);
        if (bm.contains(index)) {
            return false;
        }
        bm.add(index);
        return true;
    }

    /**
     * Hash algorithm supplied by the subclass.
     *
     * @param str the string to hash
     * @return the hash code
     */
    public abstract long hash(String str);

    /**
     * Reduces the subclass hash to a bit index in {@code [0, size)}. Subclasses that already apply
     * {@code % size} are unaffected; a subclass that returns an unreduced hash no longer indexes past
     * the end of the bit map.
     */
    private long bitIndex(String str) {
        return Math.abs(hash(str) % size);
    }

    /**
     * Number of words needed to hold {@code bits} bits, rounded up so that a size which is not a
     * multiple of the word width still has room for its highest indexes.
     */
    private static int wordCount(long bits, int bitsPerWord) {
        long words = bits / bitsPerWord;
        if (bits % bitsPerWord > 0) {
            words++;
        }
        return (int) words;
    }
}

View File

@@ -0,0 +1,25 @@
package cn.hutool.bloomfilter.filter;
import cn.hutool.core.util.HashUtil;
/**
 * Default Bloom filter; hashes with {@code HashUtil.javaDefaultHash}, described by the original author
 * as Java's built-in hash algorithm.
 *
 * @author loolly
 */
public class DefaultFilter extends AbstractFilter {
    private static final long serialVersionUID = 1L;

    /**
     * @param maxValue number of addressable bits
     */
    public DefaultFilter(long maxValue) {
        super(maxValue);
    }

    /**
     * @param maxValue   number of addressable bits
     * @param MACHINENUM word width of the bit map
     */
    public DefaultFilter(long maxValue, int MACHINENUM) {
        super(maxValue, MACHINENUM);
    }

    /**
     * @param str the string to hash
     * @return {@code HashUtil.javaDefaultHash(str)} reduced modulo {@code size}
     */
    @Override
    public long hash(String str) {
        final long raw = HashUtil.javaDefaultHash(str);
        return raw % size;
    }
}

View File

@@ -0,0 +1,21 @@
package cn.hutool.bloomfilter.filter;
import cn.hutool.core.util.HashUtil;
/**
 * Bloom filter that hashes with {@code HashUtil.elfHash}.
 */
public class ELFFilter extends AbstractFilter {
    private static final long serialVersionUID = 1L;

    /**
     * @param maxValue number of addressable bits
     */
    public ELFFilter(long maxValue) {
        super(maxValue);
    }

    /**
     * @param maxValue   number of addressable bits
     * @param MACHINENUM word width of the bit map
     */
    public ELFFilter(long maxValue, int MACHINENUM) {
        super(maxValue, MACHINENUM);
    }

    /**
     * @param str the string to hash
     * @return {@code HashUtil.elfHash(str)} reduced modulo {@code size}
     */
    @Override
    public long hash(String str) {
        final long raw = HashUtil.elfHash(str);
        return raw % size;
    }
}

View File

@@ -0,0 +1,21 @@
package cn.hutool.bloomfilter.filter;
import cn.hutool.core.util.HashUtil;
/**
 * Bloom filter that hashes with {@code HashUtil.fnvHash}.
 */
public class FNVFilter extends AbstractFilter {
    private static final long serialVersionUID = 1L;

    /**
     * @param maxValue   number of addressable bits
     * @param machineNum word width of the bit map
     */
    public FNVFilter(long maxValue, int machineNum) {
        super(maxValue, machineNum);
    }

    /**
     * @param maxValue number of addressable bits
     */
    public FNVFilter(long maxValue) {
        super(maxValue);
    }

    /**
     * @param str the string to hash
     * @return {@code HashUtil.fnvHash(str)} reduced modulo {@code size}
     */
    @Override
    public long hash(String str) {
        // Reduce modulo size like every sibling filter; the raw hash used to be returned as the bit
        // index, which can lie far beyond the end of the bit map.
        return HashUtil.fnvHash(str) % size;
    }
}

View File

@@ -0,0 +1,31 @@
package cn.hutool.bloomfilter.filter;
/**
 * Bloom filter with a position-weighted character-sum hash.
 */
public class HfFilter extends AbstractFilter {
    private static final long serialVersionUID = 1L;

    /**
     * @param maxValue number of addressable bits
     */
    public HfFilter(long maxValue) {
        super(maxValue);
    }

    /**
     * @param maxValue   number of addressable bits
     * @param machineNum word width of the bit map
     */
    public HfFilter(long maxValue, int machineNum) {
        super(maxValue, machineNum);
    }

    /**
     * Sums {@code char * 3 * index} over all characters, makes the sum non-negative and reduces it
     * modulo {@code size}.
     *
     * @param str the string to hash
     * @return the hash code
     */
    @Override
    public long hash(String str) {
        long sum = 0;
        final int count = str.length();
        for (int pos = 0; pos < count; pos++) {
            // Each term is computed in int arithmetic before it is widened into the long sum.
            final int term = str.charAt(pos) * 3 * pos;
            sum += term;
        }
        final long nonNegative = sum < 0 ? -sum : sum;
        return nonNegative % size;
    }
}

View File

@@ -0,0 +1,24 @@
package cn.hutool.bloomfilter.filter;
/**
 * Bloom filter whose hash XORs every character with one of the first four characters of the string.
 */
public class HfIpFilter extends AbstractFilter {
    private static final long serialVersionUID = 1L;

    /**
     * @param maxValue number of addressable bits
     */
    public HfIpFilter(long maxValue) {
        super(maxValue);
    }

    /**
     * @param maxValue   number of addressable bits
     * @param machineNum word width of the bit map
     */
    public HfIpFilter(long maxValue, int machineNum) {
        super(maxValue, machineNum);
    }

    /**
     * Sums {@code charAt(i % 4) ^ charAt(i)} over all characters and reduces the sum modulo {@code size}.
     *
     * @param str the string to hash
     * @return the hash code
     */
    @Override
    public long hash(String str) {
        long sum = 0;
        final int count = str.length();
        int pos = 0;
        while (pos < count) {
            final char anchor = str.charAt(pos % 4);
            final char current = str.charAt(pos);
            sum += anchor ^ current;
            pos++;
        }
        return sum % size;
    }
}

View File

@@ -0,0 +1,30 @@
package cn.hutool.bloomfilter.filter;
/**
 * Bloom filter with a shift-and-XOR hash seeded with 1315423911.
 */
public class JSFilter extends AbstractFilter {
    private static final long serialVersionUID = 1L;

    /**
     * @param maxValue number of addressable bits
     */
    public JSFilter(long maxValue) {
        super(maxValue);
    }

    /**
     * @param maxValue   number of addressable bits
     * @param machineNum word width of the bit map
     */
    public JSFilter(long maxValue, int machineNum) {
        super(maxValue, machineNum);
    }

    /**
     * Folds every character into a 32-bit accumulator, negates a negative result and reduces it
     * modulo {@code size}.
     *
     * @param str the string to hash
     * @return the hash code
     */
    @Override
    public long hash(String str) {
        int acc = 1315423911;
        final int count = str.length();
        for (int pos = 0; pos < count; pos++) {
            final int mixed = (acc << 5) + str.charAt(pos) + (acc >> 2);
            acc = acc ^ mixed;
        }
        // Int negation, same as multiplying by -1 in int arithmetic.
        final int flipped = acc < 0 ? -acc : acc;
        return flipped % size;
    }
}

View File

@@ -0,0 +1,21 @@
package cn.hutool.bloomfilter.filter;
import cn.hutool.core.util.HashUtil;
/**
 * Bloom filter that hashes with {@code HashUtil.pjwHash}.
 */
public class PJWFilter extends AbstractFilter {
    private static final long serialVersionUID = 1L;

    /**
     * @param maxValue number of addressable bits
     */
    public PJWFilter(long maxValue) {
        super(maxValue);
    }

    /**
     * @param maxValue   number of addressable bits
     * @param machineNum word width of the bit map
     */
    public PJWFilter(long maxValue, int machineNum) {
        super(maxValue, machineNum);
    }

    /**
     * @param str the string to hash
     * @return {@code HashUtil.pjwHash(str)} reduced modulo {@code size}
     */
    @Override
    public long hash(String str) {
        final long raw = HashUtil.pjwHash(str);
        return raw % size;
    }
}

View File

@@ -0,0 +1,21 @@
package cn.hutool.bloomfilter.filter;
import cn.hutool.core.util.HashUtil;
/**
 * Bloom filter that hashes with {@code HashUtil.rsHash}.
 */
public class RSFilter extends AbstractFilter {
    private static final long serialVersionUID = 1L;

    /**
     * @param maxValue number of addressable bits
     */
    public RSFilter(long maxValue) {
        super(maxValue);
    }

    /**
     * @param maxValue   number of addressable bits
     * @param machineNum word width of the bit map
     */
    public RSFilter(long maxValue, int machineNum) {
        super(maxValue, machineNum);
    }

    /**
     * @param str the string to hash
     * @return {@code HashUtil.rsHash(str)} reduced modulo {@code size}
     */
    @Override
    public long hash(String str) {
        final long raw = HashUtil.rsHash(str);
        return raw % size;
    }
}

View File

@@ -0,0 +1,21 @@
package cn.hutool.bloomfilter.filter;
import cn.hutool.core.util.HashUtil;
/**
 * Bloom filter that hashes with {@code HashUtil.sdbmHash}.
 */
public class SDBMFilter extends AbstractFilter {
    private static final long serialVersionUID = 1L;

    /**
     * @param maxValue number of addressable bits
     */
    public SDBMFilter(long maxValue) {
        super(maxValue);
    }

    /**
     * @param maxValue   number of addressable bits
     * @param machineNum word width of the bit map
     */
    public SDBMFilter(long maxValue, int machineNum) {
        super(maxValue, machineNum);
    }

    /**
     * @param str the string to hash
     * @return {@code HashUtil.sdbmHash(str)} reduced modulo {@code size}
     */
    @Override
    public long hash(String str) {
        final long raw = HashUtil.sdbmHash(str);
        return raw % size;
    }
}

View File

@@ -0,0 +1,22 @@
package cn.hutool.bloomfilter.filter;
import cn.hutool.core.util.HashUtil;
/**
 * Bloom filter that hashes with {@code HashUtil.tianlHash}.
 */
public class TianlFilter extends AbstractFilter {
    private static final long serialVersionUID = 1L;

    /**
     * @param maxValue number of addressable bits
     */
    public TianlFilter(long maxValue) {
        super(maxValue);
    }

    /**
     * @param maxValue   number of addressable bits
     * @param machineNum word width of the bit map
     */
    public TianlFilter(long maxValue, int machineNum) {
        super(maxValue, machineNum);
    }

    /**
     * @param str the string to hash
     * @return {@code HashUtil.tianlHash(str)} reduced modulo {@code size}
     */
    @Override
    public long hash(String str) {
        final long raw = HashUtil.tianlHash(str);
        return raw % size;
    }
}

View File

@@ -0,0 +1,7 @@
/**
* 各种Hash算法的过滤器实现
*
* @author looly
*
*/
package cn.hutool.bloomfilter.filter;

View File

@@ -0,0 +1,7 @@
/**
* 布隆过滤提供一些Hash算法的布隆过滤
*
* @author looly
*
*/
package cn.hutool.bloomfilter;

View File

@@ -0,0 +1,55 @@
package cn.hutool.bloomfilter;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import cn.hutool.bloomfilter.bitMap.IntMap;
import cn.hutool.bloomfilter.bitMap.LongMap;
/**
 * Tests for {@link BitMapBloomFilter} plus two ignored, print-only checks of the bit map classes.
 */
public class BitMapBloomFilterTest {

    /** Every string that was added must be reported as contained. */
    @Test
    public void filterTest() {
        final BitMapBloomFilter filter = new BitMapBloomFilter(10);
        for (String sample : new String[] { "123", "abc", "ddd" }) {
            filter.add(sample);
        }

        for (String expected : new String[] { "abc", "ddd", "123" }) {
            Assert.assertTrue(filter.contains(expected));
        }
    }

    /** Sets bits 0 to 31, clears bit 30 and prints the state of each bit. */
    @Test
    @Ignore
    public void testIntMap() {
        final IntMap intMap = new IntMap();
        int value = 0;
        while (value < 32) {
            intMap.add(value);
            value++;
        }
        intMap.remove(30);

        for (int probe = 0; probe < 32; probe++) {
            final boolean present = intMap.contains(probe);
            System.out.println(probe + "是否存在-->" + present);
        }
    }

    /** Sets bits 0 to 63, clears bit 30 and prints the state of each bit. */
    @Test
    @Ignore
    public void testLongMap() {
        final LongMap longMap = new LongMap();
        int value = 0;
        while (value < 64) {
            longMap.add(value);
            value++;
        }
        longMap.remove(30);

        for (int probe = 0; probe < 64; probe++) {
            final boolean present = longMap.contains(probe);
            System.out.println(probe + "是否存在-->" + present);
        }
    }
}