public class Hash {
private MessageDigest hash;
public Hash(String algorithm) {
try {
hash = MessageDigest.getInstance(algorithm);
} catch (NoSuchAlgorithmException e) {
e.printStackTrace();
}
}
public String hashCode(String input) {
return DatatypeConverter.printHexBinary(hash.digest(input.getBytes())).toUpperCase();
}
public static void main(String[] args) {
System.out.println("支持的算法 : ");
for (String str : Security.getAlgorithms("MessageDigest")) {
System.out.println(str);
}
System.out.println("=======");
String algorithm = "MD5";
Hash hash = new Hash(algorithm);
String input1 = "zuochengyunzuochengyun1";
String input2 = "zuochengyunzuochengyun2";
String input3 = "zuochengyunzuochengyun3";
String input4 = "zuochengyunzuochengyun4";
String input5 = "zuochengyunzuochengyun5";
System.out.println(hash.hashCode(input1));
System.out.println(hash.hashCode(input2));
System.out.println(hash.hashCode(input3));
System.out.println(hash.hashCode(input4));
System.out.println(hash.hashCode(input5));
}
}
支持的算法 :
SHA-384
SHA-224
SHA-256
MD2
SHA
SHA-512
MD5
=======
B73390C90A49FEF569367FDF894AC89F
451AF105D1062B501663A17F19BA9196
C5A9C27C2BE4EE613311137528AF2D93
E001FC84040DA08AE3B74EB98FD4B468
D0424A6201FD9CB675FC2D40F20DD5D5
举例:假设有一个服务,存储了100亿个黑名单的 url。如果用 HashSet 去存,就得需要大概 640G 内存(一个url就是 64 byte,100亿个url就是 6400亿 byte ≈ 640G),及其浪费空间。而使用布隆过滤器可以极大的节省空间,差不多 20G 就可以实现了。但是,布隆过滤器有一定的失误率,这个失误率是针对非黑名单而言的(即:宁可错杀一千,也不可放过一个),并且这个失误率是可控的。
m 和什么有关?
- 样本量:100亿
- 失误率:0.01%
单样本大小:64 byte(和这个无关)
以上三个公式需要背一下。
接下来的问题是,如果需要 k 多个哈希函数,怎么来?
只需要有两个哈希函数(f1、f2)就可以造出 k 个哈希函数,而且几乎都是独立的。
布隆过滤器的应用-HDFS
http://www.bubuko.com/infodetail-3803533.html
32位无符号整数的范围是0~4,294,967,295,现在有一个正好包含40亿个无符号整数的文件,可以使用最多1GB的内存,怎么找到出现次数最多的数?
32位无符号整数的范围是0~4,294,967,295,现在有一个正好包含40亿个无符号整数的文件,所以在整个范围中必然存在没出现过的数,可以使用最多1GB的内存,怎么找到所有未出现过的数?
进阶:内存限制为3KB,但是只用找到一个没出现过的数即可
再次升级:用有限几个变量,找到一个没出现过的数
有一个包含100亿个URL的大文件,假设每个URL占用64B,请找出其中所有重复的URL。
补充:某搜索公司一天的用户搜索词汇是海量的(百亿数据量),请设计一种求出每天热门Top100词汇的可行办法
32位无符号整数的范围是0~4294967295,现在有40亿个无符号整数,可以使用最多1GB的内存,找出所有出现了两次的数
32位无符号整数的范围是0~4294967295,现在有40亿个无符号整数,可以使用最多3K的内存,怎么找到这40亿个整数的中位数?
32位无符号整数的范围是0~4294967295,有一个10G大小的文件,每一行都装着这种类型的数字,整个文件是无序的,给你5G的内存空间,请你输出一个10G大小的文件,就是原文件所有数字排序的结果。
public static class AVLNode<K extends Comparable<K>, V> {
public K k;
public V v;
public AVLNode<K, V> l;
public AVLNode<K, V> r;
public int h;
public AVLNode(K key, V value) {
k = key;
v = value;
h = 1;
}
}
public static class AVLTreeMap<K extends Comparable<K>, V> {
private AVLNode<K, V> root;
private int size;
public AVLTreeMap() {
root = null;
size = 0;
}
private AVLNode<K, V> rightRotate(AVLNode<K, V> cur) {
AVLNode<K, V> left = cur.l;
cur.l = left.r;
left.r = cur;
cur.h = Math.max((cur.l != null ? cur.l.h : 0), (cur.r != null ? cur.r.h : 0)) + 1;
left.h = Math.max((left.l != null ? left.l.h : 0), left.r.h) + 1;
return left;
}
private AVLNode<K, V> leftRotate(AVLNode<K, V> cur) {
AVLNode<K, V> right = cur.r;
cur.r = right.l;
right.l = cur;
cur.h = Math.max((cur.l != null ? cur.l.h : 0), (cur.r != null ? cur.r.h : 0)) + 1;
right.h = Math.max(right.l.h, (right.r != null ? right.r.h : 0)) + 1;
return right;
}
private AVLNode<K, V> rebalance(AVLNode<K, V> cur) {
if (cur == null) return null;
int leftHeight = cur.l != null ? cur.l.h : 0;
int rightHeight = cur.r != null ? cur.r.h : 0;
if (Math.abs(leftHeight - rightHeight) <= 1) return cur;
if (leftHeight > rightHeight) {
int leftLeftHeight = cur.l != null && cur.l.l != null ? cur.l.l.h : 0;
int leftRightHeight = cur.l != null && cur.l.r != null ? cur.l.r.h : 0;
if (leftLeftHeight >= leftRightHeight) {
return rightRotate(cur);
} else {
cur.l = leftRotate(cur.l);
return rightRotate(cur);
}
} else {
int rightLeftHeight = cur.r != null && cur.r.l != null ? cur.r.l.h : 0;
int rightRightHeight = cur.r != null && cur.r.r != null ? cur.r.r.h : 0;
if (rightRightHeight >= rightLeftHeight) {
return leftRotate(cur);
} else {
cur.r = rightRotate(cur.r);
return leftRotate(cur);
}
}
}
private AVLNode<K, V> findLastIndex(K key) {
AVLNode<K, V> pre = root;
AVLNode<K, V> cur = root;
while (cur != null) {
pre = cur;
if (key.compareTo(cur.k) == 0) {
break;
} else if (key.compareTo(cur.k) < 0) {
cur = cur.l;
} else {
cur = cur.r;
}
}
return pre;
}
private AVLNode<K, V> findLastNoSmallIndex(K key) {
AVLNode<K, V> ans = null;
AVLNode<K, V> cur = root;
while (cur != null) {
if (key.compareTo(cur.k) == 0) {
ans = cur;
break;
} else if (key.compareTo(cur.k) < 0) {
ans = cur;
cur = cur.l;
} else {
cur = cur.r;
}
}
return ans;
}
private AVLNode<K, V> findLastNoBigIndex(K key) {
AVLNode<K, V> ans = null;
AVLNode<K, V> cur = root;
while (cur != null) {
if (key.compareTo(cur.k) == 0) {
ans = cur;
break;
} else if (key.compareTo(cur.k) < 0) {
cur = cur.l;
} else {
ans = cur;
cur = cur.r;
}
}
return ans;
}
private AVLNode<K, V> add(AVLNode<K, V> cur, K key, V value) {
if (cur == null) return new AVLNode<>(key, value);
if (key.compareTo(cur.k) < 0) {
cur.l = add(cur.l, key, value);
} else {
cur.r = add(cur.r, key, value);
}
cur.h = Math.max(cur.l != null ? cur.l.h : 0, cur.r != null ? cur.r.h : 0) + 1;
return rebalance(cur);
}
// 在cur这棵树上,删掉key所代表的节点
// 返回cur这棵树的新头部
private AVLNode<K, V> delete(AVLNode<K, V> cur, K key) {
if (key.compareTo(cur.k) > 0) {
cur.r = delete(cur.r, key);
} else if (key.compareTo(cur.k) < 0) {
cur.l = delete(cur.l, key);
} else {
if (cur.l == null && cur.r == null) {
cur = null;
} else if (cur.l == null) {
cur = cur.r;
} else if (cur.r == null) {
cur = cur.l;
} else {
AVLNode<K, V> des = cur.r;
while (des.l != null) {
des = des.l;
}
cur.r = delete(cur.r, des.k);
des.l = cur.l;
des.r = cur.r;
cur = des;
}
}
if (cur != null) {
cur.h = Math.max(cur.l != null ? cur.l.h : 0, cur.r != null ? cur.r.h : 0) + 1;
}
return rebalance(cur);
}
public int size() {
return size;
}
public boolean containsKey(K key) {
if (key == null) {
return false;
}
AVLNode<K, V> lastNode = findLastIndex(key);
return lastNode != null && key.compareTo(lastNode.k) == 0;
}
public void put(K key, V value) {
if (key == null) {
return;
}
AVLNode<K, V> lastNode = findLastIndex(key);
if (lastNode != null && key.compareTo(lastNode.k) == 0) {
lastNode.v = value;
} else {
size++;
root = add(root, key, value);
}
}
public void remove(K key) {
if (key == null) {
return;
}
if (containsKey(key)) {
size--;
root = delete(root, key);
}
}
public V get(K key) {
if (key == null) {
return null;
}
AVLNode<K, V> lastNode = findLastIndex(key);
if (lastNode != null && key.compareTo(lastNode.k) == 0) {
return lastNode.v;
}
return null;
}
public K firstKey() {
if (root == null) {
return null;
}
AVLNode<K, V> cur = root;
while (cur.l != null) {
cur = cur.l;
}
return cur.k;
}
public K lastKey() {
if (root == null) {
return null;
}
AVLNode<K, V> cur = root;
while (cur.r != null) {
cur = cur.r;
}
return cur.k;
}
public K floorKey(K key) {
if (key == null) {
return null;
}
AVLNode<K, V> lastNoBigNode = findLastNoBigIndex(key);
return lastNoBigNode == null ? null : lastNoBigNode.k;
}
public K ceilingKey(K key) {
if (key == null) {
return null;
}
AVLNode<K, V> lastNoSmallNode = findLastNoSmallIndex(key);
return lastNoSmallNode == null ? null : lastNoSmallNode.k;
}
}
public static class SBTNode<K extends Comparable<K>, V> {
public K key;
public V value;
public SBTNode<K, V> l;
public SBTNode<K, V> r;
public int size; // 不同的key的数量
public SBTNode(K key, V value) {
this.key = key;
this.value = value;
size = 1;
}
}
public static class SizeBalancedTreeMap<K extends Comparable<K>, V> {
private SBTNode<K, V> root;
private SBTNode<K, V> rightRotate(SBTNode<K, V> cur) {
SBTNode<K, V> leftNode = cur.l;
cur.l = leftNode.r;
leftNode.r = cur;
// 区别于AVL树,这里是互换size
leftNode.size = cur.size;
cur.size = (cur.l != null ? cur.l.size : 0) + (cur.r != null ? cur.r.size : 0) + 1;
return leftNode;
}
private SBTNode<K, V> leftRotate(SBTNode<K, V> cur) {
SBTNode<K, V> rightNode = cur.r;
cur.r = rightNode.l;
rightNode.l = cur;
// 区别于AVL树,这里是互换size
rightNode.size = cur.size;
cur.size = (cur.l != null ? cur.l.size : 0) + (cur.r != null ? cur.r.size : 0) + 1;
return rightNode;
}
private SBTNode<K, V> reBalance(SBTNode<K, V> cur) {
if (cur == null) return null;
int leftSize = cur.l != null ? cur.l.size : 0;
int leftLeftSize = cur.l != null && cur.l.l != null ? cur.l.l.size : 0;
int leftRightSize = cur.l != null && cur.l.r != null ? cur.l.r.size : 0;
int rightSize = cur.r != null ? cur.r.size : 0;
int rightLeftSize = cur.r != null && cur.r.l != null ? cur.r.l.size : 0;
int rightRightSize = cur.r != null && cur.r.r != null ? cur.r.r.size : 0;
if (leftLeftSize > rightSize) {
// LL
cur = rightRotate(cur);
cur.r = reBalance(cur.r);
cur = reBalance(cur);
} else if (leftRightSize > rightSize) {
// LR
cur.l = leftRotate(cur.l);
cur = rightRotate(cur);
cur.l = reBalance(cur.l);
cur.r = reBalance(cur.r);
cur = reBalance(cur);
} else if (rightRightSize > leftSize) {
// RR
cur = leftRotate(cur);
cur.l = reBalance(cur.l);
cur = reBalance(cur);
} else if (rightLeftSize > leftSize) {
// RR
cur.r = rightRotate(cur.r);
cur = leftRotate(cur);
cur.l = reBalance(cur.l);
cur.r = reBalance(cur.r);
cur = reBalance(cur);
}
return cur;
}
private SBTNode<K, V> findLastIndex(K key) {
SBTNode<K, V> pre = root;
SBTNode<K, V> cur = root;
while (cur != null) {
pre = cur;
if (key.compareTo(cur.key) == 0) {
break;
} else if (key.compareTo(cur.key) < 0) {
cur = cur.l;
} else {
cur = cur.r