Hashing is a technique for efficient lookup. The rough idea is to map each value to an index and store it in a container at that position, so that a query can go straight to the corresponding slot and fetch the data; lookup therefore costs O(1) on average.
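Before turning to unordered_map itself, here is a minimal sketch of that idea (my own illustration; the names table and capacity are just placeholders): store each value at hash(key) % capacity and read it back from the same slot.
#include <vector>
#include <string>
#include <functional>
#include <iostream>

int main()
{
    const size_t capacity = 10;
    std::vector<std::string> table(capacity);

    size_t key = 42;
    size_t slot = std::hash<size_t>{}(key) % capacity;  // the index this key maps to
    table[slot] = "value for 42";                       // O(1) store

    std::cout << table[std::hash<size_t>{}(key) % capacity] << std::endl;  // O(1) lookup
    return 0;
}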
1. unordered_map is an associative container that stores <key, value> pairs and allows fast retrieval of the value associated with a key.
2. In an unordered_map, the key is used to uniquely identify an element, and the mapped value is an object whose content is associated with that key. The key and the mapped value may have different types.
3. Internally, unordered_map does not sort its <key, value> pairs in any particular order. To locate the value for a key in constant time, it places pairs whose keys hash to the same value into the same bucket.
4. unordered_map is faster than map at accessing a single element by its key, but it is generally less efficient for range iteration over a subset of its elements.
5. unordered_map implements the direct-access operator (operator[]), which allows the value to be accessed directly using the key as the argument.
6. Its iterators are at least forward iterators.
The element stored in unordered_map is a pair<key, value>; the key is what maps the pair to its slot, and lookups are usually done with operator[], as in the short example below.
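A minimal sketch of the standard-library interface these points describe (this uses the standard std::unordered_map, not the simulated version implemented later; the sample data is illustrative):
#include <unordered_map>
#include <string>
#include <iostream>

int main()
{
    std::unordered_map<std::string, int> age;
    age.insert({ "alice", 20 });   // insert a <key, value> pair
    age["bob"] = 25;               // operator[] inserts "bob", then assigns 25
    age["alice"] = 21;             // operator[] on an existing key just updates the value

    auto it = age.find("alice");   // O(1) average lookup by key
    if (it != age.end())
        std::cout << it->first << " -> " << it->second << std::endl;

    std::cout << age.count("carol") << std::endl;   // 0: key absent
    std::cout << age.bucket_count() << std::endl;   // number of buckets in use
    return 0;
}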
Simulated implementation of unordered_map
#pragma once
#include<vector>
#include<string>
#include<utility>
using namespace std; // string, vector, pair and make_pair are used unqualified below
namespace myspace
{
template<class T>
struct HashFunc
{
size_t operator()(const T& key)
{
return (size_t)key;
}
};
template<>
struct HashFunc<string>
{
size_t operator()(const string& key)
{
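// polynomial string hash: hash = hash * 31 + ch for every character (the same scheme as Java's String.hashCode)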
size_t hash = 0;
for (auto& e : key)
{
hash *= 31;
hash += e;
}
return hash;
}
};
template<class T>
struct HashBucketNode
{
HashBucketNode(const T& data = T())
:_data(data), _next(nullptr)
{}
HashBucketNode* _next;
T _data;
};
template<class K, class V, class KeyOfValue, class HF>
struct HBIterator;
// To keep the implementation simple, the iterator class needs access to the HashBucket itself,
template<class K, class V, class KeyOfValue, class HF>
class HashBucket
{
typedef HashBucketNode<V> Node;
// the iterator walks the private bucket array, so every HBIterator specialization is a friend
template<class K1, class V1, class KeyOfValue1, class HF1>
friend struct HBIterator;
HF hf;
KeyOfValue kot;
public:
typedef HBIterator<K, V, KeyOfValue, HF> iterator;
HashBucket()
:_n(0)
{
_ht.resize(10);
}
size_t BucketCount() const
{
return _ht.size();
}
size_t BucketSize(const K& key)
{
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
size_t ret = 0;
while (cur)
{
++ret;
cur = cur->_next;
}
return ret;
}
pair<iterator, bool> Insert(const V& data)
{
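// when the load factor reaches 1 (stored elements == bucket count), double the table,
// re-link every node into the new buckets, then retry the insert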
if (_n == _ht.size())
{
size_t newsize = _ht.size() * 2;
vector<Node*>newht(newsize);
for (size_t i = 0; i < _ht.size(); i++)
{
Node* cur = _ht[i];
while (cur)
{
Node* next = cur->_next;
cur->_next = nullptr;
K key = kot(cur->_data);
size_t hashi = hf(key) % newsize;
if (newht[hashi])
{
Node* prev = newht[hashi];
while (prev->_next)
{
prev = prev->_next;
}
prev->_next = cur;
}
else
{
newht[hashi] = cur;
}
cur = next;
}
}
_ht.swap(newht);
return Insert(data);
}
K key = kot(data);
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
if (cur)
{
Node* prev = nullptr;
while (cur)
{
if (kot(cur->_data) == kot(data))
{
iterator it(cur, this);
return make_pair(it, false);
}
prev = cur;
cur = cur->_next;
}
Node* newnode = new Node(data);
prev->_next = newnode;
iterator it(newnode, this);
++_n;
return make_pair(it, true);
}
else
{
Node* newnode = new Node(data);
_ht[hashi] = newnode;
iterator it(newnode, this);
++_n;
return make_pair(it, true);
}
}
iterator Erase(iterator pos)
{
// remember the next position first: it is the return value
iterator ret = pos;
++ret;
Node* del = pos._pNode;
K key = kot(del->_data);
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
if (cur == del)
{
// deleting the first node: the bucket head moves to the next node
_ht[hashi] = cur->_next;
delete cur;
}
else
{
// find the predecessor, then unlink and delete the node
while (cur->_next != del)
{
cur = cur->_next;
}
cur->_next = del->_next;
delete del;
}
--_n;
return ret;
}
size_t Count(const K& key)
{
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
while (cur)
{
if (kot(cur->_data) == key)
{
return 1;
}
cur = cur->_next;
}
return 0;
}
iterator Find(const K& key)
{
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
while (cur)
{
if (kot(cur->_data) == key)
{
return iterator(cur, this);
}
// advance, otherwise the loop never terminates
cur = cur->_next;
}
return end();
}
iterator begin()
{
for (size_t i = 0; i < BucketCount(); i++)
{
// return the first node of the first non-empty bucket
if (_ht[i])
return iterator(_ht[i], this);
}
return end();
}
iterator end()
{
return iterator(nullptr, this);
}
size_t size() const
{
size_t ret = 0;
for (size_t i = 0; i < BucketCount(); i++)
{
Node* cur = _ht[i];
while (cur)
{
++ret;
cur = cur->_next;
}
}
return ret;
}
bool empty() const
{
return size() == 0;
}
private:
vector<Node*> _ht;
size_t _n;
};
template <class K, class V, class KeyOfValue, class HF>
struct HBIterator
{
typedef HashBucket<K, V, KeyOfValue, HF> HashBucket;
typedef HashBucketNode<V> Node;
typedef HBIterator<K, V, KeyOfValue, HF> Self;
KeyOfValue kot;
HF hf;
HBIterator(Node* pNode = nullptr, HashBucket* pHt = nullptr)
:_pNode(pNode), _pHt(pHt)
{}
Self& operator++()
{
// if there is another node in the current bucket, just move to it
if (_pNode->_next)
{
_pNode = _pNode->_next;
}
else
{
// otherwise find the next non-empty bucket and take its first node
size_t bucketi = hf(kot(_pNode->_data)) % _pHt->BucketCount() + 1;
_pNode = nullptr; // stays null (== end()) if every remaining bucket is empty
for (; bucketi < _pHt->BucketCount(); ++bucketi)
{
if (_pHt->_ht[bucketi])
{
_pNode = _pHt->_ht[bucketi];
break;
}
}
}
return *this;
}
Self operator++(int)
{
Self temp = *this;
++(*this); // advance this iterator, return the old position
return temp;
}
V& operator*()
{
return _pNode->_data;
}
V* operator->()
{
return &_pNode->_data;
}
bool operator==(const Self& it) const
{
return _pNode == it._pNode;
}
bool operator!=(const Self& it) const
{
return !(*this == it);
}
Node* _pNode; // the node the iterator currently refers to
HashBucket* _pHt; // the hash table itself -- needed to find the next non-empty bucket
};
}
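Before wrapping it into unordered_map, the bucket table can be exercised directly. The following is a minimal sanity-check sketch of my own (the IntSelf functor and test function are illustrative, not part of the original files); it assumes the HashBucket above:
// sanity check: use HashBucket directly with a trivial KeyOfValue functor
#include <iostream>
struct IntSelf
{
    const int& operator()(const int& data) { return data; }
};
int test_hashbucket()
{
    myspace::HashBucket<int, int, IntSelf, myspace::HashFunc<int>> hb;
    hb.Insert(1);
    hb.Insert(11);   // 1 and 11 collide: both land in bucket 1 of the 10-bucket table
    hb.Insert(2);
    for (auto it = hb.begin(); it != hb.end(); ++it)
        std::cout << *it << " ";   // prints the elements in bucket order, e.g. "1 11 2"
    std::cout << std::endl;
    return 0;
}
The unordered_map wrapper below then simply re-exports this interface through the usual container names.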
#include"HashBucket.h"
namespace myspace
{
// unordered_map stores pair<K, V> key/value pairs: K is the key type, V the value type, HF the hash functor type
// unordered_map is implemented by simply re-wrapping the HashBucket interface
template<class K, class V, class HF = HashFunc<K>>
class unordered_map
{
struct MapKeyOfValue
{
const K& operator()(const pair<K, V>& data)
{
return data.first;
}
};
typedef HashBucket<K, pair<K, V>, MapKeyOfValue, HF> HT;
// all operations below locate elements through the key
public:
typedef typename HT::iterator iterator;
unordered_map()
: _ht()
{}
iterator begin() { return _ht.begin(); }
iterator end() { return _ht.end(); }
// capacity
size_t size() const { return _ht.size(); }
bool empty() const { return _ht.empty(); }
// access
V& operator[](const K& key)
{
// inserts <key, V()> if the key is absent, then returns a reference to the mapped value
pair<iterator, bool> ret = _ht.Insert(pair<K, V>(key, V()));
return (ret.first)->second;
}
// note: there is no const overload -- operator[] may insert, so it cannot be const
// (the standard unordered_map behaves the same way)
// lookup
iterator find(const K& key) { return _ht.Find(key); }
size_t count(const K& key) { return _ht.Count(key); }
// modify
pair<iterator, bool> insert(const pair<K, V>& value)
{
return _ht.Insert(value);
}
iterator erase(iterator position)
{
return _ht.Erase(position);
}
// bucket
size_t bucket_count() { return _ht.BucketCount(); }
size_t bucket_size(const K& key) { return _ht.BucketSize(key); }
private:
HT _ht;
};
}
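A short usage sketch for the simulated unordered_map (my own test; the file name unordered_map.h and the sample data are assumptions, not from the original):
// illustrative test of myspace::unordered_map
#include "unordered_map.h"   // assumes the wrapper above is saved under this name
#include <iostream>
int main()
{
    myspace::unordered_map<std::string, int> dict;
    dict.insert(std::make_pair(std::string("sort"), 1));
    dict["left"];            // operator[] inserts <"left", 0>
    dict["left"]++;          // and returns a reference, so the value can be modified in place
    dict["string"] = 3;
    for (auto& kv : dict)    // range-for works because begin()/end() return forward iterators
        std::cout << kv.first << ":" << kv.second << std::endl;
    std::cout << "buckets: " << dict.bucket_count() << std::endl;
    return 0;
}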
unordered_set is similar to unordered_map because they share the same underlying hash bucket structure, HashBucket. The difference is that unordered_set stores only the key (its value is the key itself), while unordered_map stores pair<key, value>.
Simulated implementation of unordered_set
(HashBucket.h is exactly the same file as the one listed above; both containers share it, so it is not repeated here.)
#pragma once
#include"HashBucket.h"
namespace myspace // same namespace as HashBucket.h above, so the shared code is found directly
{
template<class K, class HF = HashFunc<K>>
class unordered_set
{
struct SetKeyOfValue
{
const K& operator()(const K& data)
{
return data;
}
};
typedef HashBucket<K, K, SetKeyOfValue, HF> HT;
// for a set the key itself is the stored value; all operations locate elements through it
public:
typedef typename HT::iterator iterator;
unordered_set() : _ht()
{}
iterator begin() { return _ht.begin(); }
iterator end() { return _ht.end(); }
// capacity
size_t size()const { return _ht.size(); }
bool empty()const { return _ht.empty(); }
// lookup
iterator find(const K& key) { return _ht.Find(key); }
size_t count(const K& key) { return _ht.Count(key); }
// modify
pair<iterator, bool> insert(const K& value)
{
return _ht.Insert(value);
}
iterator erase(iterator position)
{
return _ht.Erase(position);
}
// bucket
size_t bucket_count() { return _ht.BucketCount(); }
size_t bucket_size(const K& key) { return _ht.BucketSize(key); }
private:
HT _ht;
};
}
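And a matching usage sketch for the simulated unordered_set (again my own test; the file name unordered_set.h is an assumption):
// illustrative test of myspace::unordered_set
#include "unordered_set.h"   // assumes the wrapper above is saved under this name
#include <iostream>
int main()
{
    myspace::unordered_set<int> s;
    s.insert(3);
    s.insert(13);    // collides with 3 in a 10-bucket table, chained in the same bucket
    s.insert(5);
    s.insert(3);     // duplicate key: Insert returns <iterator, false> and nothing is added
    std::cout << s.size() << " " << s.count(13) << std::endl;  // 3 1
    for (auto it = s.begin(); it != s.end(); ++it)
        std::cout << *it << " ";
    std::cout << std::endl;
    return 0;
}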
Hash collisions are generally handled with either closed hashing or open hashing; the implementation above uses open hashing.
Closed hashing, also called open addressing: when a collision occurs and the hash table is not full, there must still be empty slots, so the key can be stored in the "next" empty position after the one it collided at.
There are different strategies for finding that next empty slot, including linear probing and quadratic probing (see the sketch below).
Open hashing (separate chaining): each slot holds a linked list (or a red-black tree if a single slot accumulates too many elements); elements that map to the same position are simply appended to that slot's list.
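To make the closed-hashing description concrete, here is a minimal linear-probing sketch of my own (illustrative names, not part of the implementation above); it assumes integer keys, marks slots with an EMPTY/EXIST/DELETED state, and doubles the table when the load factor exceeds 0.7:
// linear probing (closed hashing) -- illustrative sketch only
#include <vector>
#include <iostream>

enum State { EMPTY, EXIST, DELETED };

struct Slot
{
    int _key = 0;
    State _state = EMPTY;
};

struct LinearProbeSet
{
    LinearProbeSet() : _table(10), _n(0) {}

    bool Insert(int key)
    {
        if (Find(key) != -1)
            return false;                        // reject duplicates
        if (_n * 10 >= _table.size() * 7)        // load factor above 0.7: rehash into a bigger table
        {
            LinearProbeSet bigger;
            bigger._table.resize(_table.size() * 2);
            for (auto& s : _table)
                if (s._state == EXIST)
                    bigger.Insert(s._key);
            _table.swap(bigger._table);
            _n = bigger._n;
        }
        size_t hashi = (size_t)key % _table.size();
        while (_table[hashi]._state == EXIST)    // probe the "next" slot until a free one is found
            hashi = (hashi + 1) % _table.size();
        _table[hashi]._key = key;
        _table[hashi]._state = EXIST;
        ++_n;
        return true;
    }

    int Find(int key)                            // returns the slot index, or -1 if absent
    {
        size_t hashi = (size_t)key % _table.size();
        size_t start = hashi;
        while (_table[hashi]._state != EMPTY)
        {
            if (_table[hashi]._state == EXIST && _table[hashi]._key == key)
                return (int)hashi;
            hashi = (hashi + 1) % _table.size();
            if (hashi == start)                  // wrapped all the way around
                break;
        }
        return -1;
    }

    bool Erase(int key)
    {
        int idx = Find(key);
        if (idx == -1)
            return false;
        _table[idx]._state = DELETED;            // mark instead of emptying so probe chains stay intact
        --_n;
        return true;
    }

    std::vector<Slot> _table;
    size_t _n;
};

int main()
{
    LinearProbeSet s;
    s.Insert(5);
    s.Insert(15);                                // 5 and 15 collide; 15 is probed into the next slot
    std::cout << s.Find(5) << " " << s.Find(15) << " " << s.Find(25) << std::endl;  // 5 6 -1
    return 0;
}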